#' The stochastic degree sequence model (sdsm)
#'
#' `sdsm` computes the proportion of generated edges
#'     above or below the observed value using the stochastic degree sequence model.
#'     Once computed, use \code{\link{backbone.extract}} to return
#'     the backbone matrix for a given alpha value.
#'
#' @param B Matrix: Bipartite adjacency matrix
#' @param trials Integer: Number of random bipartite graphs generated. Default is 0.
#' @param model String: A generalized linear model (glm) used to generate random bipartite graphs.
#' @param sparse Boolean: If sparse matrix manipulations should be used
#' @param maxiter Integer: Maximum number of iterations if "model" is a glm.
#' @param dyad vector length 2: two row entries i,j. Saves the value in the i-th row and j-th column of each projected B* matrix. This is useful for visualizing an example of the empirical null edge weight distribution generated by the model. The two entries correspond to the row and column indices of a cell in the projected matrix, and can be given either as string row names or as numeric values.
#' @param alpha Real: proposed alpha threshold to be used for determining statistical significance of edges
#' @param tolerance Real: tolerance for p-value computation using RNA poisson-binomial approximation
#' @param progress Boolean: If \link[utils]{txtProgressBar} should be used to measure progress
#'
#' @details The 'model' parameter can take in a 'link' function, as described by \link[stats]{glm} and \link[stats]{family}. This can be one of c('logit', 'probit', 'cauchit', 'log', 'cloglog').
#' @details If 'trials'>0, the function uses repeated Bernoulli trials to compute the proportions, using the following steps:
#' During each iteration, sdsm generates a new B* matrix using the probabilities estimated by the `glm`. This is a random bipartite matrix with about the same row and column sums as the original matrix B.
#' If the 'dyad' parameter is supplied, then each time a B* matrix is projected, the projected value for the corresponding row and column is saved.
#' This allows the user to see the distribution of the edge weights for the desired row and column.
#' @details If 'trials'=0, the proportion of edges above or below the observed values are computed using the Poisson Binomial distribution.
#' These values are approximated using either a Discrete Fourier Transform (DFT method) or a Refined Normal Approximation (RNA method). These functions are described by \link[poibin]{ppoibin}.
#' The RNA method is used by default. If a value computed with the RNA method lies strictly between 'alpha'-'tolerance' and 'alpha'+'tolerance', it is recomputed using the DFT method.
#'
#' @return list(positive, negative, dyad_values, summary).
#' positive: matrix of the proportion of times each entry of the generated projection is greater than or equal to the corresponding entry of the projected matrix B.
#' negative: matrix of the proportion of times each entry of the generated projection is less than or equal to the corresponding entry of the projected matrix B.
#' dyad_values: list of edge weight for i,j in each generated projection, included if 'dyad' not NULL and 'trials > 0'.
#' summary: a data frame summary of the inputted matrix and the model used including: model name, number of rows, skew of row sums, number of columns, skew of column sums, and running time.
#'
#' @references \href{https://www.sciencedirect.com/science/article/abs/pii/S0378873314000343}{Neal, Z. P. (2014). The backbone of bipartite projections: Inferring relationships from co-authorship, co-sponsorship, co-attendance, and other co-behaviors. Social Networks, 39, Elsevier: 84-97. DOI: 10.1016/j.socnet.2014.06.001}
#'
#'
#' @export
#'
#' @examples
#'sdsm_bt <- sdsm(davis, trials = 100,dyad = c("EVELYN", "CHARLOTTE" ))
#'sdsm_rna <- sdsm(davis, trials = 0, tolerance = 0)
#'sdsm_dft <- sdsm(davis, trials = 0, tolerance = 1)
sdsm <- function(B,
                 trials = 0,
                 model = "logit",
                 sparse = TRUE,
                 maxiter = 25,
                 dyad = NULL,
                 alpha = 0.05,
                 tolerance = 0,
                 progress = FALSE){

  # Overview:
  #   1. Project the bipartite matrix B to the one-mode matrix P = B B'.
  #   2. Fit a binomial glm (value ~ rowmarg + colmarg + rowcol) on the
  #      vectorized B and turn the fitted values into a probability matrix.
  #   3. Either simulate `trials` random bipartite matrices from those
  #      probabilities (trials > 0), or evaluate Poisson-binomial tail
  #      probabilities through poibin::ppoibin (trials == 0).
  #   4. Return the positive/negative matrices, optional dyad values and a
  #      one-column summary data frame.

  #Argument Checks
  #`sparse` and `progress` may arrive as logicals or as the strings
  #"TRUE"/"FALSE"; both spellings are accepted and reduced to one logical flag.
  if (length(sparse) != 1 || is.na(sparse) || !(as.character(sparse) %in% c("TRUE", "FALSE"))) {
    stop("sparse must be either TRUE or FALSE")
  }
  use_sparse <- as.character(sparse) == "TRUE"
  show_progress <- identical(as.character(progress), "TRUE")

  #Every link listed here is accepted by stats::binomial()
  valid_links <- c("logit", "probit", "cauchit", "log", "cloglog")
  if (!is.character(model) || length(model) != 1 || !(model %in% valid_links)) {
    stop("model must be: logit | probit | cauchit | log | cloglog")
  }
  if (!is.numeric(trials) || length(trials) != 1 || is.na(trials) || (trials < 0) || (trials %% 1 != 0)) {
    stop("trials must be a non-negative integer")
  }
  if (!methods::is(B, "matrix") && !methods::is(B, "sparseMatrix")) {
    stop("input bipartite data must be a matrix")
  }

  #If sparse matrix input, use sparse matrix operations
  if (methods::is(B, "sparseMatrix")) {use_sparse <- TRUE}
  use_dyad <- length(dyad) > 0

  #Run Time
  run.time.start <- Sys.time()

  #Project to one-mode data
  if (use_sparse) {
    if (!methods::is(B, "sparseMatrix")) {
      B <- Matrix::Matrix(B, sparse = TRUE)
    }
    P <- Matrix::tcrossprod(B)
  } else {
    P <- tcrossprod(B)
  }

  n_rows <- nrow(B)
  n_cols <- ncol(B)

  #Create Positive and Negative Matrices to hold backbone
  Positive <- matrix(0, nrow(P), ncol(P))
  Negative <- matrix(0, nrow(P), ncol(P))

  #Compute probabilities for SDSM
  #Vectorize the bipartite data (column-major, matching the row/col ids below)
  A <- data.frame(value = as.vector(B))
  A$row <- rep(seq_len(n_rows), times = n_cols)
  A$col <- rep(seq_len(n_cols), each = n_rows)

  #Attach the row sum and column sum of each cell, plus their product
  A$rowmarg <- stats::ave(A$value, A$row, FUN = sum)
  A$colmarg <- stats::ave(A$value, A$col, FUN = sum)
  A$rowcol <- A$rowmarg * A$colmarg

  #Estimate the glm (speedglm when available), compute probabilities
  glm_formula <- value ~ rowmarg + colmarg + rowcol
  glm_family <- stats::binomial(link = model)
  if (requireNamespace("speedglm", quietly = TRUE)) {
    #NOTE(review): speedglm takes the iteration cap as `maxit` rather than a
    #glm-style `control` list -- confirm against the installed speedglm version.
    model.estimates <- speedglm::speedglm(formula = glm_formula, family = glm_family, data = A, maxit = maxiter)
  } else {
    model.estimates <- stats::glm(formula = glm_formula, family = glm_family, data = A, control = list(maxit = maxiter))
  }
  probs <- as.vector(stats::predict(model.estimates, newdata = A, type = "response"))

  #Assemble probability matrix (same shape as B)
  prob.mat <- matrix(probs, nrow = n_rows, ncol = n_cols)
  rows <- dim(prob.mat)[1]

  #Monte Carlo Method
  if (trials > 0){
    #Dyad save: resolve the requested cell of the projection once, up front
    edge_weights <- numeric(trials)
    if (use_dyad){
      if (length(dyad) < 2) {stop("dyad must contain two row entries")}
      if (is.numeric(dyad)){
        #is.numeric() covers integer as well as double indices
        vec <- dyad
      } else {
        vec <- match(as.character(dyad[1:2]), rownames(B))
        if (anyNA(vec)) {stop("dyad entries must match row names of B")}
      }
    }

    message("Finding the Backbone using ", model, " SDSM")
    start.time <- Sys.time()
    pb <- NULL  #progress bar is only created after the timing estimate at i == 10

    #Build null models
    for (i in seq_len(trials)){

      #Use GLM probabilities to create an SDSM Bstar
      Bstar <- matrix(((stats::runif(n_rows * n_cols)) <= probs) + 0, n_rows, n_cols)

      #Construct Pstar from Bstar
      if (use_sparse) {
        Bstar <- Matrix::Matrix(Bstar, sparse = TRUE)
        Pstar <- Matrix::tcrossprod(Bstar)
      } else {
        Pstar <- tcrossprod(Bstar)
      }

      #Count trials whose generated edge is >= (Positive) or <= (Negative) the observed edge
      Positive <- Positive + (Pstar >= P) + 0
      Negative <- Negative + (Pstar <= P) + 0

      #Save Dyad of Pstar
      if (use_dyad){
        edge_weights[i] <- Pstar[vec[1], vec[2]]
      }

      #Report estimated running time after ten trials, then start the progress bar
      if (i == 10){
        end.time <- Sys.time()
        est <- (round(difftime(end.time, start.time), 2) * (trials / 10))
        message("Estimated time to complete is ", est, " ", units(est))
        if (show_progress){
          pb <- utils::txtProgressBar(min = 0, max = trials, style = 3)
        }
      } #end timer estimate

      if (!is.null(pb)) {utils::setTxtProgressBar(pb, i)}
    } #end for loop

    #With fewer than ten trials no bar was ever opened, so there is nothing to close
    if (!is.null(pb)) {close(pb)}

    #Proportion of greater than expected and less than expected
    Positive <- (Positive / trials)
    Negative <- (Negative / trials)
    rownames(Positive) <- rownames(B)
    colnames(Positive) <- rownames(B)
    rownames(Negative) <- rownames(B)
    colnames(Negative) <- rownames(B)

    if (!use_dyad){
      edge_weights <- NULL
    }
  } #end if trials > 0

  # Poisson Binomial Distribution Method
  if (trials == 0){
    message("Finding the Backbone using Poisson Binomial SDSM with alpha = ", alpha, " and tolerance = ", tolerance)

    #cdf of the Poisson-binomial at kk[s], using the success probabilities in
    #row idx[s] of `pmat`; one ppoibin call per projected cell.
    pb_cdf <- function(kk, pmat, idx, method){
      vapply(seq_along(idx),
             function(s) poibin::ppoibin(kk = kk[s], pp = pmat[idx[s], ], method = method),
             numeric(1))
    }
    all_rows <- seq_len(rows)

    for (i in all_rows){
      #Row j of prob.imat holds prob.mat[i,]*prob.mat[j,]
      prob.imat <- sweep(prob.mat, MARGIN = 2, prob.mat[i,], `*`)
      observed <- as.numeric(P[i,])

      #cdf at the observed value for negative; 1 - cdf at (observed - 1) for positive
      #Using RNA approximation
      negative <- pb_cdf(observed, prob.imat, all_rows, "RNA")
      positive <- 1 - pb_cdf(observed - 1, prob.imat, all_rows, "RNA")

      #Find which values are within a tolerance distance from alpha
      wn <- which((negative > (alpha - tolerance)) & (negative < (alpha + tolerance)))
      wp <- which((positive > (alpha - tolerance)) & (positive < (alpha + tolerance)))

      #Change these values to DFT approximation
      if (length(wn) > 0){
        negative[wn] <- pb_cdf(observed[wn], prob.imat, wn, "DFT-CF")
      }
      if (length(wp) > 0){
        positive[wp] <- 1 - pb_cdf(observed[wp] - 1, prob.imat, wp, "DFT-CF")
      }

      #Set values in Positive & Negative matrices
      Positive[i,] <- positive
      Negative[i,] <- negative
    } #end for i in rows
    rownames(Positive) <- rownames(B)
    colnames(Positive) <- rownames(B)
    rownames(Negative) <- rownames(B)
    colnames(Negative) <- rownames(B)
  } #end if trials == 0

  #Run Time
  run.time.end <- Sys.time()
  total.time <- (round(difftime(run.time.end, run.time.start), 2))

  #Compile Summary
  if (use_sparse) {
    row_sums <- Matrix::rowSums(B)
    col_sums <- Matrix::colSums(B)
  } else {
    row_sums <- rowSums(B)
    col_sums <- colSums(B)
  }

  #Third standardized moment, rounded to five decimals
  skew <- function(x) {
    round((sum((x - mean(x))^3)) / (length(x) * (stats::sd(x)^3)), 5)
  }

  a <- c("Model", "Number of Rows", "Skew of Row Sums", "Number of Columns", "Skew of Column Sums", "Running Time")
  b <- c("Stochastic Degree Sequence Model", dim(B)[1], skew(row_sums), dim(B)[2], skew(col_sums), as.numeric(total.time))
  model.summary <- data.frame(a, b, row.names = 1)
  colnames(model.summary) <- "Model Summary"

  #dyad_values is only part of the result for Monte Carlo runs with a dyad
  if (use_dyad && (trials > 0)){
    return(list(positive = Positive, negative = Negative, dyad_values = edge_weights, summary = model.summary))
  } else {
    return(list(positive = Positive, negative = Negative, summary = model.summary))
  }

} #end sdsm function

