% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/estimateMissSBM.R
\name{estimateMissSBM}
\alias{estimateMissSBM}
\title{Estimation of simple SBMs with missing data}
\usage{
estimateMissSBM(
  adjacencyMatrix,
  vBlocks,
  sampling,
  covariates = NULL,
  control = list()
)
}
\arguments{
\item{adjacencyMatrix}{The N x N adjacency matrix of the network data. If \code{adjacencyMatrix} is symmetric,
we assume an undirected network with no loop; otherwise the network is assumed to be directed.}

\item{vBlocks}{The vector of number of blocks considered in the collection.}

\item{sampling}{The model used to describe the process that originates the missing data:
MAR designs ("dyad", "node", "covar-dyad", "covar-node", "snowball") and NMAR designs
("double-standard", "block-dyad", "block-node", "degree") are available. See details.}

\item{covariates}{A list with M entries (the M covariates). If the covariates are node-centered, each entry of \code{covariates}
must be a size-N vector; if the covariates are dyad-centered, each entry of \code{covariates} must be an N x N matrix.}

\item{control}{a list of parameters controlling advanced features. See details.}
}
\value{
Returns an R6 object with class \code{\link{missSBM_collection}}.
}
\description{
Variational EM inference of Stochastic Block Models indexed by block number from a partially observed network.
}
\details{
The list of parameters \code{control} tunes more advanced features, such as the
initialization, how covariates are handled in the model, and the variational EM algorithm:
\itemize{
\item{"useCovSBM": }{logical. If \code{covariates} is not null, should they be used
for the SBM inference (or just for the sampling)? Default is TRUE.}
\item{"clusterInit": }{Initial method for clustering: either a character in "hierarchical", "spectral"
or "kmeans", or a list with \code{length(vBlocks)} vectors, each with size
\code{ncol(adjacencyMatrix)},  providing a user-defined clustering. Default is "spectral".}
\item{"similarity": }{An R x R -> R function to compute similarities between node covariates. Default is
\code{missSBM:::l1_similarity}, that is, -abs(x-y). Only relevant when the covariates are node-centered
(i.e. \code{covariates} is a list of size-N vectors).}
\item{"threshold": }{V-EM algorithm stops when an optimization step changes the objective function
by less than threshold. Default is 1e-3.}
\item{"maxIter": }{V-EM algorithm stops when the number of iterations exceeds maxIter.
Default is 100 with no covariate, 50 otherwise.}
\item{"fixPointIter": }{number of fix-point iterations in the V-E step.
Default is 5 with no covariate, 2 otherwise.}
\item{"cores": }{integer for number of cores used. Default is 1.}
\item{"trace": }{integer for verbosity (0, 1, 2). Default is 1. Useless when \code{cores} > 1}
}

The different sampling designs are split into two families in which we find dyad-centered and
node-centered samplings. See \doi{10.1080/01621459.2018.1562934} for a complete description.
\itemize{
\item Missing at Random (MAR)
\itemize{
\item{"dyad": parameter = p = Prob(Dyad(i,j) is observed)}
\item{"node": parameter = p = Prob(Node i is observed)}
\item{"covar-dyad": parameter = beta in R^M, such that Prob(Dyad (i,j) is observed) = logistic(parameter' covarArray (i,j, .))}
\item{"covar-node": parameter = nu in R^M such that Prob(Node i is observed) = logistic(parameter' covarMatrix (i,))}
\item{"snowball": parameter = number of waves with Prob(Node i is observed in the 1st wave)}
}
\item Not Missing At Random (NMAR)
\itemize{
\item{"double-standard": parameter = (p0,p1) with p0 = Prob(Dyad (i,j) is observed | the dyad is equal to 0), p1 = Prob(Dyad (i,j) is observed | the dyad is equal to 1)}
\item{"block-node": parameter = c(p(1),...,p(Q)) and p(q) = Prob(Node i is observed | node i is in cluster q)}
\item{"block-dyad": parameter = c(p(1,1),...,p(Q,Q)) and p(q,l) = Prob(Edge (i,j) is observed | node i is in cluster q and node j is in cluster l)}
\item{"degree": parameter = c(a,b) and logistic(a+b*degree(i)) = Prob(Node i is observed | Degree(i))}
}
}
}
\examples{
## SBM parameters
N <- 150 # number of nodes
Q <- 3   # number of clusters
blockProp <- rep(1,Q)/Q  # block proportions (not named `pi`, to avoid masking base::pi)
theta <- list(mean = diag(.45,Q) + .05 ) # connectivity matrix

## Sampling parameters
samplingParameters <- .5 # the sampling rate
sampling  <- "dyad"      # the sampling design

## Generate an undirected binary SBM with no covariate
sbm <- sbm::sampleSimpleSBM(N, blockProp, theta)

## Sample some dyads, then infer SBMs with missing data for 1 to 5 blocks
collection <-
   observeNetwork(sbm$netMatrix, sampling, samplingParameters) \%>\%
   estimateMissSBM(vBlocks = 1:5, sampling = sampling)
collection$ICL
coef(collection$bestModel$fittedSBM, "connectivity")

## Inspect the `bestModel` entry of the collection
myModel <- collection$bestModel
plot(myModel, "network")
coef(myModel, "sampling")
coef(myModel, "connectivity")
predict(myModel)[1:5, 1:5]
fitted(myModel)[1:5, 1:5]

}
\seealso{
\code{\link{observeNetwork}}, \code{\link{missSBM_collection}} and \code{\link{missSBM_fit}}.
}
