% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/overdispersedStan.R
\name{overdispersedStan}
\alias{overdispersedStan}
\title{Fit ARD using the Overdispersed model in Stan}
\usage{
overdispersedStan(
  ard,
  known_sizes = NULL,
  known_ind = NULL,
  G1_ind = NULL,
  G2_ind = NULL,
  B2_ind = NULL,
  N = NULL,
  chains = 3,
  cores = 1,
  warmup = 1000,
  iter = 1500,
  thin = 1,
  return_fit = FALSE,
  ...
)
}
\arguments{
\item{ard}{The \verb{n_i x n_k} matrix of non-negative ARD integer responses,
where the \verb{(i,k)th} element corresponds to the number of people that
respondent \code{i} knows in subpopulation \code{k}.}

\item{known_sizes}{The known subpopulation sizes corresponding to a subset of
the columns of \code{ard}.}

\item{known_ind}{The indices that correspond to the columns of \code{ard}
with known_sizes. By default, the function assumes the first \code{n_known}
columns, where \code{n_known} corresponds to the number of
\code{known_sizes}.}

\item{G1_ind}{A vector of indices denoting the columns of \code{ard} that
correspond to the primary scaling groups, i.e. the collection of rare
girls' names in Zheng, Salganik, and Gelman (2006). By default, all
known_sizes are used. If G2_ind and B2_ind are not provided, \code{C = C_1}, so
only G1_ind are used. If G1_ind is not provided, no scaling is performed.}

\item{G2_ind}{A vector of indices denoting the columns of \code{ard} that
correspond to the subpopulations that belong to the first secondary scaling
groups, i.e. the collection of somewhat popular girls' names.}

\item{B2_ind}{A vector of indices denoting the columns of \code{ard} that
correspond to the subpopulations that belong to the second secondary
scaling groups, i.e. the collection of somewhat popular boys' names.}

\item{N}{The known total population size.}

\item{chains}{A positive integer specifying the number of Markov chains.}

\item{cores}{A positive integer specifying the number of cores to use to run
the Markov chains in parallel.}

\item{warmup}{A positive integer specifying the number of warmup samples for
each chain. Matches the usage in \link[rstan]{stan}.}

\item{iter}{A positive integer specifying the total number of samples for
each chain (including warmup). Matches the usage in \link[rstan]{stan}.}

\item{thin}{A positive integer specifying the interval for saving posterior
samples. Default value is 1 (i.e. no thinning).}

\item{return_fit}{A logical indicating whether the fitted Stan model should
be returned instead of the parameters extracted with \link[rstan]{extract}
and then scaled. This is \code{FALSE} by default.}

\item{...}{Additional arguments to be passed to \link[rstan]{stan}.}
}
\value{
Either the full fitted Stan model if \code{return_fit = TRUE}, else a
named list with the estimated parameters extracted using
\link[rstan]{extract} (the default). The estimated parameters are named as
follows, with additional descriptions as needed:

\describe{\item{alphas}{Log degree, if scaling is performed, else raw alpha parameters}
\item{betas}{Log prevalence, if scaling is performed, else raw beta parameters}
\item{inv_omegas}{Inverse of overdispersion parameters}
\item{sigma_alpha}{Standard deviation of alphas}
\item{mu_beta}{Mean of betas}
\item{sigma_beta}{Standard deviation of betas}
\item{omegas}{Overdispersion parameters}}

If scaling is performed (see \code{G1_ind}), the following additional parameters are included:
\describe{\item{mu_alpha}{Mean of log degrees}
\item{degrees}{Degree estimates}
\item{sizes}{Subpopulation size estimates}}
}
\description{
This function fits the ARD using the Overdispersed model in Stan. The
population size estimates and degrees are scaled using a post-hoc procedure.
For the Gibbs-Metropolis algorithm implementation, see
\link[networkscaleup]{overdispersed}.
}
\details{
This function fits the overdispersed NSUM model of Zheng et al. (2006) using
Stan, rather than the Gibbs-Metropolis algorithm provided in that paper.
}
\examples{
# Analyze an example ard data set using Zheng et al. (2006) models
# Note that in practice, both warmup and iter should be much higher
\dontrun{
data(example_data)

ard = example_data$ard
subpop_sizes = example_data$subpop_sizes
known_ind = c(1, 2, 4)
N = example_data$N

overdisp.est = overdispersedStan(ard,
known_sizes = subpop_sizes[known_ind],
known_ind = known_ind,
G1_ind = 1,
G2_ind = 2,
B2_ind = 4,
N = N,
chains = 1,
cores = 1,
warmup = 250,
iter = 500)

# Compare size estimates
round(data.frame(true = subpop_sizes,
basic = colMeans(overdisp.est$sizes)))

# Compare degree estimates
plot(example_data$degrees, colMeans(overdisp.est$degrees))

# Look at overdispersion parameter
colMeans(overdisp.est$omegas)
}
}
\references{
Zheng, T., Salganik, M. J., and Gelman, A. (2006). How many
people do you know in prison? Using overdispersion in count data to estimate
social structure in networks, \emph{Journal of the American Statistical
Association}, \bold{101:474}, 409--423.
}
