% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/seq_GEE_model.R
\name{seq_GEE_model}
\alias{seq_GEE_model}
\title{The sequential method for the generalized estimating equations case.}
\usage{
seq_GEE_model(formula, data = list(), clusterID, data_pool = list(),
  clusterID_pool, strategy, d = 0.4, family = stats::gaussian(link =
  "identity"), corstr = "independence", contrasts = NULL, ...)
}
\arguments{
\item{formula}{An object of class "formula" (or one that can be coerced to
that class): a symbolic description of the model to be fitted.}

\item{data}{A data frame containing the initial random samples to obtain the
initial estimate of the coefficient. Note that the first column of the data
frame is the response variable, and the rest are the explanatory variables.}

\item{clusterID}{The id for each subject in the initial samples. Note that
the subjects in the same cluster will have identical id.}

\item{data_pool}{A data frame containing all the random samples from which we
will choose subjects. The first column of the data frame is the response
variable, and the rest are the explanatory variables.}

\item{clusterID_pool}{The id for each subject in the data_pool. Note that the
subjects in the same cluster will have identical id.}

\item{strategy}{A character string that determines the sample selection
criterion to be used, matching one of 'random' or 'D_optimal'. The default
value is 'D_optimal'.}

\item{d}{A numeric value specifying the length of the fixed size confidence
set for our model. The default value is 0.4.}

\item{family}{A description of the error distribution and link function to be
used in the model. See \code{\link{family}} for details of family
functions. Must match one of 'gaussian' or 'binomial'.}

\item{corstr}{A character string specifying the correlation structure. The
following are permitted: "independence", "exchangeable" and "ar1".}

\item{contrasts}{An optional list. See the contrasts.arg of
\code{\link{model.matrix.default}}.}

\item{...}{Further arguments passed to or from other methods.}
}
\value{
a list containing the following components
\item{beta}{the parameters that we estimate when the iteration is
finished}
\item{rho}{estimate of correlation coefficient}
\item{nonZeroIdx}{the indices of the nonzero coefficients}
\item{N}{the current sample size when the stopping criterion is satisfied}
\item{d}{the length of the fixed size confidence set that we specify}
\item{is_stopped}{a flag indicating whether the sequential iterations have
stopped. When the value of is_stopped is TRUE, the iteration has stopped}
\item{corstr}{the correlation structure. The following are permitted:
"independence", "exchangeable" and "ar1".}
\item{family}{a description of the error distribution and link function to be
used in the model.}
}
\description{
\code{seq_GEE_model} estimates the effective variables and chooses the
subjects sequentially by the generalized estimating equations with adaptive
shrinkage estimate method.
}
\details{
seq_GEE_model fits the clustered data sequentially by generalized estimating
equations with adaptive shrinkage estimate. It can detect the effective
variables which have the impact on the response and choose the most
representative sample point at the same time. Specifically, we fit the model
to an initial sample and determine whether the stop condition is reached. If not, we will
select the most informative subjects by some criterion. Iteration stops once
it meets our requirements.
}
\examples{
# generate the toy example
data <- gen_GEE_data(numClusters = 75, clusterSize = 5,
                    clusterCorstr = 'ar1', clusterRho = 0.3,
                    beta = c(1, -1.1, 1.5, -2, rep(0, 50)), family = gaussian(),
                    intercept = TRUE, xCorstr = 'ar1',
                    xCorRho = 0.5, xVariance = 0.2)
df <- data.frame(y = data$y, data$x)
clusterID <- data$clusterID
pool <- gen_GEE_data(numClusters = 8000, clusterSize = 5,
                     clusterCorstr = 'ar1', clusterRho = 0.3,
                     beta = c(1, -1.1, 1.5, -2, rep(0, 50)), family = gaussian(),
                     intercept = TRUE, xCorstr = 'ar1',
                     xCorRho = 0.5, xVariance = 0.2)
df_pool <- data.frame(y = pool$y, pool$x)
clusterID_pool <- pool$clusterID
d<- 0.25

# Use seq_GEE_model for the generalized estimating equations case.
# Uncomment the lines below to run the command.
# seqRes.ASED <- seq_GEE_model(y ~ .-1, data = df, clusterID = clusterID,
#                             data_pool = df_pool,  clusterID_pool = clusterID_pool,
#                             strategy = "D-optimal",  d = d, family = gaussian(), corstr = 'ar1')
}
\references{
{
Chen, Z., Wang, Z., & Chang, Y. I. (2019). Sequential adaptive variables and
subject selection for GEE methods. \emph{Biometrics}. doi:10.1111/biom.13160
}
}
\seealso{
{
   \code{\link{seq_cat_model}} for categorical case

   \code{\link{seq_bin_model}} for binary classification case

   \code{\link{seq_ord_model}} for ordinal case.

}
}
