% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/stan_clogit.R
\name{stan_clogit}
\alias{stan_clogit}
\title{Conditional logistic (clogit) models via Stan}
\usage{
stan_clogit(formula, data, subset, na.action = NULL, ..., strata,
  prior = normal(), prior_covariance = decov(), prior_PD = FALSE,
  algorithm = c("sampling", "optimizing", "meanfield", "fullrank"),
  adapt_delta = NULL, QR = FALSE, sparse = FALSE)
}
\arguments{
\item{formula, data, subset, na.action}{Same as for \code{\link[lme4]{glmer}}, 
except that any intercept included in the formula will be dropped. \emph{We
strongly advise against omitting the \code{data} argument}. Unless 
\code{data} is specified (and is a data frame) many post-estimation 
functions (including \code{update}, \code{loo}, \code{kfold}) are not 
guaranteed to work properly.}

\item{...}{Further arguments passed to the function in the \pkg{rstan} 
package (\code{\link[rstan]{sampling}}, \code{\link[rstan]{vb}}, or 
\code{\link[rstan]{optimizing}}), corresponding to the estimation method 
named by \code{algorithm}. For example, if \code{algorithm} is
\code{"sampling"} it is possible to specify \code{iter}, \code{chains},
\code{cores}, \code{refresh}, etc.}

\item{strata}{A factor indicating the groups in the data where the number of 
successes (possibly one) is fixed by the research design. It may be useful 
to use \code{\link{interaction}} or \code{\link[survival]{strata}} to
create this factor. However, the \code{strata} argument must not rely on
any object besides the \code{data} \code{\link{data.frame}}.}

\item{prior}{The prior distribution for the regression coefficients. 
\code{prior} should be a call to one of the various functions provided by 
\pkg{rstanarm} for specifying priors. The subset of these functions that 
can be used for the prior on the coefficients can be grouped into several 
"families":

\tabular{ll}{
  \strong{Family} \tab \strong{Functions} \cr 
  \emph{Student t family} \tab \code{normal}, \code{student_t}, \code{cauchy} \cr 
  \emph{Hierarchical shrinkage family} \tab \code{hs}, \code{hs_plus} \cr 
  \emph{Laplace family} \tab \code{laplace}, \code{lasso} \cr
  \emph{Product normal family} \tab \code{product_normal} \cr
}

See the \link[=priors]{priors help page} for details on the families and 
how to specify the arguments for all of the functions in the table above.
To omit a prior---i.e., to use a flat (improper) uniform prior---set
\code{prior} to \code{NULL}, although this is rarely a good
idea.

\strong{Note:} Unless \code{QR=TRUE}, if \code{prior} is from the Student t
family or Laplace family, and if the \code{autoscale} argument to the 
function used to specify the prior (e.g. \code{\link{normal}}) is left at 
its default and recommended value of \code{TRUE}, then the default or 
user-specified prior scale(s) may be adjusted internally based on the
scales of the predictors. See the \link[=priors]{priors help page} and the
\emph{Prior Distributions} vignette for details on the rescaling and the
\code{\link{prior_summary}} function for a summary of the priors used for a
particular model.}

\item{prior_covariance}{Cannot be \code{NULL} when lme4-style group-specific
terms are included in the \code{formula}. See \code{\link{decov}} for
more information about the default arguments. Ignored when there are no
group-specific terms.}

\item{prior_PD}{A logical scalar (defaulting to \code{FALSE}) indicating
whether to draw from the prior predictive distribution instead of
conditioning on the outcome.}

\item{algorithm}{A string (possibly abbreviated) indicating the 
estimation approach to use. Can be \code{"sampling"} for MCMC (the
default), \code{"optimizing"} for optimization, \code{"meanfield"} for
variational inference with independent normal distributions, or
\code{"fullrank"} for variational inference with a multivariate normal
distribution. See \code{\link{rstanarm-package}} for more details on the
estimation algorithms. NOTE: not all fitting functions support all four
algorithms.}

\item{adapt_delta}{Only relevant if \code{algorithm="sampling"}. See 
\code{\link{adapt_delta}} for details.}

\item{QR}{A logical scalar defaulting to \code{FALSE}, but if \code{TRUE} 
applies a scaled \code{\link{qr}} decomposition to the design matrix, 
\eqn{X = Q^\ast R^\ast}{X = Q* R*}, where \eqn{Q^\ast = Q \sqrt{n-1}}{Q* = 
Q (n-1)^0.5} and \eqn{R^\ast = \frac{1}{\sqrt{n-1}} R}{R* = (n-1)^(-0.5) 
R}. The coefficients relative to \eqn{Q^\ast}{Q*} are obtained and then 
premultiplied by the inverse of \eqn{R^{\ast}}{R*} to obtain coefficients 
relative to the original predictors, \eqn{X}. These transformations do not 
change the likelihood of the data but are recommended for computational 
reasons when there are multiple predictors. Importantly, while the columns
of \eqn{X} are almost always correlated, the columns of \eqn{Q^\ast}{Q*}
are uncorrelated by design, which often makes sampling from the posterior
easier. However, when \code{QR} is \code{TRUE} the \code{prior}
argument applies to the coefficients relative to \eqn{Q^\ast}{Q*} (and
those are not very interpretable), so setting \code{QR=TRUE} is only
recommended if you do not have an informative prior for the regression
coefficients. 

For more details see the Stan case study 
\emph{The QR Decomposition For Regression Models} at 
\url{http://mc-stan.org/users/documentation/case-studies/qr_regression.html}.}

\item{sparse}{A logical scalar (defaulting to \code{FALSE}) indicating
whether to use a sparse representation of the design (X) matrix. 
If \code{TRUE}, the design matrix is not centered (since that would 
destroy the sparsity) and likewise it is not possible to specify both 
\code{QR = TRUE} and \code{sparse = TRUE}. Depending on how many zeros
there are in the design matrix, setting \code{sparse = TRUE} may make
the code run faster and can consume much less RAM.}
}
\value{
A \link[=stanreg-objects]{stanreg} object is returned 
for \code{stan_clogit}.
}
\description{
A model for case-control studies with optional prior distributions for the
coefficients, intercept, and auxiliary parameters.
}
\details{
The \code{stan_clogit} function is mostly similar in syntax to 
  \code{\link[survival]{clogit}} but rather than performing maximum
  likelihood estimation of generalized linear models, full Bayesian
  estimation is performed (if \code{algorithm} is \code{"sampling"}) via
  MCMC. The Bayesian model adds priors (independent by default) on the
  coefficients of the GLM.
  
  The \code{data.frame} passed to the \code{data} argument must be sorted by 
  the variable passed to the \code{strata} argument.
  
  The \code{formula} may have group-specific terms like in 
  \code{\link{stan_glmer}} but should not allow the intercept to vary by the
  stratifying variable, since there is no information in the data with which
  to estimate such deviations in the intercept.
}
\examples{
# Fit the model to the rows with parity <= 2; the data frame is ordered by
# stratum before it is passed in, and education gets a group-specific intercept
post <- stan_clogit(case ~ spontaneous + induced + (1 | education), 
                    strata = stratum,
                    data = infert[order(infert$stratum), ],
                    subset = parity <= 2,
                    QR = TRUE,
                    chains = 2, iter = 500) # for speed only

# Rows left out of the fit (parity > 2), keeping only the listed columns
nd <- infert[infert$parity > 2, c("case", "spontaneous", "induced", 
                                  "education", "stratum")]
# next line would fail without case and stratum variables
pr <- posterior_linpred(post, newdata = nd, transform = TRUE)
# compare the row sums of pr against the total of nd$case
all.equal(rep(sum(nd$case), nrow(pr)), rowSums(pr)) # not a random variable
            
}
\seealso{
\code{\link{stanreg-methods}} and 
\code{\link[survival]{clogit}}.

The vignette for Bernoulli and binomial models.
}

