% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/gwqs.R
\name{gwqs}
\alias{gwqs}
\alias{gwqsrh}
\title{Fitting Weighted Quantile Sum regression models}
\usage{
gwqs(formula, data, na.action, weights, mix_name, stratified, valid_var, b = 100,
             b1_pos = TRUE, b1_constr = FALSE, zero_infl = FALSE, q = 4,
             validation = 0.6, family = gaussian, signal = c("t2", "one", "abst", "expt"),
             rs = FALSE, n_vars = NULL,
             zilink = c("logit", "probit", "cloglog", "cauchit", "log"), seed = NULL,
             plan_strategy = "sequential",
             optim.method = c("BFGS", "Nelder-Mead", "CG", "SANN"),
             control = list(trace = FALSE, maxit = 2000, reltol = 1e-9), ...)

gwqsrh(formula, data, na.action, weights, mix_name, stratified, valid_var, rh = 100,
               b = 100, b1_pos = TRUE, b1_constr = FALSE, zero_infl = FALSE, q = 4,
               validation = 0.6, family = gaussian,
               signal = c("t2", "one", "abst", "expt"), rs = FALSE, n_vars = NULL,
               zilink = c("logit", "probit", "cloglog", "cauchit", "log"), seed = NULL,
               plan_strategy = "sequential",
               optim.method = c("BFGS", "Nelder-Mead", "CG", "SANN"),
               control = list(trace = FALSE, maxit = 2000, reltol = 1e-9), ...)
}
\arguments{
\item{formula}{An object of class \code{formula} specifying the relationship to be tested. The \code{wqs}
term must be included in \code{formula}, e.g. \code{y ~ wqs + ...}. To test for an interaction term with
a continuous variable \code{a} or for a quadratic term we can specify the \code{formula} as below:
\code{y ~ wqs*a + ...} and \code{y ~ wqs + I(wqs^2) + ...}, respectively.}

\item{data}{The \code{data.frame} containing the variables to be included in the model.}

\item{na.action}{A function that indicates what should happen when the data contain \code{NA}s
(see \code{\link[stats]{model.frame}}). \code{na.omit} is the default.}

\item{weights}{An optional vector of weights to be used in the fitting process.
Should be \code{NULL} or a numeric vector.}

\item{mix_name}{A character vector listing the variables contributing to a mixture effect.}

\item{stratified}{The character name of the variable by which you want to stratify.
It has to be a \code{factor}.}

\item{valid_var}{A character value containing the name of the variable that identifies the validation
and the training dataset. You previously need to create a variable in the dataset which is equal to 1
for the observations you want to include in the validation dataset, equal to 0 for the observations
you want to include in the training dataset (use 0 also for the validation dataset if you want to train and
validate the model on the same data) and equal to 2 if you want to keep part of the data for the
predictive model.}

\item{b}{Number of bootstrap samples used in parameter estimation.}

\item{b1_pos}{A logical value that determines whether weights are derived from models where the beta
values were positive or negative.}

\item{b1_constr}{A logical value that determines whether to apply positive (if \code{b1_pos = TRUE}) or
negative (if \code{b1_pos = FALSE}) constraints in the optimization function for the weight estimation.}

\item{zero_infl}{A logical value (\code{TRUE} or \code{FALSE}) that allows fitting a zero-inflated
model when \code{family = "poisson"} or \code{family = "negbin"}.}

\item{q}{An \code{integer} to specify how mixture variables will be ranked, e.g. in quartiles
(\code{q = 4}), deciles (\code{q = 10}), or percentiles (\code{q = 100}). If \code{q = NULL} then
the values of the mixture variables are taken (these must be standardized).}

\item{validation}{Percentage of the dataset to be used to validate the model. If
\code{validation = 0} then the test dataset is used as validation dataset too.}

\item{family}{A character value that selects the type of regression model: \code{gaussian} for linear regression,
\code{binomial} for logistic regression, \code{"multinomial"} for multinomial regression,
\code{poisson} for Poisson regression, \code{quasipoisson} for quasi-Poisson regression,
\code{"negbin"} for negative binomial regression.}

\item{signal}{Character identifying the signal function to be used when the average weights
are estimated. It can take the values \code{"one"} to apply the identity, \code{"abst"} to apply
the absolute value of the t-statistic, \code{"t2"} to apply the squared value of the t-statistic, or
\code{"expt"} to apply the exponential of the t-statistic as signal function.}

\item{rs}{A logical value. If \code{rs = FALSE} then the bootstrap implementation of WQS is performed.
If \code{rs = TRUE} then the random subset implementation of WQS is applied (see the "Details" and the
vignette for further information).}

\item{n_vars}{The number of mixture components to be included at each random subset step.
If \code{rs = TRUE} and \code{n_vars = NULL} then the square root of the number of elements
in the mixture is taken.}

\item{zilink}{Character specification of link function in the binary zero-inflation model
(you can choose among \code{"logit", "probit", "cloglog", "cauchit", "log"}).}

\item{seed}{An \code{integer} value to fix the seed, if it is equal to \code{NULL} no seed is chosen.}

\item{plan_strategy}{A character value that allows to choose the evaluation strategies for the
\code{plan} function. You can choose among "sequential", "transparent", "multisession", "multicore",
"multiprocess", "cluster" and "remote" (see \code{\link[future]{plan}} help page for more details).}

\item{optim.method}{A character identifying the method to be used by the \code{\link[stats]{optim}} function
(you can choose among \code{"BFGS", "Nelder-Mead", "CG", "SANN"}, \code{"BFGS"} is the default).
See \code{\link[stats]{optim}} for details.}

\item{control}{The control list of optimization parameters. See \code{\link[stats]{optim}} for details.}

\item{...}{Additional arguments to be passed to the function}

\item{rh}{Number of repeated holdout validations. This option is only available for \code{gwqsrh} function.}
}
\value{
\code{gwqs} returns the results of the WQS regression as well as many other objects and datasets.

\item{fit}{The object that summarizes the output of the WQS model, reflecting a
linear, logistic, multinomial, Poisson, quasi-Poisson or negative binomial regression
depending on how the \code{family} parameter was specified.
The summary function can be used to call and print fit data (not for multinomial regression).}
\item{final_weights}{\code{data.frame} containing the final weights associated to each chemical.}
\item{conv}{Indicates whether the solver has converged (0) or not (1 or 2).}
\item{bres}{Matrix of estimated weights, mixture effect parameter estimates and the associated
standard errors, statistics and p-values estimated for each bootstrap iteration.}
\item{wqs}{Vector containing the wqs index for each subject.}
\item{qi}{List of the cutoffs used to divide in quantiles the variables in the mixture}
\item{bindex}{List of vectors containing the \code{rownames} of the subjects included in each
bootstrap dataset.}
\item{tindex}{Vector containing the rows used to estimate the weights in each bootstrap.}
\item{vindex}{Vector containing the rows used to estimate the parameters of the final model.}
\item{y_wqs_df}{\code{data.frame} containing the dependent variable values adjusted for the
residuals of a fitted model adjusted for covariates (original values when \code{family = binomial}
or \code{"multinomial"}) and the wqs index estimated values.}
\item{family}{The family specified.}
\item{call}{The matched call.}
\item{formula}{The formula supplied.}
\item{mix_name}{The vector of variable names used to identify the elements in the mixture.}
\item{q}{The method used to rank variables included in the mixture.}
\item{n_levels}{The number of levels of the dependent variable when a multinomial regression is run.}
\item{zero_infl}{If a zero inflated model was run (\code{TRUE}) or not (\code{FALSE})}
\item{zilink}{The chosen link function when a zero inflated model was run.}
\item{levelnames}{The name of each level when a multinomial regression is run.}
\item{data}{The data used in the WQS analysis.}
\item{objfn_values}{The vector of the b values of the objective function corresponding to the optimal values}
\item{optim_messages}{The vector of character strings giving any additional information returned by the
optimizer, or NULL.}
\item{gwqslist}{List of the output from the \code{rh} WQS models.}
\item{coefmat}{Matrix containing the parameter estimates from each repeated holdout WQS model.}
\item{wmat}{Matrix containing the weight estimates from each repeated holdout WQS model.}
}
\description{
Fits Weighted Quantile Sum (WQS) regression  (Carrico et al. (2014) \doi{10.1007/s13253-014-0180-3}),
a random subset implementation of WQS (Curtin et al. (2019) \doi{10.1080/03610918.2019.1577971}) and
a repeated holdout validation WQS (Tanner et al. (2019) \doi{10.1016/j.mex.2019.11.008}) for continuous,
binomial, multinomial, Poisson, quasi-Poisson and negative binomial outcomes.
}
\details{
\code{gWQS} uses the \code{glm} function in the \bold{stats} package to fit the linear, logistic,
Poisson and quasi-Poisson regressions, while the \code{glm.nb} function from the \bold{MASS}
package is used to fit the negative binomial regression. The \code{nlm} function from
the \bold{stats} package is used to optimize the log-likelihood of the multinomial regression.\cr

The \code{\link[stats]{optim}} optimization function is used to estimate the weights at each
bootstrap step.\cr

The \code{seed} argument specifies a fixed seed through the \code{set.seed} function.\cr

The \code{rs} term allows choosing the methodology between the bootstrap implementation
(WQSBS) and the random subset implementation (WQSRS) of the WQS. The first method draws \code{b}
bootstrapped samples to estimate the weights while the second creates \code{b} randomly-selected
subsets of the total predictor set. For further details please see the vignette
("How to use gWQS package") and the references below.
}
\examples{
# we save the names of the mixture variables in the variable
# "toxic_chems"
toxic_chems = names(wqs_data)[1:34]

# To run a linear model and save the results in the variable
# "results". This linear model (family = gaussian) will
# rank/standardize variables in deciles (q = 10), perform a
# 40/60 split of the data for training/validation
# (validation = 0.6), and estimate weights over 2 bootstrap
# samples (b = 2; in practical applications at least 100
# bootstraps should be used). Weights will be derived from
# mixture effect parameters that are positive (b1_pos = TRUE),
# without applying constraints in the optimization
# (b1_constr = FALSE). A unique seed was specified
# (seed = 2016) so this model will be reproducible.
# Note that the call below passes no "plots" or "tables"
# arguments (they are not part of the usage of gwqs): the
# weights and the model parameters with the respective
# statistics are inspected from the returned object:
results = gwqs(yLBX ~ wqs, mix_name = toxic_chems,
               data = wqs_data, q = 10, validation = 0.6,
               b = 2, b1_pos = TRUE, b1_constr = FALSE,
               family = gaussian, seed = 2016)

# to test the significance of the covariates
summary(results)

}
\references{
Carrico C, Gennings C, Wheeler D, Factor-Litvak P. Characterization of a weighted quantile sum
regression for highly correlated data in a risk analysis setting. J Agric Biol Environ Stat.
2014:1-21. ISSN: 1085-7117. \doi{10.1007/s13253-014-0180-3}.\cr

Czarnota J, Gennings C, Colt JS, De Roos AJ, Cerhan JR, Severson RK, Hartge P, Ward MH,
Wheeler D. 2015. Analysis of environmental chemical mixtures and non-Hodgkin lymphoma risk in the
NCI-SEER NHL study. Environmental Health Perspectives, \doi{10.1289/ehp.1408630}.\cr

Czarnota J, Gennings C, Wheeler D. 2015. Assessment of weighted quantile sum regression for modeling
chemical mixtures and cancer risk. Cancer Informatics,
2015:14(S2) 159-171 \doi{10.4137/CIN.S17295}.\cr

Brunst KJ, Sanchez Guerra M, Gennings C, et al. Maternal Lifetime Stress and Prenatal Psychological
Functioning and Decreased Placental Mitochondrial DNA Copy Number in the PRISM Study.
Am J Epidemiol. 2017;186(11):1227-1236. \doi{10.1093/aje/kwx183}.\cr

Curtin P, Kellogg J, Cech N, Gennings C. 2019. A random subset implementation of weighted quantile
sum (WQSRS) regression for analysis of high-dimensional mixtures, Communications in Statistics -
Simulation and Computation. \doi{10.1080/03610918.2019.1577971}.\cr

Tanner EM, Bornehag CG, Gennings C. Repeated holdout validation for weighted quantile sum regression.
MethodsX. 2019 Nov 22;6:2855-2860. \doi{10.1016/j.mex.2019.11.008}.\cr
}
\seealso{
\link[stats]{glm}, \link[MASS]{glm.nb}, \link[nnet]{multinom}, \link[pscl]{zeroinfl}.
}
\author{
Stefano Renzetti, Paul Curtin, Allan C Just, Ghalib Bello, Chris Gennings
}
