% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/sparseLTS.R
\name{sparseLTS}
\alias{sparseLTS}
\alias{print.sparseLTS}
\alias{sparseLTS.formula}
\alias{sparseLTS.default}
\title{Sparse least trimmed squares regression}
\usage{
sparseLTS(x, ...)

\method{sparseLTS}{formula}(formula, data, ...)

\method{sparseLTS}{default}(
  x,
  y,
  lambda,
  mode = c("lambda", "fraction"),
  alpha = 0.75,
  normalize = TRUE,
  intercept = TRUE,
  nsamp = c(500, 10),
  initial = c("sparse", "hyperplane", "random"),
  ncstep = 2,
  use.correction = TRUE,
  tol = .Machine$double.eps^0.5,
  eps = .Machine$double.eps,
  use.Gram,
  crit = c("BIC", "PE"),
  splits = foldControl(),
  cost = rtmspe,
  costArgs = list(),
  selectBest = c("hastie", "min"),
  seFactor = 1,
  ncores = 1,
  cl = NULL,
  seed = NULL,
  model = TRUE,
  ...
)
}
\arguments{
\item{x}{a numeric matrix containing the predictor variables.}

\item{\dots}{additional arguments to be passed down.}

\item{formula}{a formula describing the model.}

\item{data}{an optional data frame, list or environment (or object coercible
to a data frame by \code{\link{as.data.frame}}) containing the variables in
the model.  If not found in \code{data}, the variables are taken from
\code{environment(formula)}, typically the environment from which
\code{sparseLTS} is called.}

\item{y}{a numeric vector containing the response variable.}

\item{lambda}{a numeric vector of non-negative values to be used as penalty
parameter.}

\item{mode}{a character string specifying the type of penalty parameter.  If
\code{"lambda"}, \code{lambda} gives the grid of values for the penalty
parameter directly.  If \code{"fraction"}, the smallest value of the penalty
parameter that sets all coefficients to 0 is first estimated based on
bivariate winsorization, then \code{lambda} gives the fractions of that
estimate to be used (hence all values of \code{lambda} should be in the
interval [0,1] in that case).}

\item{alpha}{a numeric value giving the percentage of the residuals for
which the \eqn{L_{1}}{L1} penalized sum of squares should be minimized (the
default is 0.75).}

\item{normalize}{a logical indicating whether the predictor variables
should be normalized to have unit \eqn{L_{2}}{L2} norm (the default is
\code{TRUE}).  Note that normalization is performed on the subsamples
rather than the full data set.}

\item{intercept}{a logical indicating whether a constant term should be
included in the model (the default is \code{TRUE}).}

\item{nsamp}{a numeric vector giving the number of subsamples to be used in
the two phases of the algorithm.  The first element gives the number of
initial subsamples to be used.  The second element gives the number of
subsamples to keep after the first phase of \code{ncstep} C-steps.  For
those remaining subsamples, additional C-steps are performed until
convergence.  The default is to first perform \code{ncstep} C-steps on 500
initial subsamples, and then to keep the 10 subsamples with the lowest value
of the objective function for additional C-steps until convergence.}

\item{initial}{a character string specifying the type of initial subsamples
to be used.  If \code{"sparse"}, the lasso fit given by three randomly
selected data points is first computed.  The corresponding initial subsample
is then formed by the fraction \code{alpha} of data points with the smallest
squared residuals.  Note that this is optimal from a robustness point of
view, as the probability of including an outlier in the initial lasso fit is
minimized.  If \code{"hyperplane"}, a hyperplane through \eqn{p} randomly
selected data points is first computed, where \eqn{p} denotes the number of
variables.  The corresponding initial subsample is then again formed by the
fraction \code{alpha} of data points with the smallest squared residuals.
Note that this cannot be applied if \eqn{p} is larger than the number of
observations.  Moreover, the probability of including an outlier
increases with increasing dimension \eqn{p}.  If \code{"random"}, the
initial subsamples are given by a fraction \code{alpha} of randomly
selected data points.  Note that this leads to the largest probability of
including an outlier.}

\item{ncstep}{a positive integer giving the number of C-steps to perform on
all subsamples in the first phase of the algorithm (the default is to
perform two C-steps).}

\item{use.correction}{currently ignored.  Small sample correction factors
may be added in the future.}

\item{tol}{a small positive numeric value giving the tolerance for
convergence.}

\item{eps}{a small positive numeric value used to determine whether the
variability within a variable is too small (an effective zero).}

\item{use.Gram}{a logical indicating whether the Gram matrix of the
explanatory variables should be precomputed in the lasso fits on the
subsamples.  If the number of variables is large, computation may be faster
when this is set to \code{FALSE}.  The default is to use \code{TRUE} if the
number of variables is smaller than the number of observations in the
subsamples and smaller than 100, and \code{FALSE} otherwise.}

\item{crit}{a character string specifying the optimality criterion to be
used for selecting the final model.  Possible values are \code{"BIC"} for
the Bayes information criterion and \code{"PE"} for resampling-based
prediction error estimation.  This is ignored if \code{lambda} contains
only one value of the penalty parameter, as selecting the optimal value
is trivial in that case.}

\item{splits}{an object giving data splits to be used for prediction error
estimation (see \code{\link[perry]{perryTuning}}).  This is only relevant
if selecting the optimal \code{lambda} via prediction error estimation.}

\item{cost}{a cost function measuring prediction loss (see
\code{\link[perry]{perryTuning}} for some requirements).  The
default is to use the root trimmed mean squared prediction error
(see \code{\link[perry]{cost}}).  This is only relevant if selecting
the optimal \code{lambda} via prediction error estimation.}

\item{costArgs}{a list of additional arguments to be passed to the
prediction loss function \code{cost}.  This is only relevant if
selecting the optimal \code{lambda} via prediction error estimation.}

\item{selectBest, seFactor}{arguments specifying a criterion for selecting
the best model (see \code{\link[perry]{perryTuning}}).  The default is to
use a one-standard-error rule.  This is only relevant if selecting the
optimal \code{lambda} via prediction error estimation.}

\item{ncores}{a positive integer giving the number of processor cores to be
used for parallel computing (the default is 1 for no parallelization).  If
this is set to \code{NA}, all available processor cores are used.  For
prediction error estimation, parallel computing is implemented on the \R
level using package \pkg{parallel}.  Otherwise parallel computing is
implemented on the C++ level via OpenMP (\url{https://www.openmp.org/}).}

\item{cl}{a \pkg{parallel} cluster for parallel computing as generated by
\code{\link[parallel]{makeCluster}}.  This is preferred over \code{ncores}
for prediction error estimation, in which case \code{ncores} is only used on
the C++ level for computing the final model.}

\item{seed}{optional initial seed for the random number generator (see
\code{\link{.Random.seed}}).  On parallel \R worker processes for prediction
error estimation, random number streams are used and the seed is set via
\code{\link[parallel]{clusterSetRNGStream}}.}

\item{model}{a logical indicating whether the data \code{x} and \code{y}
should be added to the return object.  If \code{intercept} is \code{TRUE},
a column of ones is added to \code{x} to account for the intercept.}
}
\value{
If \code{crit} is \code{"PE"} and \code{lambda} contains more than one
value of the penalty parameter, an object of class \code{"perrySparseLTS"}
(inheriting from class \code{"perryTuning"}, see
\code{\link[perry]{perryTuning}}).  It contains information on the
prediction error criterion, and includes the final model with the optimal
tuning parameter as component \code{finalModel}.

Otherwise an object of class \code{"sparseLTS"} with the following
components:
\describe{
  \item{\code{lambda}}{a numeric vector giving the values of the penalty
  parameter.}
  \item{\code{best}}{an integer vector or matrix containing the respective
  best subsets of \eqn{h} observations found and used for computing the raw
  estimates.}
  \item{\code{objective}}{a numeric vector giving the respective values of
  the sparse LTS objective function, i.e., the \eqn{L_{1}}{L1} penalized
  sums of the \eqn{h} smallest squared residuals from the raw fits.}
  \item{\code{coefficients}}{a numeric vector or matrix containing the
  respective coefficient estimates from the reweighted fits.}
  \item{\code{fitted.values}}{a numeric vector or matrix containing the
  respective fitted values of the response from the reweighted fits.}
  \item{\code{residuals}}{a numeric vector or matrix containing the
  respective residuals from the reweighted fits.}
  \item{\code{center}}{a numeric vector giving the robust center estimates
  of the corresponding reweighted residuals.}
  \item{\code{scale}}{a numeric vector giving the robust scale estimates of
  the corresponding reweighted residuals.}
  \item{\code{cnp2}}{a numeric vector giving the respective consistency
  factors applied to the scale estimates of the reweighted residuals.}
  \item{\code{wt}}{an integer vector or matrix containing binary weights
  that indicate outliers from the respective reweighted fits, i.e., the
  weights are \eqn{1} for observations with reasonably small reweighted
  residuals and \eqn{0} for observations with large reweighted residuals.}
  \item{\code{df}}{an integer vector giving the respective degrees of
  freedom of the obtained reweighted model fits, i.e., the number of
  nonzero coefficient estimates.}
  \item{\code{intercept}}{a logical indicating whether the model includes a
  constant term.}
  \item{\code{alpha}}{a numeric value giving the percentage of the residuals
  for which the \eqn{L_{1}}{L1} penalized sum of squares was minimized.}
  \item{\code{quan}}{the number \eqn{h} of observations used to compute the
  raw estimates.}
  \item{\code{raw.coefficients}}{a numeric vector or matrix containing the
  respective coefficient estimates from the raw fits.}
  \item{\code{raw.fitted.values}}{a numeric vector or matrix containing the
  respective fitted values of the response from the raw fits.}
  \item{\code{raw.residuals}}{a numeric vector or matrix containing the
  respective residuals from the raw fits.}
  \item{\code{raw.center}}{a numeric vector giving the robust center
  estimates of the corresponding raw residuals.}
  \item{\code{raw.scale}}{a numeric vector giving the robust scale estimates
  of the corresponding raw residuals.}
  \item{\code{raw.cnp2}}{a numeric value giving the consistency factor
  applied to the scale estimate of the raw residuals.}
  \item{\code{raw.wt}}{an integer vector or matrix containing binary weights
  that indicate outliers from the respective raw fits, i.e., the weights
  used for the reweighted fits.}
  \item{\code{crit}}{an object of class \code{"bicSelect"} containing the
  BIC values and indicating the final model (only returned if argument
  \code{crit} is \code{"BIC"} and argument \code{lambda} contains more
  than one value for the penalty parameter).}
  \item{\code{x}}{the predictor matrix (if \code{model} is \code{TRUE}).}
  \item{\code{y}}{the response variable (if \code{model} is \code{TRUE}).}
  \item{\code{call}}{the matched function call.}
}
}
\description{
Compute least trimmed squares regression with an \eqn{L_{1}}{L1} penalty on
the regression coefficients, which allows for sparse model estimates.
}
\note{
The underlying C++ code uses the C++ library Armadillo.  From package
version 0.6.0, the back end for sparse least trimmed squares from package
\pkg{sparseLTSEigen}, which uses the C++ library Eigen, is no longer
supported and can no longer be used.

Parallel computing is implemented via OpenMP (\url{https://www.openmp.org/}).
}
\examples{
## generate data
# example is not high-dimensional to keep computation time low
library("mvtnorm")
set.seed(1234)  # for reproducibility
n <- 100  # number of observations
p <- 25   # number of variables
beta <- rep.int(c(1, 0), c(5, p-5))  # coefficients
sigma <- 0.5      # controls signal-to-noise ratio
epsilon <- 0.1    # contamination level
Sigma <- 0.5^t(sapply(1:p, function(i, j) abs(i-j), 1:p))
x <- rmvnorm(n, sigma=Sigma)    # predictor matrix
e <- rnorm(n)                   # error terms
i <- 1:ceiling(epsilon*n)       # observations to be contaminated
e[i] <- e[i] + 5                # vertical outliers
y <- c(x \%*\% beta + sigma * e)  # response
x[i,] <- x[i,] + 5              # bad leverage points

## fit sparse LTS model for one value of lambda
sparseLTS(x, y, lambda = 0.05, mode = "fraction")

## fit sparse LTS models over a grid of values for lambda
frac <- seq(0.2, 0.05, by = -0.05)
sparseLTS(x, y, lambda = frac, mode = "fraction")
}
\references{
Alfons, A., Croux, C. and Gelper, S. (2013) Sparse least trimmed squares
regression for analyzing high-dimensional large data sets. \emph{The Annals
of Applied Statistics}, \bold{7}(1), 226--248.
}
\seealso{
\code{\link[=coef.sparseLTS]{coef}},
\code{\link[=fitted.sparseLTS]{fitted}},
\code{\link[=plot.sparseLTS]{plot}},
\code{\link[=predict.sparseLTS]{predict}},
\code{\link[=residuals.sparseLTS]{residuals}},
\code{\link[=weights.sparseLTS]{weights}},
\code{\link[robustbase]{ltsReg}}
}
\author{
Andreas Alfons
}
\keyword{regression}
\keyword{robust}
