% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/optimize_controls.R
\name{optimize_controls}
\alias{optimize_controls}
\title{Select control units that optimize covariate balance}
\usage{
optimize_controls(
  z,
  X,
  st,
  importances = NULL,
  treated = 1,
  ratio = NULL,
  q_s = NULL,
  treated_star = NULL,
  q_star_s = NULL,
  weight_star = 1,
  integer = FALSE,
  solver = "Rglpk",
  seed = NULL,
  runs = 1,
  time_limit = Inf,
  threads = 1,
  correct_sizes = TRUE,
  low_memory = FALSE
)
}
\arguments{
\item{z}{a factor with the \code{i}th entry equal to the treatment of unit \code{i}.}

\item{X}{a matrix or data frame containing constraints in the columns. The number
of rows should equal the length of \code{z}. Balance is achieved when a constraint
sums to 0, such that numbers closer to 0 are better. When a constraint
does not apply to a particular unit, the entry should be \code{NA}.
This should typically be generated using \code{\link{generate_constraints}()}.}

\item{st}{a stratum vector with the \code{i}th entry equal to the
stratum of unit \code{i}. This should have the same order of units and length
as \code{z}.}

\item{importances}{a vector with length equal to the number of constraints or columns
in \code{X}. This can be generated using \code{\link{generate_constraints}()} and each nonnegative value
denotes how much to prioritize each constraint, with the default being 1
for all constraints.}

\item{treated}{which treatment value should be considered the treated units. This
must be one of the values of \code{z}.}

\item{ratio}{a numeric or vector specifying the desired ratio of controls to `treated` in
each stratum. If there is one control group and all treated units should be included,
this can be a numeric. Otherwise, this should be
a vector with one entry per treatment group, in the same order as the levels of
\code{z}, including the treated level. If \code{NULL}, \code{q_s} should be specified.}

\item{q_s}{a named vector or matrix indicating how many units are to be selected from each stratum.
If there is one control group and all treated units are desired, this can be a vector; otherwise,
this should have one row per treatment group, where the order of the rows matches the order of
the levels of \code{z}, including the treated level.
If \code{NULL}, \code{ratio} should be specified. If both are specified, \code{q_s} will take priority.
Typically, if the desired ratio is not feasible for every stratum, \code{q_s} should be generated
using \code{\link{generate_qs}()}.}

\item{treated_star}{which treatment value should be considered the treated units
for the supplemental comparison. This
must be one of the values of \code{z}.
 If multiple supplemental comparisons are desired, this should be a vector with one entry per supplemental
  comparison.}

\item{q_star_s}{a named vector or matrix,
indicating how many supplemental units are to be selected from each stratum.
The matrix should have one row per treatment group, where the order of the rows matches the order of
the levels of \code{z}, including the treated level.
If multiple supplemental comparisons are desired, this should be a list with one entry per supplemental
comparison.}

\item{weight_star}{a numeric stating how much to prioritize balance between the supplemental units as
compared to balance between the main units.
  If multiple supplemental comparisons are desired, this should be a vector with one entry per supplemental
  comparison.}

\item{integer}{a logical stating whether to use a mixed integer programming solver
instead of randomized rounding. Default is \code{FALSE}.}

\item{solver}{a character stating which solver to use to run the linear program.
Options are "Rglpk" (default) or "gurobi". You must have the 'gurobi' package
installed to use the "gurobi" option. If available, this is the recommended solver.}

\item{seed}{the seed to use when doing the randomized rounding of the linear program.
This will allow results to be reproduced if desired. The default is \code{NULL},
which will choose a random seed to use and return.}

\item{runs}{the number of times to run randomized rounding of the linear solution.
The objective values of all runs will be reported, but the detailed results
will only be reported for the run with the lowest objective value. The default is 1.}

\item{time_limit}{numeric stating maximum amount of seconds for which the
program is allowed to run before aborting. Default is \code{Inf} for no time limit.}

\item{threads}{The maximum number of threads that should be used. This is only
applicable if \code{solver = 'gurobi'}.}

\item{correct_sizes}{boolean stating whether the desired sample sizes should
be exactly correct (if \code{correct_sizes = TRUE}) or only need to be correct
in expectation. For multiple comparisons, sample sizes may only be
correct in expectation.}

\item{low_memory}{boolean stating whether some outputs should not be included
due to the scale of the problem being too large compared to memory space.
If \code{TRUE}, \code{eps} and \code{eps_star} will not be reported. Imbalances
can be computed post hoc using the \code{\link{check_balance}()} instead.}
}
\value{
List containing:
\describe{
  \item{\code{objective}}{objective value of the randomized rounding or mixed integer
    linear program solution.}
  \item{\code{objective_wo_importances}}{objective value of the randomized rounding or mixed integer
    linear program solution not weighted by the variable importances.}
  \item{\code{eps}}{the amount of imbalance obtained in each constraint from the linear program.
  The row names specify the covariate, the population of interest, and, if there are
  more than two comparison groups, which groups are being compared.}
  \item{\code{eps_star}}{same as \code{eps} but for the supplemental units instead of the units
  in the main comparison. If there are multiple supplemental comparisons, this is a list.
  If there are none, this is \code{NULL}.}
  \item{\code{importances}}{the importance of each on the balance constraints.}
  \item{\code{weight_star}}{the importance of balancing in the supplemental comparison
  relative to the main comparison. If there are multiple supplemental comparisons,
  this is a vector. If there are none, this is \code{NULL}.}
  \item{\code{selected}}{whether each unit was selected for the main comparison.}
  \item{\code{selected_star}}{whether each unit was selected for the supplement.
  If there are multiple supplemental comparisons, this is a list.
  If there are none, this is \code{NULL}.}
  \item{\code{pr}}{the linear program weight assigned to each unit for the main comparison.}
  \item{\code{pr_star}}{the linear program weight assigned to each unit for the supplement.
  If there are multiple supplemental comparisons, this is a list.
  If there are none, this is \code{NULL}.}
  \item{\code{rrdetails}}{A list containing:
  \describe{
  \item{\code{seed}}{the seed used before commencing the random sampling.}
  \item{\code{run_objectives}}{the objective values for each run of randomized rounding.}
  \item{\code{run_objectives_wo_importances}}{the objective values for each run of randomized rounding,
  not scaled by constraint importances.}
  }}
  \item{\code{lpdetails}}{the full return of the function \code{\link[Rglpk]{Rglpk_solve_LP}()}
    or \code{gurobi()} plus information about the epsilons and objective values
    for the linear program solution.}
}
}
\description{
Select control units within strata that optimize covariate balance.
Uses randomized rounding of a linear program or a mixed
integer linear program.
}
\examples{

data('nh0506')

# Create strata
age_cat <- cut(nh0506$age,
               breaks = c(19, 39, 50, 85),
               labels = c('< 40 years', '40 - 50 years', '> 50 years'))
strata <- age_cat : nh0506$sex

# Balance age, race, education, poverty ratio, and bmi both across and within the levels of strata
constraints <- generate_constraints(
                 balance_formulas = list(age + race + education + povertyr + bmi ~ 1 + strata),
                 z = nh0506$z,
                 data = nh0506)

# Choose one control for every treated unit in each stratum,
# balancing the covariates as described by the constraints
results <- optimize_controls(z = nh0506$z,
                             X = constraints$X,
                             st = strata,
                             importances = constraints$importances,
                             ratio = 1)

# If you want to use a ratio that's not feasible,
# you can supply a vector of the desired number of controls per stratum, q_s,
# typically generated by creating a distance matrix between strata and using
# generate_qs():

\dontrun{
age_dist <- matrix(data = c(0, 1, 2, 1, 0, 1, 2, 1, 0),
                   nrow = 3,
                   byrow = TRUE,
                   dimnames = list(levels(age_cat), levels(age_cat)))

sex_dist <- matrix(data = c(0, 1, 1, 0),
                   nrow = 2,
                   dimnames = list(levels(nh0506$sex), levels(nh0506$sex)))

strata_dist <- create_dist_matrix(age_dist, sex_dist)

qs <- generate_qs(z = nh0506$z,
                  st = strata,
                  ratio = 2.5,
                  max_ratio = 2.6,
                  max_extra_s = 0,
                  strata_dist = strata_dist)

results <- optimize_controls(z = nh0506$z,
                             X = constraints$X,
                             st = strata,
                             importances = constraints$importances,
                             q_s = qs)

}

# We can also have multiple treatment and control groups,
# as well as multiple simultaneous comparisons:

\dontrun{
data('nh0506_3groups')
strata2 <- cut(nh0506_3groups$age, breaks = c(19, 39, 50, 85),
              labels = c('< 40 years', '40 - 50 years', '> 50 years'))
constraints2 <- generate_constraints(
  balance_formulas = list(age + race + education + povertyr + bmi + sex ~ 1 + strata2),
  z = nh0506_3groups$z,
  data = nh0506_3groups,
  treated = 'daily smoker')
q_star_s <- matrix(c(rep(table(nh0506_3groups$z, strata2)['some smoking', ] -
                           table(nh0506_3groups$z, strata2)['daily smoker', ], 2),
                     rep(0, 3)), byrow = TRUE, nrow = 3,
                   dimnames = list(levels(nh0506_3groups$z), levels(strata2)))

results <- optimize_controls(z = nh0506_3groups$z,
                             X = constraints2$X,
                             importances = constraints2$importances,
                             st = strata2,
                             ratio = 1,
                             treated = 'daily smoker',
                             treated_star = 'some smoking',
                             q_star_s = q_star_s,
                             correct_sizes = FALSE)

}
}
