% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/marginalmeans.R
\name{marginal_means}
\alias{marginal_means}
\title{Marginal Means}
\usage{
marginal_means(
  model,
  variables = NULL,
  variables_grid = NULL,
  vcov = TRUE,
  conf_level = 0.95,
  type = NULL,
  transform_post = NULL,
  cross = FALSE,
  hypothesis = NULL,
  df = Inf,
  wts = "equal",
  by = NULL,
  ...
)
}
\arguments{
\item{model}{Model object}

\item{variables}{Character vector. Categorical predictors over which to
compute marginal means. \code{NULL} calculates marginal means for all logical,
character, or factor variables in the dataset used to fit \code{model}. Set
\code{cross=TRUE} to compute marginal means at combinations of the
predictors specified in the \code{variables} argument.}

\item{variables_grid}{Character vector. Categorical predictors used to
construct the prediction grid over which adjusted predictions are averaged.
\code{NULL} creates a grid with all combinations of all
categorical predictors. This grid can be very large when there are many
variables and many response levels, so it is advisable to select a limited
number of variables in the \code{variables} and \code{variables_grid} arguments.}

\item{vcov}{Type of uncertainty estimates to report (e.g., for robust standard errors). Acceptable values:
\itemize{
\item FALSE: Do not compute standard errors. This can speed up computation considerably.
\item TRUE: Unit-level standard errors using the default \code{vcov(model)} variance-covariance matrix.
\item String which indicates the kind of uncertainty estimates to return.
\itemize{
\item Heteroskedasticity-consistent: \code{"HC"}, \code{"HC0"}, \code{"HC1"}, \code{"HC2"}, \code{"HC3"}, \code{"HC4"}, \code{"HC4m"}, \code{"HC5"}. See \code{?sandwich::vcovHC}
\item Heteroskedasticity and autocorrelation consistent: \code{"HAC"}
\item Mixed-Models degrees of freedom: \code{"satterthwaite"}, \code{"kenward-roger"}
\item Other: \code{"NeweyWest"}, \code{"KernHAC"}, \code{"OPG"}. See the \code{sandwich} package documentation.
}
\item One-sided formula which indicates the name of cluster variables (e.g., \code{~unit_id}). This formula is passed to the \code{cluster} argument of the \code{sandwich::vcovCL} function.
\item Square covariance matrix
\item Function which returns a covariance matrix (e.g., \code{stats::vcov})
}}

\item{conf_level}{numeric value between 0 and 1. Confidence level to use to build a confidence interval.}

\item{type}{string that indicates the type (scale) of the predictions used to
compute marginal effects or contrasts. This can differ based on the model
type, but will typically be a string such as: "response", "link", "probs",
or "zero". When an unsupported string is entered, the model-specific list of
acceptable values is returned in an error message. When \code{type} is \code{NULL}, the
default value is used. This default is the first model-related row in
the \code{marginaleffects:::type_dictionary} dataframe. If \code{type} is \code{NULL} and
the default value is "response", the function tries to compute marginal means
on the link scale before backtransforming them using the inverse link function.}

\item{transform_post}{(experimental) A function applied to unit-level adjusted predictions and confidence intervals just before the function returns results. For bayesian models, this function is applied to individual draws from the posterior distribution, before computing summaries.}

\item{cross}{TRUE or FALSE
\itemize{
\item \code{FALSE} (default): Marginal means are computed for each predictor individually.
\item \code{TRUE}: Marginal means are computed for each combination of predictors specified in the \code{variables} argument.
}}

\item{hypothesis}{specify a hypothesis test or custom contrast using a numeric value, vector, or matrix, a string, or a string formula.
\itemize{
\item Numeric:
\itemize{
\item Single value: the null hypothesis used in the computation of Z and p (before applying \code{transform_post}).
\item Vector: Weights to compute a linear combination of (custom contrast between) estimates. Length equal to the number of rows generated by the same function call, but without the \code{hypothesis} argument.
\item Matrix: Each column is a vector of weights, as described above, used to compute a distinct linear combination of (contrast between) estimates. The column names of the matrix are used as labels in the output.
}
\item String formula to specify linear or non-linear hypothesis tests. If the \code{term} column uniquely identifies rows, terms can be used in the formula. Otherwise, use \code{b1}, \code{b2}, etc. to identify the position of each parameter. Examples:
\itemize{
\item \code{hp = drat}
\item \code{hp + drat = 12}
\item \code{b1 + b2 + b3 = 0}
}
\item String:
\itemize{
\item "pairwise": pairwise differences between estimates in each row.
\item "reference": differences between the estimates in each row and the estimate in the first row.
\item "sequential": difference between an estimate and the estimate in the next row.
\item "revpairwise", "revreference", "revsequential": inverse of the corresponding hypotheses, as described above.
}
\item See the Examples section below and the vignette: \url{https://vincentarelbundock.github.io/marginaleffects/articles/hypothesis.html}
}}

\item{df}{Degrees of freedom used to compute p values and confidence intervals. A single numeric value between 1 and \code{Inf}. When \code{df} is \code{Inf}, the normal distribution is used. When \code{df} is finite, the \code{t} distribution is used. See \link[insight:get_df]{insight::get_df} for a convenient function to extract degrees of freedom. Ex: \code{slopes(model, df = insight::get_df(model))}}

\item{wts}{character value. Weights to use in the averaging.
\itemize{
\item "equal": each combination of variables in \code{variables_grid} gets an equal weight.
\item "cells": each combination of values for the variables in the \code{variables_grid} gets a weight proportional to its frequency in the original data.
\item "proportional": each combination of values for the variables in the \code{variables_grid} -- except for those in the \code{variables} argument -- gets a weight proportional to its frequency in the original data.
}}

\item{by}{Collapse marginal means into categories. Data frame with a \code{by} column of group labels, and merging columns shared by \code{newdata} or the data frame produced by calling the same function without the \code{by} argument.}

\item{...}{Additional arguments are passed to the \code{predict()} method
supplied by the modeling package. These arguments are particularly useful
for mixed-effects or bayesian models (see the online vignettes on the
\code{marginaleffects} website). Available arguments can vary from model to
model, depending on the range of supported arguments by each modeling
package. See the "Model-Specific Arguments" section of the
\code{?marginaleffects} documentation for a non-exhaustive list of available
arguments.}
}
\value{
Data frame of marginal means with one row per variable-value
combination.
}
\description{
Marginal means are adjusted predictions, averaged across a grid of categorical predictors,
holding other numeric predictors at their means. To learn more, read the marginal means vignette, visit the
package website, or scroll down this page for a full list of vignettes:
\itemize{
\item \url{https://vincentarelbundock.github.io/marginaleffects/articles/marginalmeans.html}
\item \url{https://vincentarelbundock.github.io/marginaleffects/}
}
}
\details{
This function begins by calling the \code{predictions} function to obtain a
grid of predictors, and adjusted predictions for each cell. The grid
includes all combinations of the categorical variables listed in the
\code{variables} and \code{variables_grid} arguments, or all combinations of the
categorical variables used to fit the model if \code{variables_grid} is \code{NULL}.
In the prediction grid, numeric variables are held at their means.

After constructing the grid and filling the grid with adjusted predictions,
\code{marginal_means} computes marginal means for the variables listed in the
\code{variables} argument, by averaging across all categories in the grid.

\code{marginal_means} can only compute standard errors for linear models, or for
predictions on the link scale, that is, with the \code{type} argument set to
"link".

The \code{marginaleffects} website compares the output of this function to the
popular \code{emmeans} package, which provides similar but more advanced
functionality: \url{https://vincentarelbundock.github.io/marginaleffects/}
}
\section{Model-Specific Arguments}{


Some model types allow model-specific arguments to modify the nature of
marginal effects, predictions, marginal means, and contrasts.\tabular{llll}{
   Package \tab Class \tab Argument \tab Documentation \cr
   \code{brms} \tab \code{brmsfit} \tab \code{ndraws} \tab \link[brms:posterior_predict.brmsfit]{brms::posterior_predict} \cr
    \tab  \tab \code{re_formula} \tab  \cr
   \code{lme4} \tab \code{merMod} \tab \code{include_random} \tab \link[insight:get_predicted]{insight::get_predicted} \cr
    \tab  \tab \code{re.form} \tab \link[lme4:predict.merMod]{lme4::predict.merMod} \cr
    \tab  \tab \code{allow.new.levels} \tab \link[lme4:predict.merMod]{lme4::predict.merMod} \cr
   \code{glmmTMB} \tab \code{glmmTMB} \tab \code{re.form} \tab \link[glmmTMB:predict.glmmTMB]{glmmTMB::predict.glmmTMB} \cr
    \tab  \tab \code{allow.new.levels} \tab \link[glmmTMB:predict.glmmTMB]{glmmTMB::predict.glmmTMB} \cr
    \tab  \tab \code{zitype} \tab \link[glmmTMB:predict.glmmTMB]{glmmTMB::predict.glmmTMB} \cr
   \code{mgcv} \tab \code{bam} \tab \code{exclude} \tab \link[mgcv:predict.bam]{mgcv::predict.bam} \cr
   \code{robustlmm} \tab \code{rlmerMod} \tab \code{re.form} \tab \link[robustlmm:rlmerMod-class]{robustlmm::predict.rlmerMod} \cr
    \tab  \tab \code{allow.new.levels} \tab \link[robustlmm:rlmerMod-class]{robustlmm::predict.rlmerMod} \cr
}
}

\section{Bayesian posterior summaries}{


By default, credible intervals in bayesian models are built as equal-tailed
intervals. This can be changed to a highest density interval by setting a global
option:

\code{options("marginaleffects_posterior_interval" = "eti")}

\code{options("marginaleffects_posterior_interval" = "hdi")}

By default, the center of the posterior distribution in bayesian models is
identified by the median. Users can use a different summary function by setting a
global option:

\code{options("marginaleffects_posterior_center" = "mean")}

\code{options("marginaleffects_posterior_center" = "median")}

When estimates are averaged using the \code{by} argument, the \code{tidy()} function, or
the \code{summary()} function, the posterior distribution is marginalized twice over.
First, we take the average \emph{across} units but \emph{within} each iteration of the
MCMC chain, according to what the user requested in the \code{by} argument or
the \code{tidy()}/\code{summary()} functions. Then, we identify the center of the resulting
posterior using the function supplied to the
\code{"marginaleffects_posterior_center"} option (the median by default).
}

\examples{
library(marginaleffects)

# Simple marginal means for each level of `cyl`.
# `carb` and `cyl` are converted to factors and `am` to logical before fitting.
dat <- mtcars
dat$carb <- factor(dat$carb)
dat$cyl <- factor(dat$cyl)
dat$am <- as.logical(dat$am)
mod <- lm(mpg ~ carb + cyl + am, dat)

marginal_means(
  mod,
  variables = "cyl")

# Collapse levels of `cyl` by averaging.
# The `by` data frame pairs each value of `cyl` with a group label in its `by` column.
by <- data.frame(
  cyl = c(4, 6, 8),
  by = c("4 & 6", "4 & 6", "8"))
marginal_means(mod,
  variables = "cyl",
  by = by)

# Pairwise differences between the collapsed levels
marginal_means(mod,
  variables = "cyl",
  by = by,
  hypothesis = "pairwise")

# Cross: marginal means for each combination of `cyl` and `carb`
marginal_means(mod,
  variables = c("cyl", "carb"),
  cross = TRUE)

# Collapsed cross: label every combination of `cyl` and `carb` with a group.
# NOTE(review): this `by` data frame is built but never passed to
# marginal_means() in this example. Presumably a call combining `cross = TRUE`
# with `by = by` was intended here -- confirm and restore it if so.
by <- expand.grid(
  cyl = unique(mtcars$cyl),
  carb = unique(mtcars$carb))
by$by <- ifelse(
  by$cyl == 4,
  paste("Control:", by$carb),
  paste("Treatment:", by$carb))


# Convert numeric variables to categorical before fitting the model
dat <- mtcars
dat$am <- as.logical(dat$am)
dat$carb <- as.factor(dat$carb)
mod <- lm(mpg ~ hp + am + carb, data = dat)

# Compute and summarize marginal means.
# `variables` is left unset, so the categorical predictors are used.
marginal_means(mod)

# Contrast between marginal means (carb2 - carb1), or "is the 1st marginal mean equal to the 2nd?"
# See the vignette on "Hypothesis Tests and Custom Contrasts" on the `marginaleffects` website.
# The hypothesis is given first as a string formula, then as the vector of weights `lc`.
lc <- c(-1, 1, 0, 0, 0, 0)
marginal_means(mod, variables = "carb", hypothesis = "b2 = b1")

marginal_means(mod, variables = "carb", hypothesis = lc)

# Multiple custom contrasts.
# Each column of the matrix is one vector of weights; the column names
# ("A" and "B") are used as labels in the output.
lc <- matrix(c(
    -2, 1, 1, 0, -1, 1,
    -1, 1, 0, 0, 0, 0
    ),
  ncol = 2,
  dimnames = list(NULL, c("A", "B")))
marginal_means(mod, variables = "carb", hypothesis = lc)

}
