% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/explanation.R
\name{explain}
\alias{explain}
\alias{explain.empirical}
\alias{explain.gaussian}
\alias{explain.copula}
\alias{explain.combined}
\title{Explain the output of machine learning models with more accurately estimated Shapley values}
\usage{
explain(x, explainer, approach, prediction_zero, ...)

\method{explain}{empirical}(
  x,
  explainer,
  approach,
  prediction_zero,
  type = "fixed_sigma",
  fixed_sigma_vec = 0.1,
  n_samples_aicc = 1000,
  eval_max_aicc = 20,
  start_aicc = 0.1,
  w_threshold = 0.95,
  ...
)

\method{explain}{gaussian}(
  x,
  explainer,
  approach,
  prediction_zero,
  mu = NULL,
  cov_mat = NULL,
  ...
)

\method{explain}{copula}(x, explainer, approach, prediction_zero, ...)

\method{explain}{combined}(
  x,
  explainer,
  approach,
  prediction_zero,
  mu = NULL,
  cov_mat = NULL,
  ...
)
}
\arguments{
\item{x}{A matrix or data.frame. Contains the features whose
predictions ought to be explained (test data).}

\item{explainer}{An \code{explainer} object to use for explaining the observations.
See \code{\link{shapr}}.}

\item{approach}{Character vector of length \code{1} or \code{n_features}.
\code{n_features} equals the total number of features in the model. All elements should
either be \code{"gaussian"}, \code{"copula"} or \code{"empirical"}. See details for more information.}

\item{prediction_zero}{Numeric. The prediction value for unseen data, typically equal to the mean of
the response.}

\item{...}{Additional arguments passed to \code{\link{prepare_data}}.}

\item{type}{Character. Should be equal to either \code{"independence"},
\code{"fixed_sigma"}, \code{"AICc_each_k"} or \code{"AICc_full"}.}

\item{fixed_sigma_vec}{Numeric. Represents the kernel bandwidth. Note that this argument is only
applicable when \code{approach = "empirical"}, and \code{type = "fixed_sigma"}.}

\item{n_samples_aicc}{Positive integer. Number of samples to consider in AICc optimization.
Note that this argument is only applicable when \code{approach = "empirical"}, and \code{type}
is either equal to \code{"AICc_each_k"} or \code{"AICc_full"}.}

\item{eval_max_aicc}{Positive integer. Maximum number of iterations when
optimizing the AICc. Note that this argument is only applicable when
\code{approach = "empirical"}, and \code{type} is either equal to
\code{"AICc_each_k"} or \code{"AICc_full"}.}

\item{start_aicc}{Numeric. Start value of \code{sigma} when optimizing the AICc. Note that this argument
is only applicable when \code{approach = "empirical"}, and \code{type} is either equal to
\code{"AICc_each_k"} or \code{"AICc_full"}.}

\item{w_threshold}{Numeric. A value between 0 and 1.}

\item{mu}{Numeric vector. (Optional) Containing the mean of the data generating distribution.
If \code{NULL} the expected values are estimated from the data. Note that this is only used
when \code{approach = "gaussian"}.}

\item{cov_mat}{Numeric matrix. (Optional) Containing the covariance matrix of the data
generating distribution. \code{NULL} means it is estimated from the data if needed
(in the Gaussian approach).}
}
\value{
Object of class \code{c("shapr", "list")}. Contains the following items:
\describe{
  \item{dt}{data.table}
  \item{model}{Model object}
  \item{p}{Numeric vector}
  \item{x_test}{data.table}
}

Note that the returned items \code{model}, \code{p} and \code{x_test} are mostly added due
to the implementation of \code{plot.shapr}. If you only want to look at the numerical results
it is sufficient to focus on \code{dt}. \code{dt} is a data.table where the number of rows equals
the number of observations you'd like to explain, and the number of columns equals \code{m + 1},
where \code{m} equals the total number of features in your model.

If \code{dt[i, j + 1] > 0} it indicates that the j-th feature increased the prediction for
the i-th observation. Likewise, if \code{dt[i, j + 1] < 0} it indicates that the j-th feature
decreased the prediction for the i-th observation. The magnitude of the value is also important
to notice. E.g. if \code{dt[i, k + 1]} and \code{dt[i, j + 1]} are greater than \code{0},
where \code{j != k}, and \code{dt[i, k + 1] > dt[i, j + 1]}, this indicates that features
\code{j} and \code{k} both increased the value of the prediction, but that the effect of the k-th
feature was larger than that of the j-th feature.

The first column in \code{dt}, called \code{none}, is the prediction value not assigned to any of the features
(\ifelse{html}{\eqn{\phi}\out{<sub>0</sub>}}{\eqn{\phi_0}}).
It's equal for all observations and set by the user through the argument \code{prediction_zero}.
In theory this value should be the expected prediction without conditioning on any features.
Typically we set this value equal to the mean of the response variable in our training data, but other choices
such as the mean of the predictions in the training data are also reasonable.
}
\description{
Explain the output of machine learning models with more accurately estimated Shapley values
}
\details{
The most important thing to notice is that \code{shapr} has implemented three different
approaches for estimating the conditional distributions of the data, namely \code{"empirical"},
\code{"gaussian"} and \code{"copula"}.

In addition to this the user will also have the option of combining the three approaches.
E.g. if you're in a situation where you have trained a model that consists of 10 features,
and you'd like to use the \code{"gaussian"} approach when you condition on a single feature,
the \code{"empirical"} approach if you condition on 2-5 features, and \code{"copula"} version
if you condition on more than 5 features this can be done by simply passing
\code{approach = c("gaussian", rep("empirical", 4), rep("copula", 5))}. If
\code{approach[i] = "gaussian"} it means that you'd like to use the \code{"gaussian"} approach
when conditioning on \code{i} features.
}
\examples{
# Load example data
data("Boston", package = "MASS")

# Split data into test- and training data
x_train <- head(Boston, -3)
x_test <- tail(Boston, 3)

# Fit a linear model
model <- lm(medv ~ lstat + rm + dis + indus, data = x_train)

# Create an explainer object
explainer <- shapr(x_train, model)

# Explain predictions
p <- mean(x_train$medv)

# Empirical approach
explain1 <- explain(x_test, explainer, approach = "empirical", prediction_zero = p, n_samples = 1e2)

# Gaussian approach
explain2 <- explain(x_test, explainer, approach = "gaussian", prediction_zero = p, n_samples = 1e2)

# Gaussian copula approach
explain3 <- explain(x_test, explainer, approach = "copula", prediction_zero = p, n_samples = 1e2)

# Combined approach
approach <- c("gaussian", "gaussian", "empirical", "empirical")
explain4 <- explain(x_test, explainer, approach = approach, prediction_zero = p, n_samples = 1e2)

# Print the Shapley values
print(explain1$dt)

# Plot the results
plot(explain1)
}
\author{
Camilla Lingjaerde, Nikolai Sellereite
}
