% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/DeepLift.R
\name{DeepSHAP}
\alias{DeepSHAP}
\title{Deep Shapley additive explanations (DeepSHAP)}
\description{
The \emph{DeepSHAP} method extends the \code{\link{DeepLift}} technique by not only
considering a single reference value but by calculating the average
from several, ideally representative reference values at each layer. The
obtained feature-wise results are approximate Shapley values for the
chosen output, where the conditional expectation is computed using these
different reference values, i.e., the \emph{DeepSHAP} method decomposes the
difference between the prediction and the mean prediction \eqn{f(x) - E[f(\tilde{x})]}
into feature-wise effects. The reference values can be passed by the argument
\code{data_ref}.

The R6 class can also be initialized using the \code{\link{run_deepshap}} function
as a helper function so that no prior knowledge of R6 classes is required.
}
\examples{
\dontshow{if (torch::torch_is_installed()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
#----------------------- Example 1: Torch ----------------------------------
library(torch)

# Create a small nn_sequential model (5 inputs, 2 softmax outputs) and
# random input data (25 instances with 5 features each)
model <- nn_sequential(
  nn_linear(5, 12),
  nn_relu(),
  nn_linear(12, 2),
  nn_softmax(dim = 2)
)
data <- torch_randn(25, 5)

# Create a reference dataset (5 instances) for the estimation of the
# conditional expectation
ref <- torch_randn(5, 5)

# Create the Converter; `input_dim` is the input shape without the batch axis
converter <- convert(model, input_dim = c(5))

# Apply the DeepSHAP method by initializing the R6 class directly
deepshap <- DeepSHAP$new(converter, data, data_ref = ref)

# You can also use the helper function `run_deepshap` for initializing
# an R6 DeepSHAP object
deepshap <- run_deepshap(converter, data, data_ref = ref)

# Print the result as a torch tensor for the first two data points
get_result(deepshap, "torch.tensor")[1:2]

# Plot the result for both classes
plot(deepshap, output_idx = 1:2)

# Plot the boxplot of all data points and for both classes
boxplot(deepshap, output_idx = 1:2)

# ------------------------- Example 2: Neuralnet ---------------------------
# neuralnet is an optional package, so this example only runs if it is
# available
if (require("neuralnet")) {
  library(neuralnet)
  data(iris)

  # Train a neural network
  nn <- neuralnet((Species == "setosa") ~ Petal.Length + Petal.Width,
    iris,
    linear.output = FALSE,
    hidden = c(3, 2), act.fct = "tanh", rep = 1
  )

  # Convert the model
  converter <- convert(nn)

  # Apply DeepSHAP with the rescale rule (default of `rule_name`) and use
  # the input data itself as the reference dataset; at most 100 instances
  # (default of `limit_ref`) are taken from it
  deepshap <- run_deepshap(converter, iris[, c(3, 4)],
                           data_ref = iris[, c(3, 4)])

  # Get the result as a data.frame and show the first 5 rows
  get_result(deepshap, type = "data.frame")[1:5, ]

  # Plot the result for the first data point in the data
  plot(deepshap, data_idx = 1)

  # Plot the result as boxplots
  boxplot(deepshap)
}
\dontshow{\}) # examplesIf}
\dontshow{if (torch::torch_is_installed()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
# ------------------------- Example 3: Keras -------------------------------
# Note: `&&` short-circuits, so `keras::is_keras_available()` is only
# evaluated if the optional package keras could be loaded. With `&`, the
# example would fail with an error when keras is not installed.
if (require("keras") && keras::is_keras_available()) {
  library(keras)

  # Make sure keras is installed properly
  is_keras_available()

  # Ten random images of size 32x32 with three channels (channels last)
  data <- array(rnorm(10 * 32 * 32 * 3), dim = c(10, 32, 32, 3))

  model <- keras_model_sequential()
  model \%>\%
    layer_conv_2d(
      input_shape = c(32, 32, 3), kernel_size = 8, filters = 8,
      activation = "softplus", padding = "valid") \%>\%
    layer_conv_2d(
      kernel_size = 8, filters = 4, activation = "tanh",
      padding = "same") \%>\%
    layer_conv_2d(
      kernel_size = 4, filters = 2, activation = "relu",
      padding = "valid") \%>\%
    layer_flatten() \%>\%
    layer_dense(units = 64, activation = "relu") \%>\%
    layer_dense(units = 16, activation = "relu") \%>\%
    layer_dense(units = 2, activation = "softmax")

  # Convert the model
  converter <- convert(model)

  # Apply the DeepSHAP method with zero baseline (which is equivalent to
  # DeepLift with zero baseline); the data is in the channels-last format
  deepshap <- run_deepshap(converter, data, channels_first = FALSE)

  # Plot the result for the first image and both classes
  plot(deepshap, output_idx = 1:2)

  # Plot the pixel-wise median of the results
  plot_global(deepshap, output_idx = 1)
}
\dontshow{\}) # examplesIf}
\dontshow{if (torch::torch_is_installed() & Sys.getenv("RENDER_PLOTLY", unset = 0) == 1) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
#------------------------- Plotly plots ------------------------------------
# Reuses the `deepshap` object created in the examples above
if (require("plotly")) {
  # You can also create an interactive plot with plotly.
  # This is a suggested package, so make sure that it is installed
  library(plotly)
  boxplot(deepshap, as_plotly = TRUE)
}
\dontshow{\}) # examplesIf}
}
\references{
S. Lundberg & S. Lee (2017) \emph{A unified approach to interpreting model
predictions.}  NIPS 2017, p. 4768–4777
}
\seealso{
Other methods: 
\code{\link{ConnectionWeights}},
\code{\link{DeepLift}},
\code{\link{ExpectedGradient}},
\code{\link{Gradient}},
\code{\link{IntegratedGradient}},
\code{\link{LIME}},
\code{\link{LRP}},
\code{\link{SHAP}},
\code{\link{SmoothGrad}}
}
\concept{methods}
\section{Super class}{
\code{\link[innsight:InterpretingMethod]{innsight::InterpretingMethod}} -> \code{DeepSHAP}
}
\section{Public fields}{
\if{html}{\out{<div class="r6-fields">}}
\describe{
\item{\code{rule_name}}{(\code{character(1)})\cr
Name of the applied rule to calculate the contributions.
Either \code{'rescale'} or \code{'reveal_cancel'}.\cr}

\item{\code{data_ref}}{(\code{list})\cr
The passed reference dataset for estimating the conditional expectation
as a \code{list} of \code{torch_tensors} in the selected
data format (field \code{dtype}) matching the corresponding shapes of the
individual input layers. Besides, the channel axis is moved to the
second position after the batch size because internally only the
format \emph{channels first} is used.\cr}
}
\if{html}{\out{</div>}}
}
\section{Methods}{
\subsection{Public methods}{
\itemize{
\item \href{#method-DeepSHAP-new}{\code{DeepSHAP$new()}}
\item \href{#method-DeepSHAP-clone}{\code{DeepSHAP$clone()}}
}
}
\if{html}{\out{
<details open><summary>Inherited methods</summary>
<ul>
<li><span class="pkg-link" data-pkg="innsight" data-topic="InterpretingMethod" data-id="get_result"><a href='../../innsight/html/InterpretingMethod.html#method-InterpretingMethod-get_result'><code>innsight::InterpretingMethod$get_result()</code></a></span></li>
<li><span class="pkg-link" data-pkg="innsight" data-topic="InterpretingMethod" data-id="plot"><a href='../../innsight/html/InterpretingMethod.html#method-InterpretingMethod-plot'><code>innsight::InterpretingMethod$plot()</code></a></span></li>
<li><span class="pkg-link" data-pkg="innsight" data-topic="InterpretingMethod" data-id="plot_global"><a href='../../innsight/html/InterpretingMethod.html#method-InterpretingMethod-plot_global'><code>innsight::InterpretingMethod$plot_global()</code></a></span></li>
<li><span class="pkg-link" data-pkg="innsight" data-topic="InterpretingMethod" data-id="print"><a href='../../innsight/html/InterpretingMethod.html#method-InterpretingMethod-print'><code>innsight::InterpretingMethod$print()</code></a></span></li>
</ul>
</details>
}}
\if{html}{\out{<hr>}}
\if{html}{\out{<a id="method-DeepSHAP-new"></a>}}
\if{latex}{\out{\hypertarget{method-DeepSHAP-new}{}}}
\subsection{Method \code{new()}}{
Create a new instance of the \code{DeepSHAP} R6 class. When initialized,
the method \emph{DeepSHAP} is applied to the given data and the results are
stored in the field \code{result}.
\subsection{Usage}{
\if{html}{\out{<div class="r">}}\preformatted{DeepSHAP$new(
  converter,
  data,
  channels_first = TRUE,
  output_idx = NULL,
  output_label = NULL,
  ignore_last_act = TRUE,
  rule_name = "rescale",
  data_ref = NULL,
  limit_ref = 100,
  winner_takes_all = TRUE,
  verbose = interactive(),
  dtype = "float"
)}\if{html}{\out{</div>}}
}

\subsection{Arguments}{
\if{html}{\out{<div class="arguments">}}
\describe{
\item{\code{converter}}{(\code{\link{Converter}})\cr
An instance of the \code{Converter} class that includes the
torch-converted model and some other model-specific attributes. See
\code{\link{Converter}} for details.\cr}

\item{\code{data}}{(\code{\link{array}}, \code{\link{data.frame}}, \code{\link{torch_tensor}} or \code{list})\cr
The data to which the method is to be applied. These must
have the same format as the input data of the model passed to the
converter object. This means either
\itemize{
\item an \code{array}, \code{data.frame}, \code{torch_tensor} or array-like format of
size \emph{(batch_size, dim_in)}, if e.g., the model has only one input layer, or
\item a \code{list} with the corresponding input data (in the format described
in the previous item) for each of the input layers.\cr
}}

\item{\code{channels_first}}{(\code{logical(1)})\cr
The channel position of the given data (argument
\code{data}). If \code{TRUE}, the channel axis is placed at the second position
between the batch size and the rest of the input axes, e.g.,
\code{c(10,3,32,32)} for a batch of ten images with three channels and a
height and width of 32 pixels. Otherwise (\code{FALSE}), the channel axis
is at the last position, i.e., \code{c(10,32,32,3)}. If the data
has no channel axis, use the default value \code{TRUE}.\cr}

\item{\code{output_idx}}{(\code{integer}, \code{list} or \code{NULL})\cr
These indices specify the output nodes for which
the method is to be applied. In order to allow models with multiple
output layers, there are the following possibilities to select
the indices of the output nodes in the individual output layers:
\itemize{
\item An \code{integer} vector of indices: If the model has only one output
layer, the values correspond to the indices of the output nodes, e.g.,
\code{c(1,3,4)} for the first, third and fourth output node. If there are
multiple output layers, the indices of the output nodes from the first
output layer are considered.
\item A \code{list} of \code{integer} vectors of indices: If the method is to be
applied to output nodes from different layers, a list can be passed
that specifies the desired indices of the output nodes for each
output layer. Unwanted output layers have the entry \code{NULL} instead of
a vector of indices, e.g., \code{list(NULL, c(1,3))} for the first and
third output node in the second output layer.
\item \code{NULL} (default): The method is applied to all output nodes in
the first output layer but is limited to the first ten as the
calculations become more computationally expensive for more output
nodes.\cr
}}

\item{\code{output_label}}{(\code{character}, \code{factor}, \code{list} or \code{NULL})\cr
These values specify the output nodes for which
the method is to be applied. Only values that were previously passed with
the argument \code{output_names} in the \code{converter} can be used. In order to
allow models with multiple
output layers, there are the following possibilities to select
the names of the output nodes in the individual output layers:
\itemize{
\item A \code{character} vector or \code{factor} of labels: If the model has only one output
layer, the values correspond to the labels of the output nodes named in the
passed \code{Converter} object, e.g.,
\code{c("a", "c", "d")} for the first, third and fourth output node if the
output names are \code{c("a", "b", "c", "d")}. If there are
multiple output layers, the names of the output nodes from the first
output layer are considered.
\item A \code{list} of \code{character}/\code{factor} vectors of labels: If the method is to be
applied to output nodes from different layers, a list can be passed
that specifies the desired labels of the output nodes for each
output layer. Unwanted output layers have the entry \code{NULL} instead of
a vector of labels, e.g., \code{list(NULL, c("a", "c"))} for the first and
third output node in the second output layer.
\item \code{NULL} (default): The method is applied to all output nodes in
the first output layer but is limited to the first ten as the
calculations become more computationally expensive for more output
nodes.\cr
}}

\item{\code{ignore_last_act}}{(\code{logical(1)})\cr
Set this logical value to ignore (\code{TRUE}, the default) or to include
(\code{FALSE}) the last activation function of each output layer.
In practice, the last activation (especially for softmax activation) is
often omitted.\cr}

\item{\code{rule_name}}{(\code{character(1)})\cr
Name of the applied rule to calculate the
contributions. Use either \code{'rescale'} or \code{'reveal_cancel'}. \cr}

\item{\code{data_ref}}{(\code{\link{array}}, \code{\link{data.frame}}, \code{\link{torch_tensor}} or \code{list})\cr
The reference data which is used to estimate the conditional expectation.
These must have the same format as the input data of the model passed to
the converter object. This means either
\itemize{
\item an \code{array}, \code{data.frame}, \code{torch_tensor} or array-like format of
size \emph{(batch_size, dim_in)}, if e.g., the model has only one input layer, or
\item a \code{list} with the corresponding input data (in the format described
in the previous item) for each of the input layers.
\item or \code{NULL} (default) to use only a zero baseline for the estimation.\cr
}}

\item{\code{limit_ref}}{(\code{integer(1)})\cr
This argument limits the number of instances taken from the reference
dataset \code{data_ref} so that only \code{limit_ref} randomly selected
elements, and not the entire dataset, are used to estimate the conditional expectation.
A too-large number can significantly increase the computation time.\cr}

\item{\code{winner_takes_all}}{(\code{logical(1)})\cr
This logical argument is only relevant for MaxPooling
layers and is otherwise ignored. With this layer type, it is possible that
the position of the maximum values in the pooling kernel of the normal input
\eqn{x} and the reference input \eqn{x'} may not match, which leads to a
violation of the summation-to-delta property. To overcome this problem,
another variant is implemented, which treats a MaxPooling layer as an
AveragePooling layer in the backward pass only, leading to a uniform
distribution of the upper-layer contribution to the lower layer.\cr}

\item{\code{verbose}}{(\code{logical(1)})\cr
This logical argument determines whether a progress bar is
displayed for the calculation of the method or not. The default value is
the output of the primitive R function \code{\link[=interactive]{interactive()}}.\cr}

\item{\code{dtype}}{(\code{character(1)})\cr
The data type for the calculations. Use
either \code{'float'} for \link{torch_float} or \code{'double'} for
\link{torch_double}.\cr}
}
\if{html}{\out{</div>}}
}
}
\if{html}{\out{<hr>}}
\if{html}{\out{<a id="method-DeepSHAP-clone"></a>}}
\if{latex}{\out{\hypertarget{method-DeepSHAP-clone}{}}}
\subsection{Method \code{clone()}}{
The objects of this class are cloneable with this method.
\subsection{Usage}{
\if{html}{\out{<div class="r">}}\preformatted{DeepSHAP$clone(deep = FALSE)}\if{html}{\out{</div>}}
}

\subsection{Arguments}{
\if{html}{\out{<div class="arguments">}}
\describe{
\item{\code{deep}}{Whether to make a deep clone.}
}
\if{html}{\out{</div>}}
}
}
}
