% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/cv_class_base.R
\name{MLCrossValidation}
\alias{MLCrossValidation}
\title{R6 Class to perform cross-validation experiments}
\description{
The \code{MLCrossValidation} class is used to construct a cross validation object
and to perform a k-fold cross validation for a specified machine learning
algorithm using one distinct hyperparameter setting.
}
\details{
The \code{MLCrossValidation} class requires a named list of predefined
row indices for the cross validation folds, e.g., created with the function
\code{\link[splitTools:create_folds]{splitTools::create_folds()}}. This list also defines the \code{k} of the k-fold
cross-validation. To perform a repeated k-fold cross validation, provide a
list with all repeated fold definitions, e.g., as created when specifying
the argument \code{m_rep} of \code{\link[splitTools:create_folds]{splitTools::create_folds()}}.
}
\examples{
dataset <- do.call(
  cbind,
  c(sapply(paste0("col", 1:6), function(x) {
    rnorm(n = 500)
    },
    USE.NAMES = TRUE,
    simplify = FALSE
   ),
   list(target = sample(0:1, 500, TRUE))
))

fold_list <- splitTools::create_folds(
  y = dataset[, 7],
  k = 3,
  type = "stratified",
  seed = 123
)

cv <- MLCrossValidation$new(
  learner = LearnerKnn$new(),
  fold_list = fold_list,
  seed = 123,
  ncores = 2
)

# learner parameters
cv$learner_args <- list(
  k = 20,
  l = 0,
  test = parse(text = "fold_test$x")
)

# performance parameters
cv$predict_args <- list(type = "response")
cv$performance_metric <- metric("bacc")

# set data
cv$set_data(
  x = data.matrix(dataset[, -7]),
  y = dataset[, 7]
)

cv$execute()


## ------------------------------------------------
## Method `MLCrossValidation$new`
## ------------------------------------------------

dataset <- do.call(
  cbind,
  c(sapply(paste0("col", 1:6), function(x) {
    rnorm(n = 500)
    },
    USE.NAMES = TRUE,
    simplify = FALSE
   ),
   list(target = sample(0:1, 500, TRUE))
))
fold_list <- splitTools::create_folds(
  y = dataset[, 7],
  k = 3,
  type = "stratified",
  seed = 123
)
cv <- MLCrossValidation$new(
  learner = LearnerKnn$new(),
  fold_list = fold_list,
  seed = 123,
  ncores = 2
)


## ------------------------------------------------
## Method `MLCrossValidation$execute`
## ------------------------------------------------

dataset <- do.call(
  cbind,
  c(sapply(paste0("col", 1:6), function(x) {
    rnorm(n = 500)
    },
    USE.NAMES = TRUE,
    simplify = FALSE
   ),
   list(target = sample(0:1, 500, TRUE))
))
fold_list <- splitTools::create_folds(
  y = dataset[, 7],
  k = 3,
  type = "stratified",
  seed = 123
)
cv <- MLCrossValidation$new(
  learner = LearnerKnn$new(),
  fold_list = fold_list,
  seed = 123,
  ncores = 2
)
cv$learner_args <- list(
  k = 20,
  l = 0,
  test = parse(text = "fold_test$x")
)
cv$predict_args <- list(type = "response")
cv$performance_metric <- metric("bacc")

# set data
cv$set_data(
  x = data.matrix(dataset[, -7]),
  y = dataset[, 7]
)

cv$execute()
}
\seealso{
\code{\link[splitTools:create_folds]{splitTools::create_folds()}}, \link[mlr3measures:measures]{mlr3measures::measures},
\code{\link[=metric]{metric()}}
}
\section{Super classes}{
\code{\link[mlexperiments:MLBase]{mlexperiments::MLBase}} -> \code{\link[mlexperiments:MLExperimentsBase]{mlexperiments::MLExperimentsBase}} -> \code{MLCrossValidation}
}
\section{Public fields}{
\if{html}{\out{<div class="r6-fields">}}
\describe{
\item{\code{fold_list}}{A named list of predefined row indices for the cross
validation folds, e.g., created with the function
\code{\link[splitTools:create_folds]{splitTools::create_folds()}}.}

\item{\code{return_models}}{A logical. Whether the fitted models should be
returned with the results (default: \code{FALSE}).}

\item{\code{performance_metric}}{Either a named list with metric functions, a
single metric function, or a character vector with metric names from
the \code{mlr3measures} package. The provided functions must take two named
arguments: \code{ground_truth} and \code{predictions}. For metrics from the
\code{mlr3measures} package, the wrapper function \code{\link[=metric]{metric()}}
exists in order to prepare them for use with the \code{mlexperiments}
package.}

\item{\code{performance_metric_args}}{A list. Further arguments required to
compute the performance metric.}

\item{\code{predict_args}}{A list. Further arguments required to compute the
predictions.}
}
\if{html}{\out{</div>}}
}
\section{Methods}{
\subsection{Public methods}{
\itemize{
\item \href{#method-MLCrossValidation-new}{\code{MLCrossValidation$new()}}
\item \href{#method-MLCrossValidation-execute}{\code{MLCrossValidation$execute()}}
\item \href{#method-MLCrossValidation-clone}{\code{MLCrossValidation$clone()}}
}
}
\if{html}{\out{
<details open><summary>Inherited methods</summary>
<ul>
<li><span class="pkg-link" data-pkg="mlexperiments" data-topic="MLExperimentsBase" data-id="set_data"><a href='../../mlexperiments/html/MLExperimentsBase.html#method-MLExperimentsBase-set_data'><code>mlexperiments::MLExperimentsBase$set_data()</code></a></span></li>
</ul>
</details>
}}
\if{html}{\out{<hr>}}
\if{html}{\out{<a id="method-MLCrossValidation-new"></a>}}
\if{latex}{\out{\hypertarget{method-MLCrossValidation-new}{}}}
\subsection{Method \code{new()}}{
Create a new \code{MLCrossValidation} object.
\subsection{Usage}{
\if{html}{\out{<div class="r">}}\preformatted{MLCrossValidation$new(
  learner,
  fold_list,
  seed,
  ncores = -1L,
  return_models = FALSE
)}\if{html}{\out{</div>}}
}

\subsection{Arguments}{
\if{html}{\out{<div class="arguments">}}
\describe{
\item{\code{learner}}{An initialized learner object that inherits from class
\code{"MLLearnerBase"}.}

\item{\code{fold_list}}{A named list of predefined row indices for the cross
validation folds, e.g., created with the function
\code{\link[splitTools:create_folds]{splitTools::create_folds()}}.}

\item{\code{seed}}{An integer. Needs to be set for reproducibility purposes.}

\item{\code{ncores}}{An integer to specify the number of cores used for
parallelization (default: \code{-1L}).}

\item{\code{return_models}}{A logical. Whether the fitted models should be
returned with the results (default: \code{FALSE}).}
}
\if{html}{\out{</div>}}
}
\subsection{Details}{
The \code{MLCrossValidation} class requires a named list of
predefined row indices for the cross validation folds, e.g., created
with the function \code{\link[splitTools:create_folds]{splitTools::create_folds()}}. This list also defines
the \code{k} of the k-fold cross-validation. To perform a repeated
k-fold cross validation, provide a list with all repeated fold
definitions, e.g., as created when specifying the argument \code{m_rep} of
\code{\link[splitTools:create_folds]{splitTools::create_folds()}}.
}

\subsection{Examples}{
\if{html}{\out{<div class="r example copy">}}
\preformatted{dataset <- do.call(
  cbind,
  c(sapply(paste0("col", 1:6), function(x) {
    rnorm(n = 500)
    },
    USE.NAMES = TRUE,
    simplify = FALSE
   ),
   list(target = sample(0:1, 500, TRUE))
))
fold_list <- splitTools::create_folds(
  y = dataset[, 7],
  k = 3,
  type = "stratified",
  seed = 123
)
cv <- MLCrossValidation$new(
  learner = LearnerKnn$new(),
  fold_list = fold_list,
  seed = 123,
  ncores = 2
)

}
\if{html}{\out{</div>}}

}

}
\if{html}{\out{<hr>}}
\if{html}{\out{<a id="method-MLCrossValidation-execute"></a>}}
\if{latex}{\out{\hypertarget{method-MLCrossValidation-execute}{}}}
\subsection{Method \code{execute()}}{
Execute the cross validation.
\subsection{Usage}{
\if{html}{\out{<div class="r">}}\preformatted{MLCrossValidation$execute()}\if{html}{\out{</div>}}
}

\subsection{Details}{
All results of the cross validation are saved in the field
\verb{$results} of the \code{MLCrossValidation} class. After successful execution
of the cross validation, \verb{$results} contains a list with the items:
\itemize{
\item "fold" A list of folds containing the following items for each
cross validation fold:
\itemize{
\item "fold_ids" A vector with the utilized in-sample row indices.
\item "ground_truth" A vector with the ground truth.
\item "predictions" A vector with the predictions.
\item "learner.args" A list with the arguments provided to the learner.
\item "model" If \code{return_models = TRUE}, the fitted model.
}
\item "summary" A data.table with the summarized results (same as
the returned value of the \code{execute} method).
\item "performance" A list with the value of the performance metric
calculated for each of the cross validation folds.
}
}

\subsection{Returns}{
The function returns a data.table with the results of the cross
validation. More results are accessible from the field \verb{$results} of
the \code{MLCrossValidation} class.
}
\subsection{Examples}{
\if{html}{\out{<div class="r example copy">}}
\preformatted{dataset <- do.call(
  cbind,
  c(sapply(paste0("col", 1:6), function(x) {
    rnorm(n = 500)
    },
    USE.NAMES = TRUE,
    simplify = FALSE
   ),
   list(target = sample(0:1, 500, TRUE))
))
fold_list <- splitTools::create_folds(
  y = dataset[, 7],
  k = 3,
  type = "stratified",
  seed = 123
)
cv <- MLCrossValidation$new(
  learner = LearnerKnn$new(),
  fold_list = fold_list,
  seed = 123,
  ncores = 2
)
cv$learner_args <- list(
  k = 20,
  l = 0,
  test = parse(text = "fold_test$x")
)
cv$predict_args <- list(type = "response")
cv$performance_metric <- metric("bacc")

# set data
cv$set_data(
  x = data.matrix(dataset[, -7]),
  y = dataset[, 7]
)

cv$execute()
}
\if{html}{\out{</div>}}

}

}
\if{html}{\out{<hr>}}
\if{html}{\out{<a id="method-MLCrossValidation-clone"></a>}}
\if{latex}{\out{\hypertarget{method-MLCrossValidation-clone}{}}}
\subsection{Method \code{clone()}}{
The objects of this class are cloneable with this method.
\subsection{Usage}{
\if{html}{\out{<div class="r">}}\preformatted{MLCrossValidation$clone(deep = FALSE)}\if{html}{\out{</div>}}
}

\subsection{Arguments}{
\if{html}{\out{<div class="arguments">}}
\describe{
\item{\code{deep}}{Whether to make a deep clone.}
}
\if{html}{\out{</div>}}
}
}
}
