% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/collect.R
\name{collect_predictions}
\alias{collect_predictions}
\alias{collect_predictions.default}
\alias{collect_predictions.tune_results}
\alias{collect_metrics}
\alias{collect_metrics.tune_results}
\alias{collect_notes}
\alias{collect_notes.tune_results}
\alias{collect_extracts}
\alias{collect_extracts.tune_results}
\title{Obtain and format results produced by tuning functions}
\usage{
collect_predictions(x, ...)

\method{collect_predictions}{default}(x, ...)

\method{collect_predictions}{tune_results}(x, ..., summarize = FALSE, parameters = NULL)

collect_metrics(x, ...)

\method{collect_metrics}{tune_results}(x, ..., summarize = TRUE, type = c("long", "wide"))

collect_notes(x, ...)

\method{collect_notes}{tune_results}(x, ...)

collect_extracts(x, ...)

\method{collect_extracts}{tune_results}(x, ...)
}
\arguments{
\item{x}{The results of \code{\link[=tune_grid]{tune_grid()}}, \code{\link[=tune_bayes]{tune_bayes()}}, \code{\link[=fit_resamples]{fit_resamples()}},
or \code{\link[=last_fit]{last_fit()}}. For \code{\link[=collect_predictions]{collect_predictions()}}, the control option \code{save_pred = TRUE} should have been used.}

\item{...}{Not currently used.}

\item{summarize}{A logical; should metrics be summarized over resamples
(\code{TRUE}) or should the values for each individual resample be returned (\code{FALSE})? Note that, if \code{x}
is created by \code{\link[=last_fit]{last_fit()}}, \code{summarize} has no effect. For the other object
types, the method of summarizing predictions is detailed below.}

\item{parameters}{An optional tibble of tuning parameter values that can be
used to filter the predicted values before processing. This tibble should
only have columns for each tuning parameter identifier (e.g. \code{"my_param"}
if \code{tune("my_param")} was used).}

\item{type}{One of \code{"long"} (the default) or \code{"wide"}. When \code{type = "long"},
output has columns \code{.metric} and one of \code{.estimate} or \code{mean}.
\code{.estimate}/\code{mean} gives the values for the \code{.metric}. When \code{type = "wide"},
each metric has its own column and the \code{n} and \code{std_err} columns are removed,
if they exist.}
}
\value{
A tibble. The column names depend on the results and the mode of the
model.

For \code{\link[=collect_metrics]{collect_metrics()}} and \code{\link[=collect_predictions]{collect_predictions()}}, when unsummarized,
there are columns for each tuning parameter (using the \code{id} from \code{\link[=tune]{tune()}},
if any).

\code{\link[=collect_metrics]{collect_metrics()}} also has columns \code{.metric} and \code{.estimator} by default.
For \code{\link[=collect_metrics]{collect_metrics()}} methods that have a \code{type} argument, supplying
\code{type = "wide"} will pivot the output such that each metric has its own
column. When the results are summarized, there are columns for \code{mean}, \code{n},
and \code{std_err}. When not summarized, there are additional columns for the resampling
identifier(s) and \code{.estimate}.

For \code{\link[=collect_predictions]{collect_predictions()}}, there are additional columns for the resampling
identifier(s), columns for the predicted values (e.g., \code{.pred},
\code{.pred_class}, etc.), and a column for the outcome(s) using the original
column name(s) in the data.

\code{\link[=collect_predictions]{collect_predictions()}} can summarize the various results over
replicate out-of-sample predictions. For example, when using the bootstrap,
each row in the original training set has multiple holdout predictions
(across assessment sets). To convert these results to a format where every
training set sample has a single predicted value, the results are averaged
over replicate predictions.

For regression cases, the numeric predictions are simply averaged.

For classification models, the problem is more complex. When class probabilities
are used, these are averaged and then re-normalized to make sure that they
add to one. If hard class predictions also exist in the data, then these are
determined from the summarized probability estimates (so that they match).
If only hard class predictions are in the results, then the mode is used to
summarize.

With censored outcome models, the predicted survival probabilities (if any)
are averaged while the static predicted event times are summarized using the
median.

\code{\link[=collect_notes]{collect_notes()}} returns a tibble with columns for the resampling
indicators, the location (preprocessor, model, etc.), type (error or warning),
and the notes.

\code{\link[=collect_extracts]{collect_extracts()}} collects objects extracted from fitted workflows
via the \code{extract} argument to \link[=control_grid]{control functions}. The
function returns a tibble with columns for the resampling
indicators, the location (preprocessor, model, etc.), and extracted objects.
}
\description{
Obtain and format results produced by tuning functions
}
\section{Hyperparameters and extracted objects}{


When making use of submodels, tune can generate predictions and calculate
metrics for multiple model \code{.config}urations using only one model fit.
However, this means that if a function was supplied to a
\link[=control_grid]{control function's} \code{extract} argument, tune can only
execute that extraction on the one model that was fitted. As a result,
in the \code{collect_extracts()} output, tune opts to associate the
extracted objects with the hyperparameter combination used to
fit that one model workflow, rather than the hyperparameter
combination of a submodel. In the output, this appears like
a hyperparameter entry is recycled across many \code{.config}
entries---this is intentional.

See \url{https://parsnip.tidymodels.org/articles/Submodels.html} to learn
more about submodels.
}

\examples{
\dontshow{if (tune:::should_run_examples(suggests = "kknn")) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
# Load the packaged example objects used in the first part of this example
# (ames_wflow and ames_grid_search are presumably provided by this data set).
data("example_ames_knn")
# The parameters for the model:
extract_parameter_set_dials(ames_wflow)

# Summarized over resamples
collect_metrics(ames_grid_search)

# Per-resample values
collect_metrics(ames_grid_search, summarize = FALSE)


# ---------------------------------------------------------------------------
# A self-contained example: tune a spline recipe and collect the
# predictions and extracted objects.

library(parsnip)
library(rsample)
library(dplyr)
library(recipes)
library(tibble)

# Linear regression model specification using the lm engine
lm_mod <- linear_reg() \%>\% set_engine("lm")
# Fix the seed so that the resampling splits are reproducible
set.seed(93599150)
# 2-fold cross-validation repeated 3 times
car_folds <- vfold_cv(mtcars, v = 2, repeats = 3)
# save_pred = TRUE is needed for collect_predictions(); the extract function
# supplies the objects that collect_extracts() returns
ctrl <- control_resamples(save_pred = TRUE, extract = extract_fit_engine)

# Spline step on disp whose deg_free is marked for tuning under the id df
spline_rec <-
  recipe(mpg ~ ., data = mtcars) \%>\%
  step_ns(disp, deg_free = tune("df"))

# Candidate values for the df tuning parameter
grid <- tibble(df = 3:6)

resampled <-
  lm_mod \%>\%
  tune_grid(spline_rec, resamples = car_folds, control = ctrl, grid = grid)

# Unsummarized predictions (the default for collect_predictions)
collect_predictions(resampled) \%>\% arrange(.row)
# Summarized over replicate out-of-sample predictions
collect_predictions(resampled, summarize = TRUE) \%>\% arrange(.row)
# Summarized, keeping only the candidate in the first row of the grid
collect_predictions(
  resampled,
  summarize = TRUE,
  parameters = grid[1, ]
) \%>\% arrange(.row)

# Objects gathered via the extract argument of the control function
collect_extracts(resampled)
\dontshow{\}) # examplesIf}
}
