% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/stat-correlation.R
\name{stat_correlation}
\alias{stat_correlation}
\title{Annotate plot with correlation test}
\usage{
stat_correlation(
  mapping = NULL,
  data = NULL,
  geom = "text_npc",
  position = "identity",
  ...,
  method = "pearson",
  n.min = 2L,
  alternative = "two.sided",
  exact = NULL,
  r.conf.level = 0.95,
  continuity = FALSE,
  small.r = FALSE,
  small.p = FALSE,
  coef.keep.zeros = TRUE,
  r.digits = 2,
  t.digits = 3,
  p.digits = 3,
  CI.brackets = c("[", "]"),
  label.x = "left",
  label.y = "top",
  hstep = 0,
  vstep = NULL,
  output.type = NULL,
  boot.R = ifelse(method == "pearson", 0, 999),
  na.rm = FALSE,
  parse = NULL,
  show.legend = FALSE,
  inherit.aes = TRUE
)
}
\arguments{
\item{mapping}{The aesthetic mapping, usually constructed with
\code{\link[ggplot2]{aes}}. Only needs to be
set at the layer level if you are overriding the plot defaults.}

\item{data}{A layer specific dataset, only needed if you want to override the
plot defaults.}

\item{geom}{The geometric object to use to display the data}

\item{position}{The position adjustment to use for overlapping points on this
layer}

\item{...}{other arguments passed on to \code{\link[ggplot2]{layer}}. This
can include aesthetics whose values you want to set, not map. See
\code{\link[ggplot2]{layer}} for more details.}

\item{method}{character One of "pearson", "kendall" or "spearman".}

\item{n.min}{integer Minimum number of distinct values in the variables for
fitting to be attempted.}

\item{alternative}{character One of "two.sided", "less" or "greater".}

\item{exact}{logical Whether an exact p-value should be computed. Used for
Kendall's tau and Spearman's rho.}

\item{r.conf.level}{numeric Confidence level for the returned confidence
interval.}

\item{continuity}{logical If \code{TRUE}, a continuity correction is used for
Kendall's tau and Spearman's rho when not computed exactly.}

\item{small.r, small.p}{logical Flags to switch use of lower case r and p for
coefficient of correlation (only for \code{method = "pearson"}) and
p-value.}

\item{coef.keep.zeros}{logical Keep or drop trailing zeros when formatting
the correlation coefficients and t-value, z-value or S-value (see note
below).}

\item{r.digits, t.digits, p.digits}{integer Number of digits after the decimal
point to use for R, r.squared, tau or rho and P-value in labels.}

\item{CI.brackets}{character vector of length 2. The opening and closing
brackets used for the CI label.}

\item{label.x, label.y}{\code{numeric} with range 0..1 "normalized parent
coordinates" (npc units) or character if using \code{geom_text_npc()} or
\code{geom_label_npc()}. If using \code{geom_text()} or \code{geom_label()}
numeric in native data units. If too short they will be recycled.}

\item{hstep, vstep}{numeric in npc units, the horizontal and vertical step
used between labels for different groups.}

\item{output.type}{character One of "expression", "LaTeX", "text", "markdown"
or "numeric".}

\item{boot.R}{integer The number of bootstrap resamples. Set to zero for no
bootstrap estimates for the CI.}

\item{na.rm}{a logical indicating whether NA values should be stripped before
the computation proceeds.}

\item{parse}{logical Passed to the geom. If \code{TRUE}, the labels will be
parsed into expressions and displayed as described in \code{?plotmath}.
Default is \code{TRUE} if \code{output.type = "expression"} and
\code{FALSE} otherwise.}

\item{show.legend}{logical. Should this layer be included in the legends?
\code{NA} includes if any aesthetics are mapped. \code{FALSE}, the default
for this statistic, never includes, and \code{TRUE} always includes.}

\item{inherit.aes}{If \code{FALSE}, overrides the default aesthetics, rather
than combining with them. This is most useful for helper functions that
define both data and aesthetics and shouldn't inherit behaviour from the
default plot specification, e.g. \code{\link[ggplot2]{borders}}.}
}
\description{
\code{stat_correlation()} applies \code{stats::cor.test()}
  respecting grouping, using \code{method = "pearson"} by default but
  alternatively the \code{"kendall"} or \code{"spearman"} methods. It
  generates labels for the correlation coefficient and p-value, the
  coefficient of determination (R^2) for method "pearson", and the number of
  observations.
}
\details{
This statistic can be used to annotate a plot with the correlation
  coefficient and the outcome of its test of significance. It supports
  Pearson, Kendall and Spearman methods to compute correlation. This
  statistic generates labels as R expressions by default but LaTeX (use TikZ
  device), markdown (use package 'ggtext') and plain text are also supported,
  as well as numeric values for user-generated text labels. The character
  labels include the symbol describing the quantity together with the numeric
  value. For the confidence interval (CI) the default is to follow the APA
  recommendation of using square brackets.

  The value of \code{parse} is set automatically based on \code{output.type},
  but if you assemble labels that need parsing from \code{numeric} output,
  the default needs to be overridden. By default the value of
  \code{output.type} is guessed from the name of the geometry.

  A ggplot statistic receives as \code{data} a data frame that is not the one
  passed as argument by the user, but instead a data frame with the variables
  mapped to aesthetics. \code{cor.test()} is always applied to the variables
  mapped to the \code{x} and \code{y} aesthetics, so the scales used for
  \code{x} and \code{y} should both be continuous scales rather than
  discrete.
}
\note{
Currently \code{coef.keep.zeros} is ignored, with trailing zeros always
  retained in the labels but not protected from being dropped by R when
  character strings are parsed into expressions.
}
\section{Aesthetics}{
 \code{stat_correlation()} requires \code{x} and
  \code{y}. In addition, the aesthetics understood by the geom
  (\code{"text_npc"} is the default) are understood and grouping respected.
}

\section{Computed variables}{
 If \code{output.type} is \code{"numeric"} the returned
  tibble contains the columns listed below with variations depending on the
  \code{method}. If the model fit function used does not return a value, the
  variable is set to \code{NA_real_}.
\describe{
  \item{x,npcx}{x position}
  \item{y,npcy}{y position}
  \item{r, and cor, tau or rho}{numeric values for correlation coefficient estimates}
  \item{t.value and its df, z.value or S.value }{numeric values for statistic estimates}
  \item{p.value, n}{numeric values.}
  \item{r.conf.level}{numeric value, as fraction of one.}
  \item{r.confint.low}{Confidence interval limit for \code{r}.}
  \item{r.confint.high}{Confidence interval limit for \code{r}.}
  \item{grp.label}{Set according to mapping in \code{aes}.}
  \item{method.label}{Set according to the \code{method} used.}
  \item{method, test}{character values}}

If \code{output.type} is different from \code{"numeric"} the returned tibble contains
in addition to the columns listed above those listed below. If the numeric
value is missing the label is set to \code{character(0L)}.

\describe{
  \item{r.label, and cor.label, tau.label or rho.label}{Correlation coefficient as a character string.}
  \item{t.value.label, z.value.label or S.value.label}{t-value and degrees of freedom, z-value or S-value as a character string.}
  \item{p.value.label}{P-value for test against zero, as a character string.}
  \item{r.confint.label, and cor.confint.label, tau.confint.label or rho.confint.label}{Confidence interval for \code{r} (only with \code{method = "pearson"}).}
  \item{n.label}{Number of observations used in the fit, as a character string.}
  \item{grp.label}{Set according to mapping in \code{aes}, as a character string.}}

To explore the computed values returned for a given input we suggest the use
of \code{\link[gginnards]{geom_debug}} as shown in the last examples below.
}

\examples{
# generate artificial data
set.seed(4321)
x <- (1:100) / 10
y <- x + rnorm(length(x))
my.data <- data.frame(x = x,
                      y = y,
                      y.desc = - y,
                      group = c("A", "B"))

# by default only R is displayed
ggplot(my.data, aes(x, y)) +
  geom_point() +
  stat_correlation()

ggplot(my.data, aes(x, y)) +
  geom_point() +
  stat_correlation(small.r = TRUE)

ggplot(my.data, aes(x, y.desc)) +
  geom_point() +
  stat_correlation(label.x = "right")

# non-default methods
ggplot(my.data, aes(x, y)) +
  geom_point() +
  stat_correlation(method = "kendall")

ggplot(my.data, aes(x, y)) +
  geom_point() +
  stat_correlation(method = "spearman")

# use_label() can map a user selected label
ggplot(my.data, aes(x, y)) +
  geom_point() +
  stat_correlation(use_label("R2"))

# use_label() can assemble and map a combined label
ggplot(my.data, aes(x, y)) +
  geom_point() +
  stat_correlation(use_label(c("R", "P", "n", "method")))

ggplot(my.data, aes(x, y)) +
  geom_point() +
  stat_correlation(use_label(c("R", "R.CI")))

ggplot(my.data, aes(x, y)) +
  geom_point() +
  stat_correlation(use_label(c("R", "R.CI")), boot.R = 999)

ggplot(my.data, aes(x, y)) +
  geom_point() +
  stat_correlation(use_label(c("R", "R.CI")), method = "kendall")

ggplot(my.data, aes(x, y)) +
  geom_point() +
  stat_correlation(use_label(c("R", "R.CI")), method = "spearman")

# manually assemble and map a specific label using paste() and aes()
ggplot(my.data, aes(x, y)) +
  geom_point() +
  stat_correlation(aes(label = paste(after_stat(r.label),
                                     after_stat(p.value.label),
                                     after_stat(n.label),
                                     sep = "*\", \"*")))

# manually format and map a specific label using sprintf() and aes()
ggplot(my.data, aes(x, y)) +
  geom_point() +
  stat_correlation(aes(label = sprintf("\%s*\" with \"*\%s*\" for \"*\%s",
                                       after_stat(r.label),
                                       after_stat(p.value.label),
                                       after_stat(t.value.label))))

# Inspecting the returned data using geom_debug()
# This provides a quick way of finding out the names of the variables that
# are available for mapping to aesthetics with after_stat().

gginnards.installed <- requireNamespace("gginnards", quietly = TRUE)

if (gginnards.installed)
  library(gginnards)

# the whole of computed data
if (gginnards.installed)
  ggplot(my.data, aes(x, y)) +
    geom_point() +
    stat_correlation(geom = "debug")

if (gginnards.installed)
  ggplot(my.data, aes(x, y)) +
    geom_point() +
    stat_correlation(geom = "debug", method = "pearson")

if (gginnards.installed)
  ggplot(my.data, aes(x, y)) +
    geom_point() +
    stat_correlation(geom = "debug", method = "kendall")

if (gginnards.installed)
  ggplot(my.data, aes(x, y)) +
    geom_point() +
    stat_correlation(geom = "debug", method = "spearman")

if (gginnards.installed)
  ggplot(my.data, aes(x, y)) +
    geom_point() +
    stat_correlation(geom = "debug", output.type = "numeric")

if (gginnards.installed)
  ggplot(my.data, aes(x, y)) +
    geom_point() +
    stat_correlation(geom = "debug", output.type = "markdown")

if (gginnards.installed)
  ggplot(my.data, aes(x, y)) +
    geom_point() +
    stat_correlation(geom = "debug", output.type = "LaTeX")

}
\seealso{
\code{\link[stats]{cor.test}} for details on the computations.
}
\concept{ggplot statistics for correlation.}
