% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/naive.R
\name{mmd}
\alias{mmd}
\title{Naive computation for Maximum Mean Discrepancy}
\usage{
mmd(
  X,
  Y,
  beta = -0.1,
  pval = TRUE,
  kernel = c("Laplacian", "Gaussian"),
  numperm = 200,
  seednum = 0,
  alternative = c("greater", "two.sided"),
  allowzeropval = FALSE
)
}
\arguments{
\item{X}{Matrix (or vector) of observations in first sample.}

\item{Y}{Matrix (or vector) of observations in second sample.}

\item{beta}{Kernel parameter. Must be positive; if not, computes
median heuristic in quadratic time. Default value
is \code{-0.1}, which will force median heuristic to be used.}

\item{pval}{Boolean for whether to compute p-value or not.}

\item{kernel}{String, either \code{"Laplacian"} or \code{"Gaussian"}.
Default is \code{"Laplacian"}.}

\item{numperm}{Number of permutations. Default is \code{200}.}

\item{seednum}{Seed number for generating permutations. Default is \code{0},
which means seed is set randomly. For values larger than
\code{0}, results will be reproducible.}

\item{alternative}{A character string specifying the alternative hypothesis,
which must be either \code{"greater"} (default) or
\code{"two.sided"}. In Gretton et al., the
MMD test statistic is specified so that if it is
significantly larger than zero, then the null hypothesis
that the two samples come from the same distribution
should be rejected. For this reason, \code{"greater"}
is recommended. The test will still work
in many cases with \code{"two.sided"} specified, but this
could lead to problems in certain cases.}

\item{allowzeropval}{A boolean, specifying whether we will allow zero
p-values or not. Default is \code{FALSE}; then
a threshold of \code{0.5 / (numperm+1)} is used,
and if the computed p-value is less than this
threshold, it is then set to be this value.
This avoids the possibility of zero p-values.}
}
\value{
A list with the following elements:
\describe{
\item{\code{pval}}{The p-value of the test, if it is
computed (\code{pval=TRUE}). }
\item{\code{stat}}{The statistic of the test, which
is always computed. }
\item{\code{beta}}{The kernel parameter used in the test.
If \code{beta} was not initialised or
was negative, this will be the median heuristic
value.}
}
}
\description{
Computes maximum mean discrepancy statistics with Laplacian
or Gaussian kernel.
Suitable for multivariate data. Naive approach, quadratic in number
of observations.
}
\details{
First checks that both samples have the same number of columns (dimension).
Suppose matrix \eqn{X} has \eqn{n} rows and \eqn{d} columns,
and matrix \eqn{Y} has \eqn{m} rows; checks that \eqn{Y}
has \eqn{d} columns (if not, then throws error).
Then flattens matrices to vectors (or, if \eqn{d=1}, they are
already vectors).
Then calls C++ method. If the first sample has \eqn{n}
\eqn{d}-dimensional observations and the second sample has
\eqn{m} \eqn{d}-dimensional observations, then the algorithm
computes the statistic in \eqn{O((n+m)^2)} time.

Median difference is as follows:

\deqn{ M = \textnormal{median} \{ || x_i - x_j ||_1; \,\, i>j, \,\, 
        i=1, 2,\dots, n+m,\,\,\textnormal{ and } j=1, 2,\dots, i-1 \}, }

where \eqn{ || x_i - x_j ||_1} is the 1-norm, and so if the data
are \eqn{d}-dimensional then

\deqn{ || x_i - x_j ||_1 = \sum_{k=1}^{d} |x_{i,k} - x_{j,k}|, }

and finally the median heuristic is \code{beta = 1/M}.
This can be computed in \eqn{O( (n+m)^2 )} time.

The Laplacian kernel \eqn{k} is defined as

\deqn{ k(x,y) = \exp( -\beta || x - y ||_1 ). }

Random seed is set for \code{std::mt19937} and \code{std::shuffle} in C++.
}
\examples{

X <- matrix(c(1:12), ncol=2, byrow=TRUE)
Y <- matrix(c(13:20), ncol=2, byrow=TRUE)
mmdList <- mmd(X=X, Y=Y, beta=0.1, pval=FALSE)

#using median heuristic
mmdList <- mmd(X=X, Y=Y, pval=FALSE)

#using median heuristic and computing p-value
mmdList <- mmd(X=X, Y=Y)
\donttest{
#using median heuristic and computing p-value
#using 1000 permutations and seed 1 for reproducibility.
mmdList <- mmd(X=X, Y=Y, numperm=1000, seednum=1)
}

}
\references{
Gretton, A., Borgwardt, K. M., Rasch, M. J., Schölkopf, B. and Smola, A.
(2012) "A kernel two-sample test." The Journal of Machine Learning Research
13, no. 1, 723-773.
}
