% Generated by roxygen2 (4.1.1): do not edit by hand
% Please edit documentation in R/methods.r
\docType{methods}
\name{mlg.filter}
\alias{mlg.filter}
\alias{mlg.filter,genclone-method}
\alias{mlg.filter,genind-method}
\alias{mlg.filter,genlight-method}
\alias{mlg.filter,snpclone-method}
\alias{mlg.filter<-}
\alias{mlg.filter<-,genclone-method}
\alias{mlg.filter<-,genind-method}
\alias{mlg.filter<-,genlight-method}
\alias{mlg.filter<-,snpclone-method}
\title{Statistics on Clonal Filtering of Genotype Data}
\usage{
mlg.filter(pop, threshold = 0, missing = "asis", memory = FALSE,
  algorithm = "farthest_neighbor", distance = "nei.dist", threads = 0,
  stats = "MLGs", ...)

mlg.filter(pop, missing = "asis", memory = FALSE,
  algorithm = "farthest_neighbor", distance = "nei.dist", threads = 0,
  ...) <- value
}
\arguments{
\item{pop}{a \code{\linkS4class{genind}}, \code{\linkS4class{genclone}},
\code{\linkS4class{genlight}}, or \code{\linkS4class{snpclone}} object.}

\item{threshold}{the desired minimum distance between distinct genotypes.
Defaults to 0, which will only merge identical genotypes.}

\item{missing}{any method to be used by \code{\link{missingno}}: "mean",
"zero", "loci", "genotype", or "asis" (default).}

\item{memory}{whether this function should remember the last distance matrix
it generated. TRUE will attempt to reuse the last distance matrix if the
other parameters are the same. (default) FALSE will ignore any stored
matrices and not store any it generates.}

\item{algorithm}{determines the type of clustering to be done. (default)
"farthest_neighbor" merges clusters based on the maximum distance between
points in either cluster. This is the strictest of the three.
"nearest_neighbor" merges clusters based on the minimum distance between
points in either cluster. This is the loosest of the three.
"average_neighbor" merges clusters based on the average distance between
every pair of points between clusters.}

\item{distance}{a character or function defining the distance to be applied
to pop. Defaults to \code{\link{nei.dist}} for genclone objects and
\code{\link{bitwise.dist}} for snpclone objects. A matrix or table
containing distances between individuals (such as the output of
\code{\link{nei.dist}}) is also accepted for this parameter.}

\item{threads}{The maximum number of parallel threads to be used within this
function. A value of 0 (default) will attempt to use as many threads as
there are available cores/CPUs. In most cases this is ideal. A value of 1
will force the function to run serially, which may increase stability on
some systems. Other values may be specified, but should be used with
caution.}

\item{stats}{determines which statistics this function should return on
cluster mergers. If (default) "MLGs", this function will return a vector of
cluster assignments, similar to that of \code{\link{mlg.vector}}. If
"thresholds", the threshold at which each cluster was merged will be
returned instead of the cluster assignment. "distances" will return a
distance matrix of the new distances between each new cluster. If "sizes",
the size of each remaining cluster will be returned. Finally, "all" will
return a list of all 4.}

\item{...}{any parameters to be passed off to the distance method.}

\item{value}{the threshold at which genotypes should be collapsed.}
}
\value{
By default, the collapsed multilocus genotypes. Otherwise, any
  combination of the following:
\subsection{MLGs}{
  a numeric vector naming the multilocus genotype of each individual in the
  dataset. Each genotype is at least the specified distance apart, as
  calculated by the selected algorithm. If stats is set to
  \code{"thresholds"}, this function will return the thresholds at which each
  cluster merger occurred instead of the new cluster assignments.
}
\subsection{THRESHOLDS}{
  A numeric vector representing the thresholds beyond which clusters of
  multilocus genotypes were collapsed.
}
\subsection{DISTANCES}{
  A square matrix representing the distances between each cluster.
}
\subsection{SIZES}{
 The sizes of the multilocus genotype clusters in order.
}
}
\description{
Create a vector of multilocus genotype indices filtered by minimum distance.
}
\details{
This function will take in any distance matrix or function and
collapse multilocus genotypes below a given threshold. If you use this
function as the assignment method
(\code{mlg.filter(myData, distance = myDist) <- 0.5}), the distance function
or matrix will be remembered by the object. This means that if you define
your own distance matrix or function, you must keep it in memory to further
utilize \code{mlg.filter}.
}
\note{
\code{mlg.filter} makes use of \code{mlg.vector} grouping prior to
  applying the given threshold. Genotype numbers returned by
  \code{mlg.filter} represent the lowest numbered genotype (as returned by
  \code{mlg.vector}) in each new multilocus genotype. Therefore
  \code{mlg.filter} and \code{mlg.vector} return the same vector when
  threshold is set to 0 or less.
}
\examples{
data(partial_clone)
pc <- as.genclone(partial_clone) # convert to genclone object

# Get MLGs at threshold 0.05
mlg.filter(pc, threshold = 0.05, distance = "nei.dist")
pc # 26 mlgs

# Set MLGs at threshold 0.05
mlg.filter(pc, distance = "nei.dist") <- 0.05
pc # 25 mlgs

\dontrun{
# The distance definition is persistent
mlg.filter(pc) <- 0.1
pc # 24 mlgs

# You can still change the definition
mlg.filter(pc, distance = diss.dist, percent = TRUE) <- 0.1
pc

# Even with custom definitions
data(Pinf)
Pinf
mlg.filter(Pinf, distance = function(x) dist(tab(x))) <- 3
Pinf
mlg.filter(Pinf) <- 4
Pinf

# on genlight/snpclone objects
set.seed(999)
gc <- as.snpclone(glSim(100, 0, n.snp.struc = 1e3, ploidy = 2))
gc # 100 mlgs
mlg.filter(gc) <- 0.25
gc # 82 mlgs
}
}

