% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/dt_stringdist_join.R
\name{fuzzystring_join}
\alias{fuzzystring_join}
\alias{fuzzystring_inner_join}
\alias{fuzzystring_left_join}
\alias{fuzzystring_right_join}
\alias{fuzzystring_full_join}
\alias{fuzzystring_semi_join}
\alias{fuzzystring_anti_join}
\title{Join two tables based on fuzzy string matching}
\usage{
fuzzystring_join(
  x,
  y,
  by = NULL,
  max_dist = 2,
  method = c("osa", "lv", "dl", "hamming", "lcs", "qgram", "cosine", "jaccard", "jw",
    "soundex"),
  mode = "inner",
  ignore_case = FALSE,
  distance_col = NULL,
  ...
)

fuzzystring_inner_join(x, y, by = NULL, distance_col = NULL, ...)

fuzzystring_left_join(x, y, by = NULL, distance_col = NULL, ...)

fuzzystring_right_join(x, y, by = NULL, distance_col = NULL, ...)

fuzzystring_full_join(x, y, by = NULL, distance_col = NULL, ...)

fuzzystring_semi_join(x, y, by = NULL, distance_col = NULL, ...)

fuzzystring_anti_join(x, y, by = NULL, distance_col = NULL, ...)
}
\arguments{
\item{x}{A \code{data.frame} or \code{data.table}.}

\item{y}{A \code{data.frame} or \code{data.table}.}

\item{by}{Columns by which to join the two tables. You can supply a character
vector of column names common to both tables (e.g. \code{c("name")}), or a
named vector mapping column names in \code{x} to column names in \code{y}
(e.g. \code{c(name = "approx_name")}).}

\item{max_dist}{Maximum distance to use for joining. Smaller values are stricter.}

\item{method}{Method for computing string distance; see
\code{?stringdist::stringdist} and the \code{stringdist} package vignettes.}

\item{mode}{One of \code{"inner"}, \code{"left"}, \code{"right"}, \code{"full"},
\code{"semi"}, or \code{"anti"}.}

\item{ignore_case}{Logical; if \code{TRUE}, comparisons are case-insensitive.}

\item{distance_col}{If not \code{NULL}, adds a column with this name containing
the computed distance for each matched pair (or \code{NA} for unmatched rows
in outer joins).}

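\item{...}{Additional arguments passed to \code{\link[stringdist]{stringdist}}.
The mode-specific wrappers (e.g. \code{fuzzystring_inner_join()}) do not list
\code{max_dist} in their own signatures; as in the example, it is supplied
through \code{...}.}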
}
\value{
A joined table (same container type as \code{x}). See
\code{\link{fuzzystring_join_backend}} for details on output structure.
}
\description{
Uses \code{stringdist::stringdist()} to compute distances and a data.table-based
backend to assemble the final result. This is the main user-facing entry point
for fuzzy joins on strings.
}
\details{
If \code{method = "soundex"}, \code{max_dist} is automatically set to 0.5.
Soundex distance is either 0 (match) or 1 (no match), so this threshold keeps
exactly the pairs whose distance is 0.

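For Levenshtein-like methods (\code{"osa"}, \code{"lv"}, \code{"dl"}), a fast
prefilter is applied. Writing \code{v1} and \code{v2} for the two strings being
compared, if \code{abs(nchar(v1) - nchar(v2)) > max_dist}, the pair cannot
match, so distance is not computed for that pair. This is safe because each
single edit changes the length of a string by at most one character, so the
distance is never smaller than the difference in lengths.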
}
\examples{
\donttest{
if (requireNamespace("ggplot2", quietly = TRUE)) {
  d <- data.table::data.table(approximate_name = c("Idea", "Premiom"))
  # Match diamonds$cut to d$approximate_name
  res <- fuzzystring_inner_join(ggplot2::diamonds, d,
    by = c(cut = "approximate_name"),
    max_dist = 1
  )
  head(res)
}
}

}
