% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/helpers.R
\name{simplify_string}
\alias{simplify_string}
\title{Simplifies strings for analysis}
\usage{
simplify_string(
  x,
  alpha = TRUE,
  digits = FALSE,
  unaccent = TRUE,
  utf8_only = TRUE,
  case = "upper",
  trim = TRUE,
  stopwords = NA
)
}
\arguments{
\item{x}{A character vector.}

\item{alpha}{Should alphabetic characters be included in the cleaned up string? (Default: TRUE)}

\item{digits}{Should digits be included in the cleaned up string? (Default: FALSE)}

\item{unaccent}{Should characters be de-accented? (Default: TRUE)}

\item{utf8_only}{Should characters be UTF-8 only? (Default: TRUE)}

\item{case}{What casing should characters use? Can be one of 'upper', 'lower', 'sentence', 'title',
or 'keep' for the existing casing (Default: 'upper')}

\item{trim}{Should strings be trimmed of excess spaces? (Default: TRUE)}

\item{stopwords}{An optional vector of stop words to be removed. (Default: NA)}
}
\value{
A character vector of simplified strings.
}
\description{
Takes a character vector and "simplifies" it by standardizing case (uppercase by default), removing most non-alphabetic
(or alphanumeric) characters, removing accents, forcing UTF-8 encoding, removing excess spaces,
and optionally removing stop words. Useful in cases where you have two large vectors of person
or business names you need to compare, but where misspellings may be common.
}
\examples{
simplify_string(c('J. Jonah Jameson', 'j jonah jameson',
  'j   jonah 123   jameson', 'J Jónah Jameson...'))
simplify_string(c('123 Business Inc.', '123 business incorporated',
  '123 ... Business ... Inc.'), digits = TRUE, stopwords = c('INC', 'INCORPORATED'))

}
