% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/compare.R
\name{stri_compare}
\alias{stri_compare}
\alias{stri_cmp}
\alias{stri_cmp_eq}
\alias{stri_cmp_neq}
\alias{stri_cmp_equiv}
\alias{stri_cmp_nequiv}
\alias{stri_cmp_lt}
\alias{stri_cmp_gt}
\alias{stri_cmp_le}
\alias{stri_cmp_ge}
\title{Compare Strings with or without Collation}
\usage{
stri_compare(e1, e2, ..., opts_collator = NULL)

stri_cmp(e1, e2, ..., opts_collator = NULL)

stri_cmp_eq(e1, e2)

stri_cmp_neq(e1, e2)

stri_cmp_equiv(e1, e2, ..., opts_collator = NULL)

stri_cmp_nequiv(e1, e2, ..., opts_collator = NULL)

stri_cmp_lt(e1, e2, ..., opts_collator = NULL)

stri_cmp_gt(e1, e2, ..., opts_collator = NULL)

stri_cmp_le(e1, e2, ..., opts_collator = NULL)

stri_cmp_ge(e1, e2, ..., opts_collator = NULL)
}
\arguments{
\item{e1, e2}{character vectors or objects coercible to character vectors}

\item{...}{additional settings for \code{opts_collator}}

\item{opts_collator}{a named list with \pkg{ICU} Collator's options,
see \code{\link{stri_opts_collator}}; \code{NULL}
selects the default collation options.}
}
\value{
The \code{stri_cmp} and \code{stri_compare} functions
return an integer vector representing the comparison results:
\code{-1} if \code{e1[...] < e2[...]},
\code{0} if they are canonically equivalent, and \code{1} if greater.

All the other functions return a logical vector that indicates
whether a given relation holds between two corresponding elements
in \code{e1} and \code{e2}.
}
\description{
These functions may be used to determine whether two strings
are equal, canonically equivalent (this is determined in a far more
sophisticated manner than when testing for equality), or to check whether
they are in a specific lexicographic order.
}
\details{
All the functions listed here are vectorized over \code{e1} and \code{e2}.

\code{stri_cmp_eq} tests whether two corresponding strings
consist of exactly the same code points, while \code{stri_cmp_neq}
checks whether there is any difference between them. These are
locale-independent operations: for natural language processing,
where the notion of canonical equivalence is more appropriate, this might
not be exactly what you are looking for; see Examples.
Please note that \pkg{stringi} always silently removes UTF-8
BOMs from input strings; therefore, e.g., \code{stri_cmp_eq} does not take
BOMs into account while comparing strings.

\code{stri_cmp_equiv} tests for canonical equivalence of two strings
and is locale-dependent. Additionally, the \pkg{ICU}'s Collator may be
tuned up so that, e.g., the comparison is case-insensitive.
To test whether two strings are not canonically equivalent,
call \code{stri_cmp_nequiv}.

\code{stri_cmp_le} tests whether
the elements in the first vector are less than or equal to
the corresponding elements in the second vector,
\code{stri_cmp_ge} tests whether they are greater than or equal to them,
\code{stri_cmp_lt} whether they are less, and \code{stri_cmp_gt} whether
they are greater; see also, e.g., \code{\link{\%s<\%}}.

\code{stri_compare} is an alias of \code{stri_cmp}. They both
perform exactly the same locale-dependent operation.
Both functions mimic the look-and-feel of the C library's \code{strcmp()};
see Value for details.


For more information on \pkg{ICU}'s Collator and how to tune its settings
refer to \code{\link{stri_opts_collator}}.
Note that different locale settings may lead to different results
(see the examples below).
}
\examples{
# in Polish, ch < h:
stri_cmp_lt('hladny', 'chladny', locale='pl_PL')

# in Slovak, ch > h:
stri_cmp_lt('hladny', 'chladny', locale='sk_SK')

# < or > (depends on locale):
stri_cmp('hladny', 'chladny')

# ignore case differences:
stri_cmp_equiv('hladny', 'HLADNY', strength=2)

# also ignore diacritical differences:
stri_cmp_equiv('hladn\u00FD', 'hladny', strength=1, locale='sk_SK')

marios <- c('Mario', 'mario', 'M\u00e1rio', 'm\u00e1rio')
stri_cmp_equiv(marios, 'mario', case_level=TRUE, strength=2L)
stri_cmp_equiv(marios, 'mario', case_level=TRUE, strength=1L)
stri_cmp_equiv(marios, 'mario', strength=1L)
stri_cmp_equiv(marios, 'mario', strength=2L)

# non-Unicode-normalized vs normalized string:
stri_cmp_equiv(stri_trans_nfkd('\u0105'), '\u105')

# note the difference:
stri_cmp_eq(stri_trans_nfkd('\u0105'), '\u105')

# ligatures:
stri_cmp_equiv('\ufb00', 'ff', strength=2)

# phonebook collation
stri_cmp_equiv('G\u00e4rtner', 'Gaertner', locale='de_DE@collation=phonebook', strength=1L)
stri_cmp_equiv('G\u00e4rtner', 'Gaertner', locale='de_DE', strength=1L)

}
\references{
\emph{Collation} -- ICU User Guide,
\url{https://unicode-org.github.io/icu/userguide/collation/}
}
\seealso{
The official online manual of \pkg{stringi} at \url{https://stringi.gagolewski.com/}

Gagolewski M., \pkg{stringi}: Fast and portable character string processing in R, \emph{Journal of Statistical Software} 103(2), 2022, 1-59, \doi{10.18637/jss.v103.i02}

Other locale_sensitive: 
\code{\link{\%s<\%}()},
\code{\link{about_locale}},
\code{\link{about_search_boundaries}},
\code{\link{about_search_coll}},
\code{\link{stri_count_boundaries}()},
\code{\link{stri_duplicated}()},
\code{\link{stri_enc_detect2}()},
\code{\link{stri_extract_all_boundaries}()},
\code{\link{stri_locate_all_boundaries}()},
\code{\link{stri_opts_collator}()},
\code{\link{stri_order}()},
\code{\link{stri_rank}()},
\code{\link{stri_sort_key}()},
\code{\link{stri_sort}()},
\code{\link{stri_split_boundaries}()},
\code{\link{stri_trans_tolower}()},
\code{\link{stri_unique}()},
\code{\link{stri_wrap}()}
}
\concept{locale_sensitive}
\author{
\href{https://www.gagolewski.com/}{Marek Gagolewski} and other contributors
}
