% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/bpemb.R
\name{BPEembed}
\alias{BPEembed}
\title{Tokenise and embed text using a Sentencepiece and Word2vec model}
\usage{
BPEembed(
  file_sentencepiece = x$file_model,
  file_word2vec = x$glove.bin$file_model,
  x,
  normalize = TRUE
)
}
\arguments{
\item{file_sentencepiece}{the path to the file containing the sentencepiece model}

\item{file_word2vec}{the path to the file containing the word2vec embeddings}

\item{x}{the result of a call to \code{\link{sentencepiece_download_model}}. 
If this is provided, arguments \code{file_sentencepiece} and \code{file_word2vec} will not be used.}

\item{normalize}{passed on to \code{\link[word2vec]{read.wordvectors}} to read in \code{file_word2vec}. Defaults to \code{TRUE}.}
}
\value{
an object of class \code{BPEembed}, which is a list with the following elements:
\itemize{
\item model: a sentencepiece model as loaded with \code{\link{sentencepiece_load_model}}
\item embedding: a matrix with embeddings as loaded with \code{\link[word2vec]{read.wordvectors}}
\item dim: the dimension of the embedding
\item n: the number of elements in the vocabulary
\item file_sentencepiece: the sentencepiece model file
\item file_word2vec: the word2vec embedding file
}
}
\description{
Use a sentencepiece model to tokenise text and get the embeddings of the resulting tokens.
}
\examples{
##
## Example: build the encoder from model files found in the installed package
##
model_dir <- system.file("models", package = "sentencepiece")
spm_file  <- file.path(model_dir, "nl.wiki.bpe.vs1000.model")
w2v_file  <- file.path(model_dir, "nl.wiki.bpe.vs1000.d25.w2v.bin")
bpe       <- BPEembed(file_sentencepiece = spm_file, file_word2vec = w2v_file)

## Tokenise the sentences with the sentencepiece model and embed the result
sentences <- c("De eigendomsoverdracht aan de deelstaten is ingewikkeld.",
               "On est d'accord sur le prix de la biere?")
embedded  <- predict(bpe, sentences, type = "encode")
str(embedded)
embedded

## Feed the row names of the encoded output back in with type decode:
## first for the first sentence only, then for every sentence at once
pieces_first <- rownames(embedded[[1]])
predict(bpe, pieces_first, type = "decode")
pieces_all <- lapply(embedded, rownames)
predict(bpe, pieces_all, type = "decode")
}
\seealso{
\code{\link{predict.BPEembed}}, \code{\link{sentencepiece_load_model}}, \code{\link{sentencepiece_download_model}}, \code{\link[word2vec]{read.wordvectors}}
}
