% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/fmply.R
\name{fmply}
\alias{fmply}
\title{Read, process and write to multiple output files}
\usage{
fmply(
  input,
  outputs,
  FUN,
  ...,
  key.sep = "\\t",
  sep = "\\t",
  skip = 0,
  header = TRUE,
  nblocks = Inf,
  stringsAsFactors = FALSE,
  select = NULL,
  drop = NULL,
  col.names = NULL,
  parallel = 1
)
}
\arguments{
\item{input}{Path of the input file.}

\item{outputs}{Vector of \emph{m} paths for the output files.}

\item{FUN}{A function to apply to each block. Takes as input a \code{data.table}
and optionally additional arguments. It should return a list of length
\emph{m}, the same length as the \code{outputs} vector. The first element
of the list is written to the first output file, the second element of the
list to the second output file, and so on. Besides these \emph{m} \code{data.table}s,
it can return an additional element, which is also returned by \code{fmply()}.}

\item{...}{Additional arguments to be passed to \code{FUN}.}

\item{key.sep}{The character that delimits the first field from the rest.}

\item{sep}{The field delimiter (often equal to \code{key.sep}).}

\item{skip}{Number of lines to skip at the beginning of the file.}

\item{header}{Whether the file has a header.}

\item{nblocks}{The number of blocks to read.}

\item{stringsAsFactors}{Whether to convert strings into factors.}

\item{select}{The columns (names or numbers) to be read.}

\item{drop}{The columns (names or numbers) not to be read.}

\item{col.names}{Names of the columns.}

\item{parallel}{Number of cores to use.}
}
\value{
If \code{FUN} returns \emph{m} elements, \code{fmply()} invisibly returns
the number of blocks parsed. If \code{FUN} returns \emph{m + 1}
elements, \code{fmply()} returns the list of all the last elements. As a
side effect, it writes the first \emph{m} outputs of \code{FUN} to the
\code{outputs} files.
}
\description{
Sometimes a file should be processed in many different ways. \code{fmply()}
applies a function to each block of the file; the function should return a
list of \emph{m} \code{data.table}s, each of which is written to a different
output file. Optionally, the function can return a list of \emph{m + 1} elements,
where the first \emph{m} elements are \code{data.table}s and are written
to the output files, while the last element is returned as in \code{flply()}.
}
\section{Slogan}{

fmply: from \strong{f}ile to \strong{m}ultiple files
}

\examples{

fin <- system.file("extdata", "dt_iris.csv", package = "fplyr")
fout1 <- tempfile()
# An empty path is used here so that the second output goes to the console
fout2 <- ""

# Copy the input file to a temporary file as it is and, at the same time,
# print a summary of each block to the console
fmply(fin, c(fout1, fout2), function(d) {
    # The first list element is written to fout1, the second one to fout2
    list(d, data.table(unclass(summary(d))))
})

fout3 <- tempfile()
fout4 <- tempfile()

# Fit a linear and a polynomial regression to each block and write the
# coefficients of each model to its own output file
fmply(fin, c(fout3, fout4), function(d) {
    # Linear model of Sepal.Length on all the other columns except Species
    lr.fit <- lm(Sepal.Length ~ ., data = d[, !"Species"])
    # One-row table: first Species value of the block plus the coefficients
    lr.summ <- data.table(Species = d$Species[1], t(coefficients(lr.fit)))
    # Polynomial model of degree 3 built on columns 3 to 5 of the block
    pr.fit <- lm(Sepal.Length ~ poly(as.matrix(d[, 3:5]), degree = 3),
                 data = d[, !"Species"])
    pr.summ <- data.table(Species = d$Species[1], t(coefficients(pr.fit)))
    # lr.summ is written to fout3 and pr.summ to fout4
    list(lr.summ, pr.summ)
})

}
