% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/1_fgpm_Class.R
\name{fgpm}
\alias{fgpm}
\title{Gaussian process models for scalar and functional inputs}
\usage{
fgpm(
  sIn = NULL,
  fIn = NULL,
  sOut,
  kerType = "matern5_2",
  f_disType = "L2_bygroup",
  f_pdims = 3,
  f_basType = "B-splines",
  var.hyp = NULL,
  ls_s.hyp = NULL,
  ls_f.hyp = NULL,
  nugget = 1e-08,
  n.starts = 1,
  n.presample = 20,
  par.clust = NULL,
  trace = TRUE,
  pbars = TRUE,
  control.optim = list(trace = TRUE)
)
}
\arguments{
\item{sIn}{An optional matrix of scalar input values to train the model. Each column must match an input
variable and each row a training point. Either scalar input coordinates (sIn), functional input
coordinates (fIn), or both must be provided.}

\item{fIn}{An optional list of functional input values to train the model. Each element of the list must
be a matrix containing the set of curves corresponding to one functional input. Either scalar input
coordinates (sIn), functional input coordinates (fIn), or both must be provided.}

\item{sOut}{A vector (or 1-column matrix) containing the values of the scalar output at the specified
input points.}

\item{kerType}{An optional character string specifying the covariance structure to be used. To be chosen
between "gauss", "matern5_2" and "matern3_2". Default is "matern5_2".}

\item{f_disType}{An optional array of character strings specifying the distance function to be used for
each functional coordinate within the covariance function of the Gaussian process. To be chosen between
"L2_bygroup" and "L2_byindex". The L2_bygroup distance considers each curve as a whole and uses a single
length-scale parameter per functional input variable. The L2_byindex distance uses as many length-scale
parameters per functional input as discretization points it has. For instance an input discretized as
a vector of size 8 will use 8 length-scale parameters when using L2_byindex. If dimension reduction of
a functional input is requested, then L2_byindex uses as many length scale parameters as effective
dimensions used to represent the input. A single character string can also be passed as a general
selection for all the functional inputs of the model. More details in
\href{https://www.sciencedirect.com/science/article/abs/pii/S0951832019301693}{
the reference article}
and
\href{https://hal.archives-ouvertes.fr/hal-02536624}{
the in-depth package manual}. Default is "L2_bygroup".}

\item{f_pdims}{An optional array with the projection dimension for each functional input. For each input,
the projection dimension should be an integer between 0 and its original dimension, with 0 denoting
no projection. A single number can also be passed as a general selection for all the functional
inputs of the model. Default is 3.}

\item{f_basType}{An optional array of character strings specifying the family of basis functions to be used
in the projection of each functional input. To be chosen between "B-splines" and "PCA". A single character
string can also be passed as a general selection for all the functional inputs of the model. This argument
will be ignored for those inputs for which no projection was requested (i.e., for which f_pdims = 0).
Default is "B-splines".}

\item{var.hyp}{An optional number indicating the value that should be used as the variance parameter of the
model. If not provided, it is estimated through likelihood maximization.}

\item{ls_s.hyp}{An optional numeric array indicating the values that should be used as length-scale parameters
for the scalar inputs. If provided, the size of the array should match the number of scalar inputs. If not
provided, these parameters are estimated through likelihood maximization.}

\item{ls_f.hyp}{An optional numeric array indicating the values that should be used as length-scale parameters
for the functional inputs. If provided, the size of the array should match the number of effective dimensions.
Each input using the "L2_bygroup" distance will count 1 effective dimension, and each input using the
"L2_byindex" distance will count as many effective dimensions as specified by the corresponding element of
the f_pdims argument. For instance, two functional inputs of original dimensions 10 and 22, the first one
projected onto a space of dimension 5 with "L2_byindex" distance, and the second one not projected with
"L2_bygroup" distance will make up a total of 6 effective dimensions; five for the first functional input and
one for the second one. If this argument is not provided, the functional length-scale parameters are estimated
through likelihood maximization.}

\item{nugget}{An optional variance value standing for the homogeneous nugget effect. A tiny nugget might help
to overcome numerical problems related to the ill-conditioning of the covariance matrix. Default is 1e-8.}

\item{n.starts}{An optional integer indicating the number of initial points to use for the optimization of the
hyperparameters. A parallel processing cluster can be exploited in order to speed up the evaluation of
multiple initial points. More details in the description of the argument par.clust below. Default is 1.}

\item{n.presample}{An optional integer indicating the number of points to be tested in order to select the
n.starts initial points. The n.presample points will be randomly sampled from the hyper-rectangle defined by: \cr \cr
1e-10 \eqn{\le} \code{ls_s.hyp[i]} \eqn{\le} 2*max(\code{sMs[[i]]}), for i in 1 to the number of scalar inputs, \cr
1e-10 \eqn{\le} \code{ls_f.hyp[i]} \eqn{\le} 2*max(\code{fMs[[i]]}), for i in 1 to the number of functional inputs, \cr \cr
with sMs and fMs the lists of distance matrices for the scalar and functional inputs, respectively. The value of
n.starts will be assigned to n.presample if the latter is smaller. Default is 20.}

\item{par.clust}{An optional parallel processing cluster created with the \code{\link[parallel]{makeCluster}} function
of the \link[=parallel]{parallel package}. If not provided, multistart optimizations are done in sequence.}

\item{trace}{An optional boolean indicating if control messages native to the \link[=funGp-package]{funGp package} should be printed to
console. Default is TRUE. For complementary control on the display of funGp-native progress bars and
\code{\link[stats]{optim}} trace about the hyperparameter optimization process, have a look at the \code{pbars} and
\code{control.optim} arguments, respectively.}

\item{pbars}{An optional boolean indicating if progress bars should be displayed. Default is TRUE.}

\item{control.optim}{An optional list to be passed as the \code{control} argument to \code{\link[stats]{optim}}, the function
in charge of the non-linear optimization of the hyperparameters. Default is \code{list(trace = TRUE)}, equivalent to
\code{list(trace = 1)}, which enables the printing of tracing information on the progress of the optimization. Before
interacting with the \code{\link[funGp]{fgpm}()} \code{control.optim} argument, please carefully check the documentation about
the \code{control} argument provided in \code{\link[stats]{optim}} to ensure a coherent behavior and sound results. Note
that: (i) at this time, only the \code{"L-BFGS-B"} method (Byrd et al., 1995) is enabled in \code{\link[funGp]{fgpm}()};
(ii) \code{control.optim$fnscale} should not be used since our optimization problem is strictly of minimization, not maximization.}
}
\value{
An object of class \linkS4class{fgpm} containing the data structures representing the fitted funGp model.
}
\description{
This function enables fitting of Gaussian process regression models. The inputs can be
  either scalar, functional or a combination of both types.
}
\examples{
# creating funGp model using default fgpm arguments________________________________________
# generating input data for training
#   - sIn: 5 x 5 grid over [0,1]^2, i.e., n.tr = 25 training points with 2 scalar inputs
#   - fIn: list of 2 functional inputs, each stored as a matrix with one row per training
#          point; f1 has 10 columns and f2 has 22 columns
set.seed(100)
n.tr <- 25
sIn <- expand.grid(x1 = seq(0,1,length = sqrt(n.tr)), x2 = seq(0,1,length = sqrt(n.tr)))
fIn <- list(f1 = matrix(runif(n.tr*10), ncol = 10), f2 = matrix(runif(n.tr*22), ncol = 22))

# generating output data for training
sOut <- fgp_BB3(sIn, fIn, n.tr)

# building a scalar-input funGp model (only sIn is provided)
ms <- fgpm(sIn = sIn, sOut = sOut)

# building a functional-input funGp model (only fIn is provided)
mf <- fgpm(fIn = fIn, sOut = sOut)

# building a hybrid-input funGp model (both sIn and fIn are provided)
msf <- fgpm(sIn = sIn, fIn = fIn, sOut = sOut)

# plotting the three models
plot(ms)
plot(mf)
plot(msf)

# printing the three models
summary(ms) # equivalent to show(ms)
summary(mf) # equivalent to show(mf)
summary(msf) # equivalent to show(msf)


# recovering useful information from a funGp model_________________________________________
# building the model (same seed and data generation as in the first example, so that this
# example can be run on its own)
set.seed(100)
n.tr <- 25
sIn <- expand.grid(x1 = seq(0,1,length = sqrt(n.tr)), x2 = seq(0,1,length = sqrt(n.tr)))
fIn <- list(f1 = matrix(runif(n.tr*10), ncol = 10), f2 = matrix(runif(n.tr*22), ncol = 22))
sOut <- fgp_BB3(sIn, fIn, n.tr)
m1 <- fgpm(sIn = sIn, fIn = fIn, sOut = sOut)

# recovering data from model slots
m1@f_proj@coefs # list of projection coefficients for the functional inputs
m1@f_proj@basis # list of projection basis functions for the functional inputs
# list of projected functional inputs: for each input, the coefficients are multiplied
# by the transpose of the corresponding basis
Map(function(a, b) a \%*\% t(b), m1@f_proj@coefs, m1@f_proj@basis)
# training auto-covariance matrix, obtained as the product of L and its transpose
tcrossprod(m1@preMats$L)


# making predictions based on a funGp model________________________________________________
# building the model (same seed and data generation as in the first example, so that this
# example can be run on its own)
set.seed(100)
n.tr <- 25
sIn <- expand.grid(x1 = seq(0,1,length = sqrt(n.tr)), x2 = seq(0,1,length = sqrt(n.tr)))
fIn <- list(f1 = matrix(runif(n.tr*10), ncol = 10), f2 = matrix(runif(n.tr*22), ncol = 22))
sOut <- fgp_BB3(sIn, fIn, n.tr)
m1 <- fgpm(sIn = sIn, fIn = fIn, sOut = sOut)

# generating input data for prediction
#   - sIn.pr: 10 x 10 grid over [0,1]^2, i.e., n.pr = 100 prediction points
#   - fIn.pr: list with the same structure and element names (f1, f2) as the training
#             list fIn, with one row per prediction point
n.pr <- 100
sIn.pr <- as.matrix(expand.grid(x1 = seq(0,1,length = sqrt(n.pr)),
                                x2 = seq(0,1,length = sqrt(n.pr))))
fIn.pr <- list(f1 = matrix(runif(n.pr*10), ncol = 10), f2 = matrix(runif(n.pr*22), ncol = 22))

# making predictions
m1.preds <- predict(m1, sIn.pr = sIn.pr, fIn.pr = fIn.pr)

# plotting predictions
plot(m1.preds)


# simulating from a funGp model____________________________________________________________
# building the model (same seed and data generation as in the first example, so that this
# example can be run on its own)
set.seed(100)
n.tr <- 25
sIn <- expand.grid(x1 = seq(0,1,length = sqrt(n.tr)), x2 = seq(0,1,length = sqrt(n.tr)))
fIn <- list(f1 = matrix(runif(n.tr*10), ncol = 10), f2 = matrix(runif(n.tr*22), ncol = 22))
sOut <- fgp_BB3(sIn, fIn, n.tr)
m1 <- fgpm(sIn = sIn, fIn = fIn, sOut = sOut)

# generating input data for simulation
#   - sIn.sm: 10 x 10 grid over [0,1]^2, i.e., n.sm = 100 simulation points
#   - fIn.sm: list with the same structure and element names (f1, f2) as the training
#             list fIn, with one row per simulation point
n.sm <- 100
sIn.sm <- as.matrix(expand.grid(x1 = seq(0,1,length = sqrt(n.sm)),
                                x2 = seq(0,1,length = sqrt(n.sm))))
fIn.sm <- list(f1 = matrix(runif(n.sm*10), ncol = 10), f2 = matrix(runif(n.sm*22), ncol = 22))

# making simulations (nsim = 10 simulations are requested)
m1.sims <- simulate(m1, nsim = 10, sIn.sm = sIn.sm, fIn.sm = fIn.sm)

# plotting simulations
plot(m1.sims)


# creating funGp model using custom fgpm arguments_________________________________________
# generating input and output data
set.seed(100)
n.tr <- 25
sIn <- expand.grid(x1 = seq(0,1,length = sqrt(n.tr)), x2 = seq(0,1,length = sqrt(n.tr)))
fIn <- list(f1 = matrix(runif(n.tr*10), ncol = 10), f2 = matrix(runif(n.tr*22), ncol = 22))
sOut <- fgp_BB3(sIn, fIn, n.tr)

# original dimensions (number of columns of each functional input matrix)
# f1: 10
# f2: 22

# building the model with the following structure
#    - Kernel: Gaussian
#    - f1: L2_byindex distance, no projection -> 10 length-scale parameters
#    - f2: L2_bygroup distance, B-spline basis of dimension 5 -> 1 length-scale parameter
# NOTE: f_basType is set to NA for f1 since no projection is requested for it
#       (f_pdims = 0), in which case the basis type is ignored
m1 <- fgpm(sIn = sIn, fIn = fIn, sOut = sOut,
           kerType = "gauss", f_disType = c("L2_byindex", "L2_bygroup"),
           f_pdims = c(0,5), f_basType = c(NA, "B-splines"))

# plotting the model
plot(m1)

# printing the model
m1 # equivalent to show(m1)

\dontrun{
# multistart and parallelization in fgpm___________________________________________________
# generating input and output data
# (n.tr = 243 = 3^5, intended as a grid with 3 levels for each of the 5 scalar inputs)
set.seed(100)
n.tr <- 243
sIn <- expand.grid(x1 = seq(0,1,length = n.tr^(1/5)), x2 = seq(0,1,length = n.tr^(1/5)),
                   x3 = seq(0,1,length = n.tr^(1/5)), x4 = seq(0,1,length = n.tr^(1/5)),
                   x5 = seq(0,1,length = n.tr^(1/5)))
fIn <- list(f1 = matrix(runif(n.tr*10), ncol = 10), f2 = matrix(runif(n.tr*22), ncol = 22))
sOut <- fgp_BB7(sIn, fIn, n.tr)

# calling fgpm with multistart in parallel
# (a cluster with 2 workers is created, passed to fgpm through par.clust, and stopped
#  once the model has been built)
cl <- parallel::makeCluster(2)
m1 <- fgpm(sIn = sIn, fIn = fIn, sOut = sOut, n.starts = 10, par.clust = cl) # (~14 seconds)
parallel::stopCluster(cl)

# NOTE: in order to provide progress bars for the monitoring of time consuming processes
#       run in parallel, funGp relies on the doFuture and future packages. Parallel processes
#       suddenly interrupted by the user tend to leave corrupt connections. This problem
#       originates outside funGp, which limits our control over it. In the manual
#       of funGp, we provide a temporary solution to the issue and we remain attentive in
#       case a more elegant way to handle it or a manner to suppress it appears.
#
#       funGp manual: https://hal.archives-ouvertes.fr/hal-02536624
}

}
\references{
Betancourt, J., Bachoc, F., Klein, T., Idier, D., Pedreros, R., and Rohmer, J. (2020),
"Gaussian process metamodeling of functional-input code for coastal flood hazard assessment".
\emph{Reliability Engineering & System Safety}, \strong{198}, 106870.
\href{https://www.sciencedirect.com/science/article/abs/pii/S0951832019301693}{[RESS]}
\href{https://hal.archives-ouvertes.fr/hal-01998724}{[HAL]}

Betancourt, J., Bachoc, F., Klein, T., and Gamboa, F. (2020),
Technical Report: "Ant Colony Based Model Selection for Functional-Input Gaussian Process Regression. Ref. D3.b (WP3.2)".
\emph{RISCOPE project}.
\href{https://hal.archives-ouvertes.fr/hal-02532713}{[HAL]}

Betancourt, J., Bachoc, F., and Klein, T. (2020),
R Package Manual: "Gaussian Process Regression for Scalar and Functional Inputs with funGp - The in-depth tour".
\emph{RISCOPE project}.
\href{https://hal.archives-ouvertes.fr/hal-02536624}{[HAL]}
}
\seealso{
\strong{*} \link[funGp]{plot,fgpm-method}: validation plot for a \code{fgpm} model;

\strong{*} \link[funGp]{predict,fgpm-method} for predictions based on a \code{fgpm} model;

\strong{*} \link[funGp]{simulate,fgpm-method} for simulations based on a \code{fgpm} model;

\strong{*} \link[funGp]{update,fgpm-method} for post-creation updates on a \code{fgpm} model;

\strong{*} \link[funGp]{fgpm_factory} for funGp heuristic model selection.
}
\author{
José Betancourt, François Bachoc, Thierry Klein and Jérémy Rohmer
}
