% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/gfpca_twoStep.R
\name{gfpca_twoStep}
\alias{gfpca_twoStep}
\title{Generalized functional principal component analysis}
\usage{
gfpca_twoStep(
  Y,
  family = "gaussian",
  npc = NULL,
  npc_criterion = NULL,
  Kt = 8,
  t_min = NULL,
  t_max = NULL,
  row_obj = NULL,
  index_significantDigits = 4L,
  estimation_accuracy = "high",
  start_params = NULL,
  periodic = FALSE,
  verbose = 1,
  ...
)
}
\arguments{
\item{Y}{Data frame. Should have the variables \code{id}, \code{value} and \code{index}.}

\item{family}{One of \code{c("gaussian","binomial","gamma","poisson")}.
Poisson data are rounded before performing
the GFPCA to ensure integer data, see Details section below.
Defaults to \code{"gaussian"}.}

\item{npc, npc_criterion}{The number of functional principal components (FPCs)
has to be specified either directly as \code{npc} or based on their explained
share of variance. In the latter case, \code{npc_criterion} can either be set
to (i) a share between 0 and 1, or (ii) a vector with two elements comprising
the targeted explained share of variance and a cut-off scree plot criterion,
both between 0 and 1. As an example for the latter,
\code{npc_criterion = c(0.9,0.02)} tries to choose a number of FPCs that
explains at least 90\% of variation, but only includes FPCs that explain at
least 2\% of variation (even if this means 90\% explained variation is not reached).}

\item{Kt}{Number of B-spline basis functions used to estimate mean functions
and functional principal components. Default is 8.}

\item{t_min}{Minimum value to be evaluated on the time domain.}

\item{t_max}{Maximum value to be evaluated on the time domain.}

\item{row_obj}{If NULL, the function cleans the data and calculates row indices. 
Keep this NULL if you are using standalone \code{register} function.}

\item{index_significantDigits}{Positive integer \code{>= 2}, stating the number
of significant digits to which the index grid should be rounded. Coarsening the
index grid is necessary since otherwise the covariance surface matrix
explodes in size in the presence of too many unique index values (which is
always the case after some registration step). Defaults to 4. Set to
\code{NULL} to prevent rounding.}

\item{estimation_accuracy}{One of \code{c("high","low")}. When set to \code{"low"},
the mixed model estimation step in \code{lme4} is performed with lower
accuracy, reducing computation time. Defaults to \code{"high"}.}

\item{start_params}{Optional start values for gamm4. Not used if
\code{npc_criterion} is specified.}

\item{periodic}{Only included for full consistency with \code{fpca_gauss}
and \code{bfpca}. If TRUE, returns the knots vector for periodic B-spline
basis functions. Defaults to FALSE. This parameter does not change the
results of the two-step GFPCA.}

\item{verbose}{Can be set to integers between 0 and 4 to control the level of
detail of the printed diagnostic messages. Higher numbers lead to more detailed
messages. Defaults to 1.}

\item{...}{Additional arguments passed to \code{\link{cov_hall}}.}
}
\value{
An object of class \code{fpca} containing:
\item{fpca_type}{Information that FPCA was performed with the 'two-step' approach,
in contrast to \code{registr::fpca_gauss} or \code{registr::bfpca}.}
\item{t_vec}{Time vector over which the mean \code{mu} was evaluated.
The resolution can be specified by setting \code{index_significantDigits}.}
\item{knots}{Cutpoints for B-spline basis used to rebuild \code{alpha}.}
\item{efunctions}{\eqn{D \times npc} matrix of estimated FPC basis functions.}
\item{evalues}{Estimated variance of the FPC scores.}
\item{evalues_sum}{Sum of all (nonnegative) eigenvalues of the smoothed
covariance surface estimated with \code{\link{cov_hall}}. Can be used as an
approximation for the total variance present in \code{Y} to compute the
shares of explained variance of the FPC scores.}
\item{npc}{Number of FPCs.}
\item{scores}{\eqn{I \times npc} matrix of estimated FPC scores.}
\item{alpha}{Estimated population-level mean.}
\item{mu}{Estimated population-level mean. Same value as \code{alpha} but included for compatibility
with \code{refund.shiny} package.}
\item{subject_coefs}{Always \code{NA} but included for full consistency
with \code{fpca_gauss} and \code{bfpca}.} 
\item{Yhat}{FPC approximation of subject-specific means, before applying the
response function.}
\item{Y}{The observed data.}
\item{family}{The exponential family distribution used (see argument \code{family}), included for compatibility with the \code{refund.shiny} package.}
\item{gamm4_theta}{Estimated parameters of the mixed model.}
}
\description{
Function for applying FPCA to different exponential family distributions.
Used in the FPCA step for registering functional data,
called by \code{\link{register_fpca}} when \code{fpca_type = "two-step"}. \cr \cr
The method implements the \sQuote{two-step approach} of Gertheiss et al. (2017)
and is based on the approach of Hall et al. (2008) to estimate functional
principal components. \cr \cr
The number of functional principal components (FPCs) can either be specified
directly (argument \code{npc}) or chosen based on the explained share of
variance (\code{npc_criterion}). Using the latter, we approximate the overall
variance in the data \code{Y} with the variance represented by the smoothed
covariance surface estimated with \code{\link{cov_hall}}.
Note that the eigenvalue decomposition of this covariance surface
sometimes leads to a long tail of subordinate FPCs with small eigenvalues.
Such subordinate dimensions seem to often represent phase rather than
amplitude variation, and can be cut off by specifying the second element of
argument \code{npc_criterion}. \cr \cr
This function is an adaptation of the implementation of Jan
Gertheiss for Gertheiss et al. (2017), with focus on higher (RAM) efficiency
for large data settings.
}
\details{
For \code{family = "poisson"} the values in \code{Y} are rounded before
performing the GFPCA to ensure integer data. This is done to ensure reasonable
computation times. Computation times tend to explode when estimating the
underlying high-dimensional mixed model with continuous Poisson data based
on the \code{\link{gamm4}} package.

If negative eigenvalues are present, the respective eigenfunctions are dropped
and not considered further.
}
\examples{
data(growth_incomplete)

# estimate 2 FPCs
fpca_obj = gfpca_twoStep(Y = growth_incomplete, npc = 2, family = "gaussian")
plot(fpca_obj)

# estimate npc adaptively, to explain 90\% of the overall variation
fpca_obj2 = gfpca_twoStep(Y = growth_incomplete, npc_criterion = 0.9, family = "gaussian")
plot(fpca_obj2, plot_FPCs = 1:2)

}
\references{
Gertheiss, J., Goldsmith, J., & Staicu, A. M. (2017). A note on
modeling sparse exponential-family functional response curves.
\emph{Computational statistics & data analysis}, 105, 46--52.

Hall, P., Müller, H. G., & Yao, F. (2008). Modelling sparse
generalized longitudinal observations with latent Gaussian processes.
\emph{Journal of the Royal Statistical Society: Series B (Statistical Methodology)},
70(4), 703--723.
}
\author{
Alexander Bauer \email{alexander.bauer@stat.uni-muenchen.de},
based on work of Jan Gertheiss
}
