% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/GPModel.R
\name{GPModel_shared_params}
\alias{GPModel_shared_params}
\title{Documentation for parameters shared by \code{GPModel}, \code{gpb.cv}, and \code{gpboost}}
\arguments{
\item{likelihood}{A \code{string} specifying the likelihood function (distribution) of the response variable. 
Available options:
\itemize{
\item{ "gaussian" }
\item{ "bernoulli_probit": binary data with Bernoulli likelihood and a probit link function }
\item{ "bernoulli_logit": binary data with Bernoulli likelihood and a logit link function }
\item{ "gamma": gamma distribution with a log link function }
\item{ "poisson": Poisson distribution with a log link function }
\item{ "negative_binomial": negative binomial distribution with a log link function }
\item{ Note: other likelihoods could be implemented upon request }
}}

\item{group_data}{A \code{vector} or \code{matrix} whose columns are categorical grouping variables. 
The elements are group levels defining grouped random effects.
The elements of 'group_data' can be integer, double, or character.
The number of columns corresponds to the number of grouped (intercept) random effects}

\item{group_rand_coef_data}{A \code{vector} or \code{matrix} with numeric covariate data 
for grouped random coefficients}

\item{ind_effect_group_rand_coef}{A \code{vector} with integer indices that 
indicate the corresponding categorical grouping variable (=columns) in 'group_data' for 
every covariate in 'group_rand_coef_data'. Counting starts at 1.
The length of this index vector must equal the number of covariates in 'group_rand_coef_data'.
For instance, c(1,1,2) means that the first two covariates (=first two columns) in 'group_rand_coef_data'
have random coefficients corresponding to the first categorical grouping variable (=first column) in 'group_data',
and the third covariate (=third column) in 'group_rand_coef_data' has a random coefficient
corresponding to the second grouping variable (=second column) in 'group_data'}

\item{drop_intercept_group_rand_effect}{A \code{vector} of type \code{logical} (boolean). 
Indicates whether intercept random effects are dropped (only for random coefficients). 
If drop_intercept_group_rand_effect[k] is TRUE, the intercept random effect number k is dropped / not included. 
Only random effects with random slopes can be dropped.}

\item{gp_coords}{A \code{matrix} with numeric coordinates (= inputs / features) for defining Gaussian processes}

\item{gp_rand_coef_data}{A \code{vector} or \code{matrix} with numeric covariate data for
Gaussian process random coefficients}

\item{cov_function}{A \code{string} specifying the covariance function for the Gaussian process. 
Available options:
\itemize{
\item{"exponential": Exponential covariance function (using the parametrization of Diggle and Ribeiro, 2007) }
\item{"gaussian": Gaussian, aka squared exponential, covariance function (using the parametrization of Diggle and Ribeiro, 2007) }
\item{ "matern": Matern covariance function with the smoothness specified by 
the \code{cov_fct_shape} parameter (using the parametrization of Rasmussen and Williams, 2006) }
\item{"powered_exponential": powered exponential covariance function with the exponent specified by 
the \code{cov_fct_shape} parameter (using the parametrization of Diggle and Ribeiro, 2007) }
\item{ "wendland": Compactly supported Wendland covariance function (using the parametrization of Bevilacqua et al., 2019, AOS) }
\item{ "matern_space_time": Spatio-temporal Matern covariance function with different range parameters for space and time. 
Note that the first column in \code{gp_coords} must correspond to the time dimension }
}}

\item{cov_fct_shape}{A \code{numeric} specifying the shape parameter of the covariance function 
(=smoothness parameter for Matern covariance). 
This parameter is irrelevant for some covariance functions such as the exponential or Gaussian}

\item{gp_approx}{A \code{string} specifying the large data approximation
for Gaussian processes. Available options: 
\itemize{
\item{"none": No approximation }
\item{"vecchia": A Vecchia approximation; see Sigrist (2022, JMLR) for more details }
\item{"tapering": The covariance function is multiplied by 
a compactly supported Wendland correlation function }
\item{"fitc": Fully Independent Training Conditional approximation aka 
modified predictive process approximation; see Gyger, Furrer, and Sigrist (2024) for more details }
\item{"full_scale_tapering": A full scale approximation combining an 
inducing point / predictive process approximation with tapering on the residual process; 
see Gyger, Furrer, and Sigrist (2024) for more details }
}}

\item{cov_fct_taper_range}{A \code{numeric} specifying the range parameter 
of the Wendland covariance function and Wendland correlation taper function. 
We follow the notation of Bevilacqua et al. (2019, AOS)}

\item{cov_fct_taper_shape}{A \code{numeric} specifying the shape (=smoothness) parameter 
of the Wendland covariance function and Wendland correlation taper function. 
We follow the notation of Bevilacqua et al. (2019, AOS)}

\item{num_neighbors}{An \code{integer} specifying the number of neighbors for 
the Vecchia approximation. Note: for prediction, the number of neighbors can 
be set through the 'num_neighbors_pred' parameter in the 'set_prediction_data'
function. By default, num_neighbors_pred = 2 * num_neighbors. Further, 
the type of Vecchia approximation used for making predictions is set through  
the 'vecchia_pred_type' parameter in the 'set_prediction_data' function}

\item{vecchia_ordering}{A \code{string} specifying the ordering used in 
the Vecchia approximation. Available options:
\itemize{
\item{"none": the default ordering in the data is used }
\item{"random": a random ordering }
\item{"time": ordering according to time (only for space-time models) }
\item{"time_random_space": ordering according to time and randomly for all 
spatial points with the same time points (only for space-time models) }
}}

\item{ind_points_selection}{A \code{string} specifying the method for choosing inducing points. 
Available options:
\itemize{
\item{"kmeans++": the k-means++ algorithm }
\item{"cover_tree": the cover tree algorithm }
\item{"random": random selection from data points }
}}

\item{num_ind_points}{An \code{integer} specifying the number of inducing 
points / knots for, e.g., a predictive process approximation}

\item{cover_tree_radius}{A \code{numeric} specifying the radius (= "spatial resolution") 
for the cover tree algorithm}

\item{matrix_inversion_method}{A \code{string} specifying the method used for inverting covariance matrices. 
Available options:
\itemize{
\item{"cholesky": Cholesky factorization }
\item{"iterative": iterative methods. A combination of conjugate gradient, Lanczos algorithm, and other methods. 

This is currently only supported for the following cases: 
\itemize{
\item{likelihood != "gaussian" and gp_approx == "vecchia" (non-Gaussian likelihoods with a Vecchia-Laplace approximation) }
\item{likelihood == "gaussian" and gp_approx == "full_scale_tapering" (Gaussian likelihood with a full-scale tapering approximation) }
}
}
}}

\item{seed}{An \code{integer} specifying the seed used for model creation 
(e.g., random ordering in Vecchia approximation)}

\item{vecchia_pred_type}{A \code{string} specifying the type of Vecchia approximation used for making predictions.
Default value if vecchia_pred_type = NULL: "order_obs_first_cond_obs_only". 
Available options:
\itemize{
\item{"order_obs_first_cond_obs_only": Vecchia approximation for the observable process and observed training data is 
ordered first and the neighbors are only observed training data points }
\item{"order_obs_first_cond_all": Vecchia approximation for the observable process and observed training data is 
ordered first and the neighbors are selected among all points (training + prediction) }
\item{"latent_order_obs_first_cond_obs_only": Vecchia approximation for the latent process and observed data is 
ordered first and neighbors are only observed points}
\item{"latent_order_obs_first_cond_all": Vecchia approximation 
for the latent process and observed data is ordered first and neighbors are selected among all points }
\item{"order_pred_first": Vecchia approximation for the observable process and prediction data is 
ordered first for making predictions. This option is only available for Gaussian likelihoods }
}}

\item{num_neighbors_pred}{An \code{integer} specifying the number of neighbors for the Vecchia approximation 
for making predictions. Default value if NULL: num_neighbors_pred = 2 * num_neighbors}

\item{cg_delta_conv_pred}{A \code{numeric} specifying the tolerance level for L2 norm of residuals for 
checking convergence in conjugate gradient algorithms when being used for prediction. 
Default value if NULL: 1e-3}

\item{nsim_var_pred}{An \code{integer} specifying the number of samples when simulation 
is used for calculating predictive variances. 
Default value if NULL: 1000}

\item{rank_pred_approx_matrix_lanczos}{An \code{integer} specifying the rank 
of the matrix for approximating predictive covariances obtained using the Lanczos algorithm. 
Default value if NULL: 1000}

\item{cluster_ids}{A \code{vector} with elements indicating independent realizations of 
random effects / Gaussian processes (same values = same process realization).
The elements of 'cluster_ids' can be integer, double, or character.}

\item{free_raw_data}{A \code{boolean}. If TRUE, the data (groups, coordinates, covariate data for random coefficients) 
is freed in R after initialization}

\item{y}{A \code{vector} with response variable data}

\item{X}{A \code{matrix} with numeric covariate data for the 
fixed effects linear regression term (if there is one)}

\item{params}{A \code{list} with parameters for the estimation / optimization
 \itemize{
    \item{optimizer_cov: \code{string} (default = "lbfgs" for linear mixed effects models and "gradient_descent" for the GPBoost algorithm). 
    Optimizer used for estimating covariance parameters. 
    Options: "gradient_descent", "lbfgs", "fisher_scoring", "nelder_mead", "adam".
    If there are additional auxiliary parameters for non-Gaussian likelihoods, 
    'optimizer_cov' is also used for those }
    \item{optimizer_coef: \code{string} (default = "wls" for Gaussian likelihoods and "gradient_descent" for other likelihoods). 
    Optimizer used for estimating linear regression coefficients, if there are any 
    (for the GPBoost algorithm there are usually none). 
    Options: "gradient_descent", "lbfgs", "wls", "nelder_mead", "adam". Gradient descent steps are done simultaneously 
    with gradient descent steps for the covariance parameters. 
    "wls" refers to doing coordinate descent for the regression coefficients using weighted least squares.
    If 'optimizer_cov' is set to "nelder_mead", "lbfgs", or "adam", 
    'optimizer_coef' is automatically also set to the same value.}
    \item{maxit: \code{integer} (default = 1000). 
    Maximal number of iterations for optimization algorithm }
    \item{delta_rel_conv: \code{numeric} (default = 1E-6 except for "nelder_mead" for which the default is 1E-8). 
    Convergence tolerance. The algorithm stops if the relative change 
    in either the (approximate) log-likelihood or the parameters is below this value. 
    For "adam", the L2 norm of the gradient is used instead of the relative change in the log-likelihood. 
    If < 0, internal default values are used }
    \item{convergence_criterion: \code{string} (default = "relative_change_in_log_likelihood"). 
    The convergence criterion used for terminating the optimization algorithm.
    Options: "relative_change_in_log_likelihood" or "relative_change_in_parameters" }
    \item{init_coef: \code{vector} with \code{numeric} elements (default = NULL). 
    Initial values for the regression coefficients (if there are any, can be NULL) }
    \item{init_cov_pars: \code{vector} with \code{numeric} elements (default = NULL). 
    Initial values for covariance parameters of Gaussian process and 
    random effects (can be NULL). The order is the same as the order 
    of the parameters in the summary function: first is the error variance 
    (only for "gaussian" likelihood), next follow the variances of the 
    grouped random effects (if there are any, in the order provided in 'group_data'), 
    and then follow the marginal variance and the range of the Gaussian process. 
    If there are multiple Gaussian processes, then the variances and ranges follow alternatingly.
    If 'init_cov_pars = NULL', an internal choice is used that depends on the 
    likelihood and the random effects type and covariance function. 
    If you select the option 'trace = TRUE' in the 'params' argument, 
    you will see the first initial covariance parameters in iteration 0. }
    \item{lr_coef: \code{numeric} (default = 0.1). 
    Learning rate for fixed effect regression coefficients if gradient descent is used }
    \item{lr_cov: \code{numeric} (default = 0.1 for "gradient_descent" and 1. for "fisher_scoring"). 
    Initial learning rate for covariance parameters if "gradient_descent" or "fisher_scoring" is used. 
    If lr_cov < 0, internal default values are used.
    If there are additional auxiliary parameters for non-Gaussian likelihoods, 
    'lr_cov' is also used for those}
    \item{use_nesterov_acc: \code{boolean} (default = TRUE). 
    If TRUE, Nesterov acceleration is used.
    This is used only for gradient descent }
    \item{acc_rate_coef: \code{numeric} (default = 0.5). 
    Acceleration rate for regression coefficients (if there are any) 
    for Nesterov acceleration }
    \item{acc_rate_cov: \code{numeric} (default = 0.5). 
    Acceleration rate for covariance parameters for Nesterov acceleration }
    \item{momentum_offset: \code{integer} (default = 2). 
    Number of iterations for which no momentum is applied in the beginning }
    \item{trace: \code{boolean} (default = FALSE). 
    If TRUE, information on the progress of the parameter
    optimization is printed}
    \item{std_dev: \code{boolean} (default = TRUE). 
    If TRUE, approximate standard deviations are calculated for the covariance and linear regression parameters 
    (= square root of diagonal of the inverse Fisher information for Gaussian likelihoods and 
    square root of diagonal of a numerically approximated inverse Hessian for non-Gaussian likelihoods) }
    \item{init_aux_pars: \code{vector} with \code{numeric} elements (default = NULL). 
    Initial values for additional parameters for non-Gaussian likelihoods 
    (e.g., shape parameter of a gamma or negative_binomial likelihood) }
    \item{estimate_aux_pars: \code{boolean} (default = TRUE). 
    If TRUE, additional parameters for non-Gaussian likelihoods 
    are also estimated (e.g., shape parameter of a gamma or negative_binomial likelihood) }
    \item{cg_max_num_it: \code{integer} (default = 1000). 
    Maximal number of iterations for conjugate gradient algorithms }
    \item{cg_max_num_it_tridiag: \code{integer} (default = 1000). 
    Maximal number of iterations for conjugate gradient algorithm 
    when being run as Lanczos algorithm for tridiagonalization }
    \item{cg_delta_conv: \code{numeric} (default = 1E-2).
    Tolerance level for L2 norm of residuals for checking convergence 
    in conjugate gradient algorithm when being used for parameter estimation }
    \item{num_rand_vec_trace: \code{integer} (default = 50). 
    Number of random vectors (e.g., Rademacher) for stochastic approximation of the trace of a matrix }
    \item{reuse_rand_vec_trace: \code{boolean} (default = TRUE). 
    If TRUE, random vectors (e.g., Rademacher) for stochastic approximations 
    of the trace of a matrix are sampled only once at the beginning of 
    the parameter estimation and reused in later trace approximations.
    Otherwise they are sampled every time a trace is calculated }
    \item{seed_rand_vec_trace: \code{integer} (default = 1). 
    Seed number to generate random vectors (e.g., Rademacher) }
    \item{piv_chol_rank: \code{integer} (default = 50). 
    Rank of the pivoted Cholesky decomposition used as 
    preconditioner in conjugate gradient algorithms }
    \item{cg_preconditioner_type: \code{string}.
    Type of preconditioner used for conjugate gradient algorithms.
    \itemize{
      \item Options for non-Gaussian likelihoods and gp_approx = "vecchia": 
        \itemize{
          \item{"piv_chol_on_Sigma" (= default): (Lk * Lk^T + W^-1) as preconditioner for inverting (B^-1 * D * B^-T + W^-1), 
      where Lk is a low-rank pivoted Cholesky approximation for Sigma and B^-1 * D * B^-T approx= Sigma }
          \item{"Sigma_inv_plus_BtWB": (B^T * (D^-1 + W) * B) as preconditioner for inverting (B^T * D^-1 * B + W), 
      where B^T * D^-1 * B approx= Sigma^-1 }
      }
      \item Options for likelihood = "gaussian" and gp_approx = "full_scale_tapering": 
        \itemize{
          \item{"predictive_process_plus_diagonal" (= default): predictive process preconditioner }
          \item{"none": no preconditioner }
      }
    }
   }
}}

\item{fixed_effects}{A \code{numeric} \code{vector} with 
additional fixed effects that are added to the linear predictor (= offset). 
The length of this vector needs to equal the number of training data points.}

\item{group_data_pred}{A \code{vector} or \code{matrix} with elements being group levels 
for which predictions are made (if there are grouped random effects in the \code{GPModel})}

\item{group_rand_coef_data_pred}{A \code{vector} or \code{matrix} with covariate data 
for grouped random coefficients (if there are some in the \code{GPModel})}

\item{gp_coords_pred}{A \code{matrix} with prediction coordinates (=features) for 
Gaussian process (if there is a GP in the \code{GPModel})}

\item{gp_rand_coef_data_pred}{A \code{vector} or \code{matrix} with covariate data for 
Gaussian process random coefficients (if there are some in the \code{GPModel})}

\item{cluster_ids_pred}{A \code{vector} with elements indicating the realizations of 
random effects / Gaussian processes for which predictions are made 
(set to NULL if you have not specified this when creating the \code{GPModel})}

\item{X_pred}{A \code{matrix} with prediction covariate data for the 
fixed effects linear regression term (if there is one in the \code{GPModel})}

\item{predict_cov_mat}{A \code{boolean}. If TRUE, the (posterior) 
predictive covariance is calculated in addition to the (posterior) predictive mean}

\item{predict_var}{A \code{boolean}. If TRUE, the (posterior) 
predictive variances are calculated}

\item{vecchia_approx}{Discontinued. Use the argument \code{gp_approx} instead}
}
\description{
Documentation for parameters shared by \code{GPModel}, \code{gpb.cv}, and \code{gpboost}
}
