Welcome to ClientVPS Mirrors

Exploring The Variables Importance

Exploring The Variables Importance

Gabriele Pittarello

2024-11-14

Introduction

Machine learning models can capture interactions between covariates. They are often black boxes, but they can be interpreted with SHAP values. In this vignette we generate two data sets, one from scenario Alpha and one from scenario Delta, and use the plotting functionalities of the ReSurv package to display the SHAP values of the fitted models.

# Input data scenario Alpha

# Simulate the raw claims data for scenario 0 with a fixed random seed.
# NOTE(review): time_unit = 1 / 360 presumably means one day in a 360-day
# year -- confirm against the data_generator() documentation.
input_data_0 <- data_generator(
  random_seed = 1, scenario = 0,
  time_unit = 1 / 360, years = 4, yearly_exposure = 200
)

# Wrap the simulated data for modelling: daily input granularity,
# quarterly output granularity, "claim_type" as categorical feature and
# the accident period "AP" as continuous feature.
individual_data_0 <- IndividualDataPP(
  data = input_data_0, id = NULL,
  categorical_features = "claim_type", continuous_features = "AP",
  accident_period = "AP", calendar_period = "RP",
  input_time_granularity = "days", output_time_granularity = "quarters",
  years = 4
)
# Input data scenario Delta

# Simulate the raw claims data for scenario 3 with the same seed, horizon
# and exposure settings used for scenario 0.
input_data3 <- data_generator(
  random_seed = 1, scenario = 3,
  time_unit = 1 / 360, years = 4, yearly_exposure = 200
)

# Wrap the simulated data for modelling: daily input granularity,
# quarterly output granularity, "claim_type" as categorical feature and
# the accident period "AP" as continuous feature.
individual_data_3 <- IndividualDataPP(
  data = input_data3, id = NULL,
  categorical_features = "claim_type", continuous_features = "AP",
  accident_period = "AP", calendar_period = "RP",
  input_time_granularity = "days", output_time_granularity = "quarters",
  years = 4
)

Here we fit neural network (NN) and XGBoost (XGB) models. To keep this vignette short, we provide the optimal hyperparameters in advance rather than tuning them here.

# Pre-tuned XGB hyperparameters for scenario Alpha.
# `params` holds the booster-level settings; the remaining entries are the
# training-loop settings stored next to it at the top level of the list.
hp_scenario_alpha_xgb <- list(
  params = list(
    booster = "gbtree",
    eta = 0.9887265,
    subsample = 0.7924135,
    # regularisation terms
    alpha = 10.85342,
    lambda = 6.213317,
    # tree-shape constraints
    min_child_weight = 3.042204,
    max_depth = 1
  ),
  # training-loop settings
  print_every_n = 0,
  nrounds = 3000,
  verbose = FALSE,
  early_stopping_rounds = 500
)

# Pre-tuned neural-network hyperparameters for scenario Alpha.
# Each name appears exactly once: the original list carried `batch_size`
# twice with the same value, which left duplicate names in the list
# (`$` silently returns only the first match).
hp_scenario_alpha_nn <- list(
  # training loop
  batch_size = 5000L,
  epochs = 5500L,
  num_workers = 0,
  tie = "Efron",
  # network architecture
  num_layers = 2,
  num_nodes = 10,
  # optimiser settings
  optim = "SGD",
  lr = 0.3023043,
  xi = 0.426443,
  eps = 0,
  activation = "SELU",
  # early stopping
  early_stopping = TRUE,
  patience = 350,
  verbose = FALSE,
  # kept as an explicit NULL entry, as in the original list
  network_structure = NULL
)

# Pre-tuned XGB hyperparameters for scenario Delta.
# `params` holds the booster-level settings; the remaining entries are
# top-level elements of the list (siblings of `params`, not members of it).
hp_scenario_delta_xgb <- list(
  params = list(
    booster = "gbtree",
    eta = 0.2717736,
    subsample = 0.9043068,
    alpha = 7.789214,
    lambda = 12.09398,
    min_child_weight = 22.4837,
    max_depth = 4
  ),
  print_every_n = 0,
  nrounds = 3000,
  verbose = FALSE,
  early_stopping_rounds = 500
)

# Pre-tuned neural-network hyperparameters for scenario Delta.
# Each name appears exactly once: the original list carried `batch_size`
# twice with the same value, which left duplicate names in the list
# (`$` silently returns only the first match).
hp_scenario_delta_nn <- list(
  # training loop
  batch_size = 5000L,
  epochs = 5500L,
  num_workers = 0,
  tie = "Efron",
  # network architecture
  num_layers = 2,
  num_nodes = 2,
  # optimiser settings
  optim = "Adam",
  lr = 0.3542422,
  xi = 0.1803953,
  eps = 0,
  activation = "LeakyReLU",
  # early stopping
  early_stopping = TRUE,
  patience = 350,
  verbose = FALSE,
  # kept as an explicit NULL entry, as in the original list
  network_structure = NULL
)
# Scenario Alpha: fit the XGB and NN hazard models on individual_data_0,
# each with its pre-tuned hyperparameter list.
resurv_model_xgb_A <- ReSurv(
  individual_data_0,
  hazard_model = "XGB",
  hparameters = hp_scenario_alpha_xgb
)

resurv_model_nn_A <- ReSurv(
  individual_data_0,
  hazard_model = "NN",
  hparameters = hp_scenario_alpha_nn
)

# Scenario Delta: same two hazard models on individual_data_3.
resurv_model_xgb_D <- ReSurv(
  individual_data_3,
  hazard_model = "XGB",
  hparameters = hp_scenario_delta_xgb
)

resurv_model_nn_D <- ReSurv(
  individual_data_3,
  hazard_model = "NN",
  hparameters = hp_scenario_delta_nn
)

SHAP values (XGB)

# Plot the fitted XGB models, scenario Alpha first and then scenario Delta.
# Each call stays a separate top-level expression so that any value
# returned by plot() is auto-printed.
plot(resurv_model_xgb_A)
plot(resurv_model_xgb_D)

SHAP values (NN)

# Plot the fitted NN models, scenario Alpha first and then scenario Delta,
# passing the same nsamples value to both calls.
# NOTE(review): nsamples presumably sets how many observations are sampled
# for the SHAP computation -- confirm against the ReSurv plot method docs.
plot(resurv_model_nn_A, nsamples = 10000)
plot(resurv_model_nn_D, nsamples = 10000)

Need a high-speed mirror for your open-source project?
Contact our mirror admin team at info@clientvps.com.

This archive is provided as a free public service to the community.
Proudly supported by infrastructure from VPSPulse , RxServers , BuyNumber , UnitVPS , OffshoreName and secure payment technology by ArionPay.