## ----include = FALSE----------------------------------------------------------
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>"
)
# xgboost uses data.table
data.table::setDTthreads(2)
RhpcBLASctl::blas_set_num_threads(2)
RhpcBLASctl::omp_set_num_threads(2)

# set the two variables below to FALSE for a normal vignette using the data
# included in the package
download_data <- FALSE
create_sample_data <- FALSE
if (create_sample_data) {
  download_data <- TRUE # we need to download the data to create the sample data
}

## ----libraries----------------------------------------------------------------
library(tidysdm)

## ----load_presences-----------------------------------------------------------
data(lacerta)
head(lacerta)

## ----download_presences, eval = download_data---------------------------------
# # download presences
# library(rgbif)
# occ_download_get(key = "0068808-230530130749713", path = tempdir())
# # read file
# library(readr)
# distrib <- read_delim(file.path(tempdir(), "0068808-230530130749713.zip"))
# # keep the necessary columns and rename them
# lacerta <- distrib %>%
#   select(gbifID, decimalLatitude, decimalLongitude) %>%
#   rename(ID = gbifID, latitude = decimalLatitude, longitude = decimalLongitude)
# # clean up the data
# library(CoordinateCleaner)
# lacerta <- clean_coordinates(
#   x = lacerta,
#   lon = "longitude",
#   lat = "latitude",
#   species = "ID",
#   value = "clean"
# )

## ----echo=FALSE, results='hide', eval=create_sample_data----------------------
# usethis::use_data(lacerta, overwrite = TRUE)

## ----cast_to_sf---------------------------------------------------------------
library(sf)
lacerta <- st_as_sf(lacerta, coords = c("longitude", "latitude"))
st_crs(lacerta) <- "+proj=longlat"

## ----land_mask, eval= download_data-------------------------------------------
# library(pastclim)
# download_dataset(dataset = "WorldClim_2.1_10m")
# land_mask <-
#   get_land_mask(time_ce = 1985, dataset = "WorldClim_2.1_10m")
# 
# # Iberia peninsula extension
# iberia_poly <-
#   terra::vect(
#     "POLYGON((-9.8 43.3,-7.8 44.1,-2.0 43.7,3.6 42.5,3.8 41.5,1.3 40.8,0.3 39.5,
#      0.9 38.6,-0.4 37.5,-1.6 36.7,-2.3 36.3,-4.1 36.4,-4.5 36.4,-5.0 36.1,
#     -5.6 36.0,-6.3 36.0,-7.1 36.9,-9.5 36.6,-9.4 38.0,-10.6 38.9,-9.5 40.8,
#     -9.8 43.3))"
#   )
# 
# crs(iberia_poly) <- "+proj=longlat"
# # crop the extent
# land_mask <- crop(land_mask, iberia_poly)
# # and mask to the polygon
# land_mask <- mask(land_mask, iberia_poly)

## ----land_mask_save, echo=FALSE, results = 'hide', eval=create_sample_data----
# terra::saveRDS(land_mask, "../inst/extdata/lacerta_land_mask.rds")

## ----land_mask_load, echo=FALSE, eval=!download_data--------------------------
library(pastclim)
set_data_path(on_CRAN = TRUE)
# Iberia peninsula extension
iberia_poly <-
  terra::vect(
    "POLYGON((-9.8 43.3,-7.8 44.1,-2.0 43.7,3.6 42.5,3.8 41.5,1.3 40.8,0.3 39.5,
     0.9 38.6,-0.4 37.5,-1.6 36.7,-2.3 36.3,-4.1 36.4,-4.5 36.4,-5.0 36.1,
    -5.6 36.0,-6.3 36.0,-7.1 36.9,-9.5 36.6,-9.4 38.0,-10.6 38.9,-9.5 40.8,
    -9.8 43.3))"
  )
crs(iberia_poly) <- "+proj=longlat"
land_mask <- terra::readRDS(system.file("extdata/lacerta_land_mask.rds",
  package = "tidysdm"
))

## ----fig.width=6, fig.height=4------------------------------------------------
library(tidyterra)
library(ggplot2)
ggplot() +
  geom_spatraster(data = land_mask, aes(fill = land_mask_1985)) +
  geom_sf(data = lacerta) +
  guides(fill = "none")

## -----------------------------------------------------------------------------
iberia_proj4 <-
  "+proj=aea +lon_0=-4.0 +lat_1=36.8 +lat_2=42.6 +lat_0=39.7 +datum=WGS84 +units=m +no_defs"

## -----------------------------------------------------------------------------
land_mask <- terra::project(land_mask, y = iberia_proj4)

## -----------------------------------------------------------------------------
lacerta <- st_transform(lacerta, iberia_proj4)

## ----project_iberia, fig.width=6, fig.height=4--------------------------------
ggplot() +
  geom_spatraster(data = land_mask, aes(fill = land_mask_1985)) +
  geom_sf(data = lacerta) +
  guides(fill = "none")

## ----thin_by_cell-------------------------------------------------------------
set.seed(1234567)
lacerta <- thin_by_cell(lacerta, raster = land_mask)
nrow(lacerta)

## -----------------------------------------------------------------------------
pres_data <- terra::extract(land_mask, lacerta)
summary(pres_data)

## ----plot_thin_by_cell, fig.width=6, fig.height=4-----------------------------
ggplot() +
  geom_spatraster(data = land_mask, aes(fill = land_mask_1985)) +
  geom_sf(data = lacerta) +
  guides(fill = "none")

## ----thin_by_dist-------------------------------------------------------------
set.seed(1234567)
lacerta_thin <- thin_by_dist(lacerta, dist_min = km2m(20))
nrow(lacerta_thin)

## ----plot_thin_by_dist, fig.width=6, fig.height=4-----------------------------
ggplot() +
  geom_spatraster(data = land_mask, aes(fill = land_mask_1985)) +
  geom_sf(data = lacerta_thin) +
  guides(fill = "none")

## ----download_background, eval=FALSE------------------------------------------
# library(rgbif)
# occ_download_get(key = "0121761-240321170329656", path = tempdir())
# library(readr)
# backg_distrib <- readr::read_delim(file.path(
#   tempdir(),
#   "0121761-240321170329656.zip"
# ))
# # keep the necessary columns
# lacertidae_background <- backg_distrib %>%
#   select(gbifID, decimalLatitude, decimalLongitude) %>%
#   rename(ID = gbifID, latitude = decimalLatitude, longitude = decimalLongitude)

## ----projection_background, eval=FALSE----------------------------------------
# # convert to an sf object
# lacertidae_background <- st_as_sf(lacertidae_background,
#   coords = c("longitude", "latitude")
# )
# 
# st_crs(lacertidae_background) <- "+proj=longlat"
# lacertidae_background <- st_transform(lacertidae_background, crs = iberia_proj4)

## ----echo=FALSE, results='hide', eval=create_sample_data----------------------
# usethis::use_data(lacertidae_background, overwrite = TRUE)

## ----echo=FALSE---------------------------------------------------------------
data("lacertidae_background")
lacertidae_background <- st_as_sf(lacertidae_background,
  coords = c("longitude", "latitude")
)
st_crs(lacertidae_background) <- "+proj=longlat"
lacertidae_background <- st_transform(lacertidae_background, crs = iberia_proj4)

## ----background_to_raster, fig.width=6, fig.height=4--------------------------
lacertidae_background_raster <- rasterize(lacertidae_background,
  land_mask,
  fun = "count"
)
lacertidae_background_raster <- mask(
  lacertidae_background_raster,
  land_mask
)
ggplot() +
  geom_spatraster(data = lacertidae_background_raster, aes(fill = count)) +
  scale_fill_viridis_b(na.value = "transparent")
guides(fill = "none")

## ----sample_background--------------------------------------------------------
set.seed(1234567)
lacerta_thin <- sample_background(
  data = lacerta_thin, raster = lacertidae_background_raster,
  n = 3 * nrow(lacerta_thin),
  method = "bias",
  class_label = "background",
  return_pres = TRUE
)

## ----plot_sample_pseudoabs, fig.width=6, fig.height=4-------------------------
ggplot() +
  geom_spatraster(data = land_mask, aes(fill = land_mask_1985)) +
  geom_sf(data = lacerta_thin, aes(col = class)) +
  guides(fill = "none")

## ----load_climate, eval=download_data-----------------------------------------
# download_dataset("WorldClim_2.1_10m")
# climate_vars <- get_vars_for_dataset("WorldClim_2.1_10m")
# climate_present <- pastclim::region_slice(
#   time_ce = 1985,
#   bio_variables = climate_vars,
#   data = "WorldClim_2.1_10m",
#   crop = iberia_poly
# )

## ----echo=FALSE, results='hide', eval=create_sample_data----------------------
# terra::saveRDS(
#   climate_present,
#   "../inst/extdata/lacerta_climate_present_10m.rds"
# )

## ----echo=FALSE, results='hide', eval=!download_data--------------------------
climate_present <- terra::readRDS(
  system.file("extdata/lacerta_climate_present_10m.rds",
    package = "tidysdm"
  )
)
climate_vars <- names(climate_present)

## -----------------------------------------------------------------------------
climate_present <- terra::project(climate_present, y = iberia_proj4)

## -----------------------------------------------------------------------------
lacerta_thin <- lacerta_thin %>%
  bind_cols(terra::extract(climate_present, lacerta_thin, ID = FALSE))

## ----echo=FALSE, results='hide', eval=create_sample_data----------------------
# terra::saveRDS(lacerta_thin, "../inst/extdata/lacerta_thin_all_vars.rds")

## -----------------------------------------------------------------------------
summary(lacerta_thin)

## ----fig.height=11, fig.width=7-----------------------------------------------
lacerta_thin %>% plot_pres_vs_bg(class)

## -----------------------------------------------------------------------------
lacerta_thin %>% dist_pres_vs_bg(class)

## ----climate_variables--------------------------------------------------------
suggested_vars <- c("bio06", "bio05", "bio13", "bio14", "bio15")

## ----fig.width=7, fig.height=8------------------------------------------------
pairs(climate_present[[suggested_vars]])

## ----choose_var_cor_keep------------------------------------------------------
climate_present <- climate_present[[suggested_vars]]

vars_uncor <- filter_collinear(climate_present,
  cutoff = 0.7,
  method = "cor_caret"
)
vars_uncor

## -----------------------------------------------------------------------------
lacerta_thin <- lacerta_thin %>% select(all_of(c(vars_uncor, "class")))
climate_present <- climate_present[[vars_uncor]]
names(climate_present) # variables retained in the end

## ----recipe-------------------------------------------------------------------
lacerta_rec <- recipe(lacerta_thin, formula = class ~ .)
lacerta_rec

## -----------------------------------------------------------------------------
lacerta_thin %>% check_sdm_presence(class)

## ----workflow_set-------------------------------------------------------------
lacerta_models <-
  # create the workflow_set
  workflow_set(
    preproc = list(default = lacerta_rec),
    models = list(
      # the standard glm specs
      glm = sdm_spec_glm(),
      # rf specs with tuning
      rf = sdm_spec_rf(),
      # boosted tree model (gbm) specs with tuning
      gbm = sdm_spec_boost_tree(),
      # maxent specs with tuning
      maxent = sdm_spec_maxent()
    ),
    # make all combinations of preproc and models,
    cross = TRUE
  ) %>%
  # tweak controls to store information needed later to create the ensemble
  option_add(control = control_ensemble_grid())

## ----training_cv, fig.width=6, fig.height=4-----------------------------------
library(tidysdm)
set.seed(105)
lacerta_cv <- spatial_block_cv(lacerta_thin, v = 5)
autoplot(lacerta_cv)

## ----check_balance------------------------------------------------------------
check_splits_balance(lacerta_cv, class)

## ----tune_grid----------------------------------------------------------------
set.seed(1234567)
lacerta_models <-
  lacerta_models %>%
  workflow_map("tune_grid",
    resamples = lacerta_cv, grid = 3,
    metrics = sdm_metric_set(), verbose = TRUE
  )

## ----autoplot_models, fig.width=7, fig.height=4-------------------------------
autoplot(lacerta_models)

## -----------------------------------------------------------------------------
lacerta_ensemble <- simple_ensemble() %>%
  add_member(lacerta_models, metric = "boyce_cont")
lacerta_ensemble

## ----echo=FALSE, results='hide', eval=create_sample_data----------------------
# usethis::use_data(lacerta_ensemble, overwrite = TRUE)

## ----autoplot_ens, fig.width=7, fig.height=4----------------------------------
autoplot(lacerta_ensemble)

## -----------------------------------------------------------------------------
lacerta_ensemble %>% collect_metrics()

## ----plot_present, fig.width=6, fig.height=4----------------------------------
prediction_present <- predict_raster(lacerta_ensemble, climate_present)
ggplot() +
  geom_spatraster(data = prediction_present, aes(fill = mean)) +
  scale_fill_terrain_c() +
  # plot presences used in the model
  geom_sf(data = lacerta_thin %>% filter(class == "presence"))

## ----plot_present_best, fig.width=6, fig.height=4-----------------------------
prediction_present_boyce <- predict_raster(lacerta_ensemble, climate_present,
  metric_thresh = c("boyce_cont", 0.5),
  fun = "median"
)
ggplot() +
  geom_spatraster(data = prediction_present_boyce, aes(fill = median)) +
  scale_fill_terrain_c() +
  geom_sf(data = lacerta_thin %>% filter(class == "presence"))

## -----------------------------------------------------------------------------
lacerta_ensemble <- calib_class_thresh(lacerta_ensemble,
  class_thresh = "tss_max",
  metric_thresh = c("boyce_cont", 0.5)
)

## ----fig.width=6, fig.height=4------------------------------------------------
prediction_present_binary <- predict_raster(lacerta_ensemble,
  climate_present,
  type = "class",
  class_thresh = c("tss_max"),
  metric_thresh = c("boyce_cont", 0.5)
)
ggplot() +
  geom_spatraster(data = prediction_present_binary, aes(fill = binary_mean)) +
  geom_sf(data = lacerta_thin %>% filter(class == "presence")) +
  scale_fill_discrete(na.value = "transparent")

## ----eval=download_data-------------------------------------------------------
# download_dataset("WorldClim_2.1_HadGEM3-GC31-LL_ssp245_10m")

## ----eval=FALSE---------------------------------------------------------------
# get_time_ce_steps("WorldClim_2.1_HadGEM3-GC31-LL_ssp245_10m")

## ----echo=FALSE---------------------------------------------------------------
c(2030, 2050, 2070, 2090)

## ----eval=FALSE---------------------------------------------------------------
# get_vars_for_dataset("WorldClim_2.1_HadGEM3-GC31-LL_ssp245_10m")

## ----echo=FALSE---------------------------------------------------------------
climate_vars[-length(climate_vars)]

## ----eval=download_data-------------------------------------------------------
# climate_future <- pastclim::region_slice(
#   time_ce = 2090,
#   bio_variables = vars_uncor,
#   data = "WorldClim_2.1_HadGEM3-GC31-LL_ssp245_10m",
#   crop = iberia_poly
# )

## ----echo=FALSE, results='hide', eval=create_sample_data----------------------
# terra::saveRDS(
#   climate_future,
#   "../inst/extdata/lacerta_climate_future_10m.rds"
# )

## ----echo=FALSE, results='asis', eval=!download_data--------------------------
climate_future <- terra::readRDS(
  system.file("extdata/lacerta_climate_future_10m.rds",
    package = "tidysdm"
  )
)

## -----------------------------------------------------------------------------
climate_future <- terra::project(climate_future, y = iberia_proj4)

## ----plot_future, fig.width=6, fig.height=4-----------------------------------
prediction_future <- predict_raster(lacerta_ensemble, climate_future)

ggplot() +
  geom_spatraster(data = prediction_future, aes(fill = mean)) +
  scale_fill_terrain_c()

## ----fig.width=6, fig.height=4------------------------------------------------
climate_future_clamped <- clamp_predictors(climate_future,
  training = lacerta_thin,
  .col = class
)
prediction_future_clamped <- predict_raster(lacerta_ensemble,
  raster = climate_future_clamped
)

ggplot() +
  geom_spatraster(data = prediction_future_clamped, aes(fill = mean)) +
  scale_fill_terrain_c()

## ----fig.width=6, fig.height=4------------------------------------------------
lacerta_mess_future <- extrapol_mess(
  x = climate_future,
  training = lacerta_thin,
  .col = "class"
)

ggplot() +
  geom_spatraster(data = lacerta_mess_future) +
  scale_fill_viridis_b(na.value = "transparent")

## ----fig.width=6, fig.height=4------------------------------------------------
# subset mess
lacerta_mess_future_subset <- lacerta_mess_future
lacerta_mess_future_subset[lacerta_mess_future_subset >= 0] <- NA
lacerta_mess_future_subset[lacerta_mess_future_subset < 0] <- 1

# convert into polygon
lacerta_mess_future_subset <- as.polygons(lacerta_mess_future_subset)

library(ggpattern)

# plot as a mask
ggplot() +
  geom_spatraster(data = prediction_future) +
  scale_fill_terrain_c() +
  geom_sf_pattern(
    data = lacerta_mess_future_subset,
    pattern = "stripe",
    fill = "transparent",
    pattern_fill = "black",
    pattern_density = 0.02,
    pattern_spacing = 0.05,
    pattern_angle = 45,
    alpha = 0.1,
    linewidth = 0.5
  )

## ----fig.width=6, fig.height=4------------------------------------------------
bio05_prof <- lacerta_rec %>%
  step_profile(-bio05, profile = vars(bio05)) %>%
  prep(training = lacerta_thin)

bio05_data <- bake(bio05_prof, new_data = NULL)

bio05_data <- bio05_data %>%
  mutate(
    pred = predict(lacerta_ensemble, bio05_data)$mean
  )

ggplot(bio05_data, aes(x = bio05, y = pred)) +
  geom_point(alpha = .5, cex = 1)

## -----------------------------------------------------------------------------
# empty object to store the simple ensembles that we will create
ensemble_list <- list()
set.seed(1234) # make sure you set the seed OUTSIDE the loop
for (i_repeat in 1:3) {
  # thin the data
  lacerta_thin_rep <- thin_by_cell(lacerta, raster = climate_present)
  lacerta_thin_rep <- thin_by_dist(lacerta_thin_rep, dist_min = 20000)
  # sample pseudo-absences
  lacerta_thin_rep <- sample_pseudoabs(lacerta_thin_rep,
    n = 3 * nrow(lacerta_thin_rep),
    raster = climate_present,
    method = c("dist_min", 50000)
  )
  # get climate
  lacerta_thin_rep <- lacerta_thin_rep %>%
    bind_cols(terra::extract(climate_present, lacerta_thin_rep, ID = FALSE))
  # create folds
  lacerta_thin_rep_cv <- spatial_block_cv(lacerta_thin_rep, v = 5)
  # create a recipe
  lacerta_thin_rep_rec <- recipe(lacerta_thin_rep, formula = class ~ .)
  # create a workflow_set
  lacerta_thin_rep_models <-
    # create the workflow_set
    workflow_set(
      preproc = list(default = lacerta_thin_rep_rec),
      models = list(
        # the standard glm specs
        glm = sdm_spec_glm(),
        # maxent specs with tuning
        maxent = sdm_spec_maxent()
      ),
      # make all combinations of preproc and models,
      cross = TRUE
    ) %>%
    # tweak controls to store information needed later to create the ensemble
    option_add(control = control_ensemble_grid())

  # train the model
  lacerta_thin_rep_models <-
    lacerta_thin_rep_models %>%
    workflow_map("tune_grid",
      resamples = lacerta_thin_rep_cv, grid = 3,
      metrics = sdm_metric_set(), verbose = TRUE
    )
  # make an simple ensemble and add it to the list
  ensemble_list[[i_repeat]] <- simple_ensemble() %>%
    add_member(lacerta_thin_rep_models, metric = "boyce_cont")
}

## -----------------------------------------------------------------------------
lacerta_rep_ens <- repeat_ensemble() %>% add_repeat(ensemble_list)
lacerta_rep_ens

## ----echo=FALSE, results='hide', eval=create_sample_data----------------------
# usethis::use_data(lacerta_rep_ens, overwrite = TRUE)

## ----fig.width=6, fig.height=4------------------------------------------------
lacerta_rep_ens <- predict_raster(lacerta_rep_ens, climate_present,
  fun = c("mean", "median")
)
ggplot() +
  geom_spatraster(data = lacerta_rep_ens, aes(fill = median)) +
  scale_fill_terrain_c()