#' Compile the spatial demographic model
#'
#' First, compiles the vectorized population spatial maps into a series of
#' binary raster PNG files, which is the format that SLiM understands and uses
#' it to define population boundaries. Then extracts the demographic model
#' defined by the user (i.e. population divergences and gene flow events) into a
#' series of tables which are later used by the built-in SLiM script to program
#' the timing of simulation events.
#'
#' @param populations Object(s) of the \code{slendr_pop} class (multiple objects
#'   need to be specified in a \code{list})
#' @param generation_time Generation time (in model time units)
#' @param resolution How many distance units per pixel?
#' @param gene_flow Gene flow events generated by the \code{gene_flow} function
#'   (either a list of data.frame objects in the format defined by the
#'   \code{gene_flow} function, or a single data.frame)
#' @param competition,mating Maximum spatial competition and mating choice
#'   distance
#' @param dispersal Standard deviation of the normal distribution of the
#'   parent-offspring distance
#' @param path Output directory for the model configuration files which will be
#'   loaded by the backend SLiM script. If \code{NULL}, model configuration
#'   files will be saved to a temporary directory.
#' @param overwrite Completely delete the specified directory, in case it
#'   already exists, and create a new one?
#' @param simulation_length Total length of the simulation (required for forward time
#'   models, optional for models specified in backward time units which by
#'   default run to "the present time")
#' @param direction Intended direction of time. Under normal circumstances this
#'   parameter is inferred from the model and does not need to be set manually.
#' @param slim_script Path to a SLiM script to be used for executing the model
#'   (by default, a bundled backend script will be used). If \code{NULL}, the
#'   SLiM script bundled with slendr will be used.
#' @param force Force a deletion of the model directory if it is already
#'   present? Useful for non-interactive uses. In an interactive mode, the user
#'   is asked to confirm the deletion manually.
#' @param description Optional short description of the model
#' @param sim_length Deprecated. Use \code{simulation_length} instead.
#'
#' @return Compiled \code{slendr_model} model object which encapsulates all
#'   information about the specified model (which populations are involved,
#'   when and how much gene flow should occur, what is the spatial resolution
#'   of a map, and what spatial dispersal and mating parameters should be used
#'   in a SLiM simulation, if applicable)
#'
#' @export
#'
#' @example man/examples/model_definition.R
compile_model <- function(populations, generation_time, path = NULL, resolution = NULL,
                          competition = NULL, mating = NULL, dispersal = NULL,
                          gene_flow = list(), overwrite = FALSE, force = FALSE,
                          simulation_length = NULL, direction = NULL,
                          slim_script = NULL, description = "", sim_length = NULL) {
  if (is.null(simulation_length) && !is.null(sim_length)) {
    warning("Argument `sim_length` will soon be deprecated in favor of `simulation_length`.", call. = FALSE)
    simulation_length <- sim_length
  }

  if (inherits(populations, "slendr_pop"))  populations <- list(populations)

  if (is.null(path)) path <- tempfile()

  if (is.null(slim_script))
    slim_script <- system.file("scripts", "script.slim", package = "slendr")

  map <- get_map(populations[[1]])
  if (!is.null(map) && is.null(resolution))
    stop("A map resolution must be specified for spatial models", call. = FALSE)

  # make sure that all parents are present
  pop_names <- purrr::map_chr(populations, ~ .x$pop[1])
  parent_names <- unique(purrr::map_chr(populations, function(pop) {
    parent <- attr(pop, "parent")
    if (is.character(parent))
      return(pop$pop[1])
    else
      parent$pop[1]
  }))
  if (!all(parent_names %in% pop_names))
    stop("The following parent populations are missing: ", parent_names[!parent_names %in% pop_names], call. = FALSE)

  if (length(populations) != length(unique(sapply(populations, `[[`, "pop"))))
    stop("All populations must have unique names", call. = FALSE)

  # prepare the model output directory
  if (dir.exists(path)) {
    if (!overwrite)
      stop("Directory '", path, "' already exists. Either delete it\nmanually ",
           "or set 'overwrite = TRUE' to delete it from R.", call. = FALSE)
    else {
      if (interactive() && !force) {
        answer <- utils::menu(c("Yes", "No"),
          title = paste0("Are you ABSOLUTELY SURE you want to delete '", path,
                         "'?\nThere is no going back.")
        )
        force <- answer == 1
      }
      if (force)
        unlink(path, recursive = TRUE)
      else
        stop("Compilation aborted because the specified model path directory\ncould ",
             "not have been created. If you're running this in a non-interactive\n",
             "mode in a script and want to overwrite an already existing model\n",
             "directory, you must set `force = TRUE`.", call. = FALSE)
    }

  }
  dir.create(path)

  if (is.data.frame(gene_flow)) gene_flow <- list(gene_flow)

  if (length(unique(sapply(populations, has_map))) != 1)
    stop("Populations must be either all spatial or non-spatial, but not both", call. = FALSE)

  # make sure all populations share the same direction of time
  time_dir <- setdiff(unique(sapply(populations, time_direction)), "unknown")

  if (!is.null(direction) & any(direction != time_dir))
    stop("The direction that was explicitly specified contradicts the direction implied by the model", call. = FALSE)

  if (length(time_dir) > 1)
    stop("Inconsistent direction of time among the specified populations", call. = FALSE)

  if (length(time_dir) == 0 || all(time_dir == "forward")) {
    if (!is.null(direction) && all(direction == "backward"))
      time_dir <- "backward"
    else if (is.null(simulation_length))
      stop("The specified model implies a forward direction of time. However,
forward models require that the 'simulation_length' parameter is explicitly
specified in order to know when to terminate the simulation. If you
intended to run a backward time model instead, you can state this by
setting `direction = 'backward'.`", call. = FALSE)
    else
      time_dir <- "forward"
  }

  # there's no need to specify simulation run length for backward models
  # (those stop at time 0 by default) so we find the oldest time present
  # in the model and take it as the total amount of time for the simulation
  if (time_dir == "backward" || is.null(simulation_length)) {
    end_time <- get_oldest_time(populations, time_dir)
  } else
    end_time <- simulation_length

  split_table <- compile_splits(populations, generation_time, time_dir, end_time)
  admix_table <- compile_geneflows(gene_flow, split_table, generation_time, time_dir, end_time)
  resize_table <- compile_resizes(populations, generation_time, time_dir, end_time, split_table)

  if (inherits(map, "slendr_map")) {
    if (!is.null(competition)) check_resolution(map, competition)
    if (!is.null(mating)) check_resolution(map, mating)
    if (!is.null(dispersal)) check_resolution(map, dispersal)
    check_resolution(map, resolution)

    map_table <- compile_maps(populations, split_table, resolution, generation_time, time_dir, end_time, path)
    dispersal_table <- compile_dispersals(populations, generation_time, time_dir, end_time, split_table,
                                        resolution, competition, mating, dispersal)

    return_maps <-  map_table[, c("pop", "pop_id", "time_orig", "time_gen", "path")]
  } else {
    map_table <- return_maps <- dispersal_table <- NULL
  }

  simulation_length <- if (is.null(simulation_length)) end_time else simulation_length

  checksums <- write_model(
    path, populations, admix_table, map_table, split_table, resize_table,
    dispersal_table, generation_time, resolution, simulation_length, time_dir, slim_script,
    description, map
  )

  names(populations) <- pop_names

  # compile the result
  result <- list(
    path = path,
    world = map,
    populations = populations,
    splits = split_table,
    geneflow = admix_table,
    maps = return_maps,
    dispersals = dispersal_table,
    generation_time = generation_time,
    resolution = resolution,
    length = round(simulation_length / generation_time),
    orig_length = simulation_length,
    direction = time_dir,
    checksums = checksums
  )
  class(result) <- set_class(result, "model")

  result
}

#' Read a previously serialized model configuration
#'
#' Reads all configuration tables and other model data from a location
#' where it was previously compiled to by the \code{compile} function.
#'
#' @param path Directory with all required configuration files
#'
#' @return Compiled \code{slendr_model} model object which encapsulates all
#'   information about the specified model (which populations are involved,
#'   when and how much gene flow should occur, what is the spatial resolution
#'   of a map, and what spatial dispersal and mating parameters should be used
#'   in a SLiM simulation, if applicable)
#'
#' @examples
#' \dontshow{check_dependencies(python = TRUE) # make sure dependencies are present
#' }
#' # load an example model with an already simulated tree sequence
#' path <- system.file("extdata/models/introgression", package = "slendr")
#' model <- read_model(path)
#'
#' plot_model(model, sizes = FALSE, log = TRUE)
#' @export
read_model <- function(path) {
  # paths to files which are saved by the compile() function and are necessary
  # for running the backend script using the run() function
  path_populations <- file.path(path, "ranges.rds")
  path_splits <- file.path(path, "populations.tsv")
  path_geneflow <- file.path(path, "geneflow.tsv")
  path_maps <- file.path(path, "maps.tsv")
  path_generation_time <- file.path(path, "generation_time.txt")
  path_resolution <- file.path(path, "resolution.txt")
  path_length <- file.path(path, "length.txt")
  path_orig_length <- file.path(path, "orig_length.txt")
  path_direction <- file.path(path, "direction.txt")

  if (!dir.exists(path))
    stop(sprintf("Model directory '%s' does not exist", path), call. = FALSE)

  # verify checksums of serialized model configuration files
  checksums <- utils::read.table(file.path(path, "checksums.tsv"), header = TRUE)
  verify_checksums(file.path(path, checksums$file), checksums$hash)

  generation_time <- scan(path_generation_time, what = integer(), quiet = TRUE)
  length <- as.integer(scan(path_length, what = numeric(), quiet = TRUE))
  orig_length <- as.integer(scan(path_orig_length, what = numeric(), quiet = TRUE))

  split_table <- utils::read.table(path_splits, header = TRUE, stringsAsFactors = FALSE)

  admix_table <- NULL
  if (file.exists(path_geneflow)) {
    admix_table <- utils::read.table(path_geneflow, header = TRUE, stringsAsFactors = FALSE)
    admix_table$overlap <- admix_table$overlap == 1
  }

  populations <- readRDS(path_populations)
  names(populations) <- purrr::map_chr(populations, ~ .x$pop[1])

  if (file.exists(path_maps)) {
    maps <- utils::read.table(path_maps, header = TRUE, stringsAsFactors = FALSE)
    resolution <- scan(path_resolution, what = numeric(), quiet = TRUE)
    world <- attr(populations[[1]], "map")
  } else
    maps <- world <- resolution <- NULL

  direction <- scan(path_direction, what = character(), quiet = TRUE)

  result <- list(
    path = path,
    world = world,
    populations = populations,
    splits = split_table,
    geneflow = admix_table,
    maps = maps,
    generation_time = generation_time,
    resolution = resolution,
    length = length,
    orig_length = orig_length,
    direction = direction,
    checksums = checksums
  )
  class(result) <- set_class(result, "model")
  result
}


#' Run a slendr model in SLiM
#'
#' This function will execute a SLiM script generated by the \code{compile}
#' function during the compilation of a slendr demographic model.
#'
#' @param model Model object created by the \code{compile} function
#' @param sequence_length Total length of the simulated sequence (in base-pairs)
#' @param recombination_rate Recombination rate of the simulated sequence (in
#'   recombinations per basepair per generation)
#' @param samples A data frame of times at which a given number of individuals
#'   should be remembered in the tree-sequence (see \code{schedule_sampling} for a
#'   function that can generate the sampling schedule in the correct format). If
#'   missing, only individuals present at the end of the simulation will be
#'   recorded in the tree-sequence output file.
#' @param output Path to the output tree-sequence file. If \code{NULL} (the default),
#'   tree sequence will be saved to a temporary file.
#' @param burnin Length of the burnin (in model's time units, i.e. years)
#' @param max_attempts How many attempts should be made to place an offspring
#'   near one of its parents? Serves to prevent infinite loops on the SLiM
#'   backend. Default value is 1.
#' @param spatial Should the model be executed in spatial mode? By default, if a
#'   world map was specified during model definition, simulation will proceed in
#'   a spatial mode.
#' @param coalescent_only Should \code{initializeTreeSeq(retainCoalescentOnly =
#'   <...>)} be set to \code{TRUE} (the default) or \code{FALSE}? See
#'   "retainCoalescentOnly" in the SLiM manual for more detail.
#' @param method How to run the script? ("gui" - open in SLiMgui, "batch" - run
#'   on the command-line)
#' @param random_seed Random seed (if missing, SLiM's own seed will be used)
#' @param verbose Write the SLiM output log to the console (default
#'   \code{FALSE})?
#' @param load Should the final tree sequence be immediately loaded and returned?
#'   Default is \code{TRUE}. The alternative (\code{FALSE}) is useful when a tree-sequence
#'   file is written to a custom location to be loaded at a later point.
#' @param locations If \code{NULL}, locations are not saved. Otherwise, the
#'   path to the file where locations of each individual throughout the simulation
#'   will be saved (most likely for use with \code{animate_model}).
#' @param slim_path Optional way to specify path to an appropriate SLiM binary (this is useful
#'   if the \code{slim} binary is not on the \code{$PATH}).
#' @param sampling Deprecated in favor of \code{samples}.
#'
#' @return A tree-sequence object loaded via Python-R reticulate interface function \code{ts_load}
#'   (internally represented by the Python object \code{tskit.trees.TreeSequence})
#'
#' @examples
#' \dontshow{check_dependencies(python = TRUE, slim = TRUE) # make sure dependencies are present
#' }
#' # load an example model
#' model <- read_model(path = system.file("extdata/models/introgression", package = "slendr"))
#'
#' # afr and eur objects would normally be created before slendr model compilation,
#' # but here we take them out of the model object already compiled for this
#' # example (in a standard slendr simulation pipeline, this wouldn't be necessary)
#' afr <- model$populations[["AFR"]]
#' eur <- model$populations[["EUR"]]
#' chimp <- model$populations[["CH"]]
#'
#' # schedule the sampling of a couple of ancient and present-day individuals
#' # given model at 20 ky, 10 ky, 5ky ago and at present-day (time 0)
#' modern_samples <- schedule_sampling(model, times = 0, list(afr, 5), list(eur, 5), list(chimp, 1))
#' ancient_samples <- schedule_sampling(model, times = c(30000, 20000, 10000), list(eur, 1))
#'
#' # sampling schedules are just data frames and can be merged easily
#' samples <- rbind(modern_samples, ancient_samples)
#'
#' # run a simulation using the SLiM back end from a compiled slendr model object and return
#' # a tree-sequence output
#' ts <- slim(model, sequence_length = 1e5, recombination_rate = 0, samples = samples)
#'
#' # automatic loading of a simulated output can be prevented by `load = FALSE`, which can be
#' # useful when a custom path to a tree-sequence output is given for later downstream analyses
#' output_file <- tempfile(fileext = ".trees")
#' slim(model, sequence_length = 1e5, recombination_rate = 0, samples = samples,
#'      output = output_file, load = FALSE)
#' # ... at a later stage:
#' ts <- ts_load(output_file, model)
#'
#' ts
#' @export
slim <- function(
  model, sequence_length, recombination_rate, samples = NULL, output = NULL,
  burnin = 0, max_attempts = 1, spatial = !is.null(model$world), coalescent_only = TRUE,
  method = c("batch", "gui"), random_seed = NULL, verbose = FALSE, load = TRUE,
  locations = NULL, slim_path = NULL, sampling = NULL
) {
  method <- match.arg(method)

  if (is.null(output) & !load)
    warning("No custom tree-sequence output path is given but loading a tree sequence from\n",
            "a temporary file after the simulation has been prevented", call. = FALSE)

  if (is.null(output)) output <- tempfile(fileext = ".trees")

  if (method == "gui" & !interactive())
    stop("SLiMgui can only be run from an interactive R session", call. = FALSE)

  model_dir <- model$path
  if (!dir.exists(model_dir))
    stop(sprintf("Model directory '%s' does not exist", model_dir), call. = FALSE)

  if (sequence_length %% 1 != 0 | sequence_length <= 0)
    stop("Sequence length must be a non-negative integer number", call. = FALSE)

  if (!is.numeric(recombination_rate) | recombination_rate < 0)
    stop("Recombination rate must be a numeric value", call. = FALSE)

  if (!is.null(sampling) && is.null(samples)) {
    warning("Argument `sampling` will soon be deprecated in favor of `samples`.", call. = FALSE)
    samples <- sampling
  }

  # verify checksums of serialized model configuration files
  checksums <- readr::read_tsv(file.path(model_dir, "checksums.tsv"), progress = FALSE,
                               col_types = "cc")
  verify_checksums(file.path(model_dir, checksums$file), checksums$hash)

  if (is.character(slim_path) && !all(file.exists(slim_path)))
    stop("SLiM binary not found at ", slim_path, call. = FALSE)

  script_path <- path.expand(file.path(model_dir, "script.slim"))

  spatial <- if (spatial) "T" else "F"
  locations <- if (is.character(locations)) locations else ""
  coalescent_only <- if (coalescent_only) "T" else "F"
  burnin <- round(burnin / model$generation_time)

  sampling_path <- tempfile()
  sampling_df <- process_sampling(samples, model, verbose)
  readr::write_tsv(sampling_df, sampling_path)

  binary <- if (!is.null(slim_path)) slim_path else get_binary(method)
  if (binary != "open -a SLiMgui" && Sys.which(binary) == "")
    stop(sprintf("%s binary not found. Please modify your $PATH accordingly or
  specify the path manually by setting the 'binary_path' argument.", binary),
  call. = FALSE)

  seed <- if (is.null(random_seed)) "" else paste0(" \\\n    -d SEED=", random_seed)
  samples <- if (is.null(sampling_path)) ""
             else paste0(" \\\n    -d 'SAMPLES=\"", sampling_path, "\"'")

  if (method == "gui") {
    # to be able to execute the script in the SLiMgui, we have to hardcode
    # the path to the model configuration directory
    modif_path <- tempfile()
    readLines(script_path) %>%
      gsub("\"MODEL\", \".\"", paste0("\"MODEL\", \"", normalizePath(model$path), "\""), .) %>%
      gsub("\"SAMPLES\", \"\"", paste0("\"SAMPLES\", \"", normalizePath(sampling_path), "\""), .) %>%
      gsub("required_arg\\(\"OUTPUT_TS\"\\)", sprintf("defineConstant(\"OUTPUT_TS\", \"%s\")", output), .) %>%
      cat(file = modif_path, sep = "\n")
    system(sprintf("%s %s", binary, modif_path))
  } else {
    slim_command <- sprintf("%s %s %s \\
    -d 'MODEL=\"%s\"' \\
    -d 'OUTPUT_TS=\"%s\"' \\
    -d SPATIAL=%s \\
    -d SEQUENCE_LENGTH=%s \\
    -d RECOMB_RATE=%s \\
    -d BURNIN_LENGTH=%s \\
    -d SIMULATION_LENGTH=%s \\
    -d 'OUTPUT_LOCATIONS=\"%s\"' \\
    -d COALESCENT_ONLY=%s \\
    -d MAX_ATTEMPTS=%i \\
    %s",
      binary,
      seed,
      samples,
      path.expand(model_dir),
      output,
      spatial,
      sequence_length,
      recombination_rate,
      burnin,
      model$length,
      locations,
      coalescent_only,
      max_attempts,
      script_path
    )

    if (verbose) {
      cat("--------------------------------------------------\n")
      cat("SLiM command to be executed:\n\n")
      cat(slim_command, "\n")
      cat("--------------------------------------------------\n\n")
    }

    # execute the command, capture all log output and decide whether to print
    # any of the log information to the console
    log_output <- suppressWarnings(system(paste(slim_command, "2>&1"), intern = TRUE))
    log_warnings <- grep("WARNING", log_output, value = TRUE)
    if (verbose)
      cat(log_output, sep = "\n")
    else if (length(log_warnings)) {
      warning("There were some warnings during the simulation run:\n",
              paste(log_warnings, collapse = "\n"), call. = FALSE)
    }

    if (!grepl("simulation finished", log_output[length(log_output)])) {
      if (!verbose) cat(log_output, sep = "\n")
      stop("Unfortunately SLiM terminated before a tree sequence was saved.\n",
           "See the above for an indication of where things ",
           "could have gone wrong.",
           ifelse(!is.null(attr(log_output, "status")),
                  paste0("\n\nSLiM exit status: ",
                         attr(log_output, "status"), "\n",
                         "Message: ", attr(log_output, "errmsg")),
                  ""), call. = FALSE)
    }
  }

  # if the simulation was run in GUI mode, wait for the confirmation from the user that it
  # finished before loading the tree-sequence output file
  if (method == "gui")
    readline("Please confirm that the SLiMgui simulation is finished [press ENTER]")

  if (!file.exists(output))
    stop("Tree sequence was not found at the expected location:\n", output, call. = FALSE)

  if (load) {
    if (verbose) {
      cat("Tree sequence was saved to:\n", output, "\n")
      cat("Loading the tree-sequence file...\n")

    }

    ts <- ts_load(model, file = output)
    return(ts)
  }
}

#' Run a slendr model in msprime
#'
#' This function will execute a built-in msprime script and run a compiled
#' slendr demographic model.
#'
#' @param model Model object created by the \code{compile} function
#' @param sequence_length Total length of the simulated sequence (in base-pairs)
#' @param recombination_rate Recombination rate of the simulated sequence (in
#'   recombinations per basepair per generation)
#' @param samples A data frame of times at which a given number of individuals
#'   should be remembered in the tree-sequence (see \code{schedule_sampling} for a
#'   function that can generate the sampling schedule in the correct format). If
#'   missing, only individuals present at the end of the simulation will be
#'   recorded in the tree-sequence output file.
#' @param output Path to the output tree-sequence file. If \code{NULL} (the default),
#'   tree sequence will be saved to a temporary file.
#' @param random_seed Random seed (if missing, SLiM's own seed will be used)
#' @param load Should the final tree sequence be immediately loaded and returned?
#'   Default is \code{TRUE}. The alternative (\code{FALSE}) is useful when a tree-sequence
#'   file is written to a custom location to be loaded at a later point.
#' @param verbose Write the output log to the console (default \code{FALSE})?
#' @param debug Write msprime's debug log to the console (default \code{FALSE})?
#' @param sampling Deprecated in favor of \code{samples}.
#'
#' @return A tree-sequence object loaded via Python-R reticulate interface function \code{ts_load}
#'   (internally represented by the Python object \code{tskit.trees.TreeSequence})
#'
#' @examples
#' \dontshow{check_dependencies(python = TRUE) # make sure dependencies are present
#' }
#' # load an example model
#' model <- read_model(path = system.file("extdata/models/introgression", package = "slendr"))
#'
#' # afr and eur objects would normally be created before slendr model compilation,
#' # but here we take them out of the model object already compiled for this
#' # example (in a standard slendr simulation pipeline, this wouldn't be necessary)
#' afr <- model$populations[["AFR"]]
#' eur <- model$populations[["EUR"]]
#' chimp <- model$populations[["CH"]]
#'
#' # schedule the sampling of a couple of ancient and present-day individuals
#' # given model at 20 ky, 10 ky, 5ky ago and at present-day (time 0)
#' modern_samples <- schedule_sampling(model, times = 0, list(afr, 10), list(eur, 100), list(chimp, 1))
#' ancient_samples <- schedule_sampling(model, times = c(40000, 30000, 20000, 10000), list(eur, 1))
#'
#' # sampling schedules are just data frames and can be merged easily
#' samples <- rbind(modern_samples, ancient_samples)
#'
#' # run a simulation using the msprime back end from a compiled slendr model object
#' ts <- msprime(model, sequence_length = 1e5, recombination_rate = 0, samples = samples)
#'
#' # automatic loading of a simulated output can be prevented by `load = FALSE`, which can be
#' # useful when a custom path to a tree-sequence output is given for later downstream analyses
#' output_file <- tempfile(fileext = ".trees")
#' msprime(model, sequence_length = 1e5, recombination_rate = 0, samples = samples,
#'         output = output_file, load = FALSE, random_seed = 42)
#' # ... at a later stage:
#' ts <- ts_load(output_file, model)
#'
#' summary(ts)
#' @export
msprime <- function(model, sequence_length, recombination_rate, samples = NULL,
                    output = NULL, random_seed = NULL,
                    load = TRUE, verbose = FALSE, debug = FALSE, sampling = NULL) {
  if (is.null(output) & !load)
    warning("No custom tree-sequence output path is given but loading a tree sequence from\n",
            "a temporary file after the simulation has been prevented", call. = FALSE)

  if (is.null(output)) output <- tempfile(fileext = ".trees")

  model_dir <- model$path
  if (!dir.exists(model_dir))
    stop(sprintf("Model directory '%s' does not exist", model_dir), call. = FALSE)

  if (sequence_length %% 1 != 0 | sequence_length <= 0)
    stop("Sequence length must be a non-negative integer number", call. = FALSE)

  if (!is.numeric(recombination_rate) | recombination_rate < 0)
    stop("Recombination rate must be a numeric value", call. = FALSE)

  if (!is.null(sampling) && is.null(samples)) {
    warning("Argument `sampling` will soon be deprecated in favor of `samples`.", call. = FALSE)
    samples <- sampling
  }

  # verify checksums of serialized model configuration files
  checksums <- readr::read_tsv(file.path(model_dir, "checksums.tsv"), progress = FALSE,
                               col_types = "cc")
  verify_checksums(file.path(model_dir, checksums$file), checksums$hash)

  script_path <- path.expand(file.path(model_dir, "script.py"))

  if (!is.null(samples)) {
    sampling_path <- tempfile()
    sampling_df <- process_sampling(samples, model, verbose)
    readr::write_tsv(sampling_df, sampling_path)
    sampling <- paste("--sampling-schedule", sampling_path)
  } else
    sampling <- ""

  msprime_command <- sprintf("python3 \\
    %s \\
    %s \\
    --model %s \\
    --output %s \\
    --sequence-length %d \\
    --recombination-rate %s \\
    %s \\
    %s \\
    %s",
    script_path,
    ifelse(is.null(random_seed), "", paste("--seed", random_seed)),
    path.expand(model_dir),
    output,
    sequence_length,
    recombination_rate,
    sampling,
    ifelse(verbose, "--verbose", ""),
    ifelse(debug, "--debug", "")
  )

  if (verbose) {
    cat("--------------------------------------------------\n")
    cat("msprime command to be executed:\n\n")
    cat(msprime_command, "\n")
    cat("--------------------------------------------------\n\n")
  }

  reticulate::py_run_string(sprintf("import os; os.system('%s')", msprime_command))

  # if (system(msprime_command, ignore.stdout = !verbose) != 0)
  #   stop("msprime simulation resulted in an error -- see the output above", call. = FALSE)

  if (!file.exists(output))
    stop("Tree sequence was not found at the expected location:\n", output, call. = FALSE)

  if (load) {
    if (verbose) {
      cat("Tree sequence was saved to:\n", output, "\n")
      cat("Loading the tree-sequence file...\n")

    }

    ts <- ts_load(model, file = output)
    return(ts)
  }
}

calculate_checksums <- function(files) {
  if (!all(file.exists(files)))
    stop("Not all compiled files are present", call. = FALSE)

  data.frame(
    file = basename(files),
    hash = as.vector(tools::md5sum(files))
  )
}


# Make sure the checksums of a given set of files matches the expectation
verify_checksums <- function(files, hashes) {
  for (i in seq_along(files)) {
    if (tools::md5sum(files[i]) != hashes[i]) {
      warning("Checksum of '", basename(files[i]), "' does not match its compiled state",
              call. = FALSE)
    }
  }
}

# Write a compiled slendr model to disk and return a table of checksums
write_model <- function(path, populations, admix_table, map_table, split_table,
                        resize_table, dispersal_table,
                        generation_time, resolution, length, direction,
                        script_source, description, map) {
  saved_files <- c()

  # table of split times and initial population sizes
  saved_files["splits"] <- file.path(path, "populations.tsv")
  utils::write.table(split_table, saved_files[["splits"]],
                     sep = "\t", quote = FALSE, row.names = FALSE)

  # table of geneflow events
  if (!is.null(admix_table)) {
    saved_files["geneflow"] <- file.path(path, "geneflow.tsv")
    admix_table$overlap <- as.integer(admix_table$overlap)
    utils::write.table(admix_table, saved_files[["geneflow"]],
                       sep = "\t", quote = FALSE, row.names = FALSE)
  }

  if (!is.null(map_table)) {
    # rasterized spatial maps
    for (i in seq_len(nrow(map_table))) {
      saved_files[paste0("map", i)] <- file.path(path, sprintf("%d.png", i))
      map_row <- map_table[i, ]
      save_png(map_row$map[[1]], saved_files[paste0("map", i)])
    }

    # table of paths to raster files
    saved_files["maps"] <- file.path(path, "maps.tsv")
    utils::write.table(
      map_table[, c("pop", "pop_id", "time_orig", "time_gen", "path")],
      saved_files[["maps"]], sep = "\t", quote = FALSE, row.names = FALSE
    )

    saved_files["resolution"] <- file.path(path, "resolution.txt")
    base::write(resolution, saved_files[["resolution"]])
  }

  # table of interaction and dispersal distances
  if (!is.null(dispersal_table)) {
    saved_files["dispersal"] <- file.path(path, "dispersals.tsv")
    utils::write.table(dispersal_table, saved_files[["dispersal"]],
                       sep = "\t", quote = FALSE, row.names = FALSE)
  }

  # serialized population objects
  saved_files["populations"] <- file.path(path, "ranges.rds")
  saveRDS(populations, saved_files[["populations"]])

  # table of scheduled resize events
  if (!is.null(resize_table)) {
    saved_files["resizes"] <- file.path(path, "resizes.tsv")
    utils::write.table(resize_table, saved_files["resizes"], sep = "\t",
                       quote = FALSE, row.names = FALSE)
  }

  saved_files["generation_time"] <- file.path(path, "generation_time.txt")
  saved_files["length"] <- file.path(path, "length.txt")
  saved_files["orig_length"] <- file.path(path, "orig_length.txt")
  saved_files["direction"] <- file.path(path, "direction.txt")
  saved_files["description"] <- file.path(path, "description.txt")
  base::write(generation_time, file.path(path, "generation_time.txt"))
  base::write(round(length / generation_time), file.path(path, "length.txt"))
  base::write(length, file.path(path, "orig_length.txt"))
  base::write(direction, file.path(path, "direction.txt"))
  base::write(description, file.path(path, "description.txt"))

  saved_files["slim_script"] <- file.path(path, "script.slim")
  saved_files["msprime_script"] <- file.path(path, "script.py")
  write_script(saved_files["slim_script"], script_source, map, resolution)
  write_script(saved_files["msprime_script"],
               system.file("scripts/script.py", package = "slendr"))

  checksums <- calculate_checksums(saved_files)
  utils::write.table(checksums, file.path(path, "checksums.tsv"), sep = "\t",
                     quote = FALSE, row.names = FALSE)

  checksums
}

write_script <- function(script_target, script_source,
                         map = NULL, resolution = NULL, description = "") {
  # copy the script to the dedicated model directory, replacing the
  # placeholders for model directory and slendr version accordingly
  script_code <- readLines(script_source) %>%
    gsub("__VERSION__", paste0("slendr_", utils::packageVersion("slendr")), .)

  if (!is.null(map)) {
    crs <- ifelse(has_crs(map), sf::st_crs(map)$epsg, "NULL")
    extent <- paste(deparse(as.vector(sf::st_bbox(map))), collapse = "")
    script_code <- script_code %>%
      gsub("__CRS__", as.character(crs), .) %>%
      gsub("__EXTENT__", extent, .) %>%
      gsub("__RESOLUTION__", as.character(resolution), .)
  }
  cat(script_code, file = script_target, sep = "\n")

  script_target
}

# Iterate over population objects and convert he information about
# population split hierarchy and split times into a data frame
compile_splits <- function(populations, generation_time, direction, end_time) {
  split_table <- lapply(populations, function(p) {
    parent <- attr(p, "parent")
    if (is.character(parent) && parent == "ancestor") {
      parent_name <- parent
    } else {
      parent_name <- unique(attr(p, "parent")$pop)
    }

    tremove <- attr(p, "remove")
    tsplit <- attr(p, "history")[[1]]$time

    data.frame(
      pop = unique(p$pop),
      parent = parent_name,
      tsplit = tsplit,
      N = attr(p, "history")[[1]]$N,
      tremove = ifelse(!is.null(tremove), tremove, -1),
      stringsAsFactors = FALSE
    )
  }) %>% do.call(rbind, .)

  # convert times into a forward direction
  split_table <- convert_to_forward(
    split_table,
    direction = direction,
    columns = c("tsplit", "tremove"),
    end_time = end_time,
    generation_time = generation_time
  )

  # order populations by split time and assign a numeric identifier to each
  split_table <- split_table[
    order(split_table$tsplit_gen, decreasing = FALSE, na.last = FALSE), ]
  split_table$pop_id <- seq_len(nrow(split_table)) - 1
  split_table$parent_id <- lapply(
    split_table$parent,
    function(i) {
      if (i == "ancestor") -1
      else split_table[split_table$pop == i, ]$pop_id
    }
  ) %>% unlist()

  # if a population is ancestral (without a parent), it should appear in the
  # simulation in generation 1 regardless of which "time of appearance" was
  # specified (to include it in the burnin)
  split_table %>%
    dplyr::mutate(tsplit_gen = ifelse(parent == "ancestor", 1, tsplit_gen))
}

# Process vectorized population boundaries into a table with
# rasterized map objects
compile_maps <- function(populations, split_table, resolution, generation_time,
                         direction, end_time, dir) {
  # generate rasterized maps
  maps <- render(populations, resolution)

  # convert list of rasters into data frame, adding the spatial
  # maps themselves as a list column
  map_table <- lapply(maps, function(m) {
    as.data.frame(m[c("pop", "time")], stringsAsFactors = FALSE)
  }) %>%
    do.call(rbind, .)
  # add column with a numeric population identifier (used later by SLiM)
  map_table$pop_id <- unlist(lapply(
    map_table$pop,
    function(i) split_table[split_table$pop == i, ]$pop_id
  ))
  map_table$map <- I(lapply(maps, function(m) m$map))

  map_table <- convert_to_forward(
    map_table,
    direction = direction,
    columns = "time",
    end_time = end_time,
    generation_time = generation_time
  )

  # number maps sequentially in the order SLiM will be swapping them
  # later (each map number X corresponds to X.png)
  map_table <- map_table[order(map_table$time_gen, na.last = FALSE), ]
  # in some situations, multiple maps are scheduled for a single generation
  # for one population - this removes the duplicates, but ideally this kind
  # of problem should be caught somewhere upstream
  map_table <- map_table[!duplicated(map_table[, c("pop", "time_gen")]), ]
  map_table$path <- seq_len(nrow(map_table)) %>% paste0(., ".png")

  # maps of ancestral populations have to be set in the first generation,
  # regardless of the specified split time
  ancestral_pops <- split_table[split_table$parent == "ancestor", ]$pop
  ancestral_maps <- purrr::map(ancestral_pops, ~ which(map_table$pop == .x)) %>%
    purrr::map_int(~ .x[1])
  map_table[ancestral_maps, ]$time_gen <- 1

  map_table
}


compile_geneflows <- function(geneflow, split_table, generation_time,
                              direction, end_time) {
  if (length(geneflow) == 0)
    return(NULL)

  admix_table <- do.call(rbind, geneflow)
  admix_table <- convert_to_forward(
    admix_table,
    direction = direction,
    columns = c("tstart", "tend"),
    end_time = end_time,
    generation_time = generation_time
  )
  names(admix_table)[1:2] <- c("from", "to")

  # convert population names and their parents' names to SLiM numbers
  admix_table$from_id <- unlist(lapply(
    admix_table$from,
    function(i) split_table[split_table$pop == i, ]$pop_id
  ))
  admix_table$to_id <- unlist(lapply(
    admix_table$to,
    function(i) split_table[split_table$pop == i, ]$pop_id
  ))

  admix_table
}


# Compile table of population resize events
compile_resizes <- function(populations, generation_time, direction,
                            end_time, split_table) {
  resize_events <- lapply(populations, function(p) {
    lapply(attr(p, "history"), function(event) {
      if (unique(event$event) == "resize") event
    }) %>% do.call(rbind, .)
  }) %>% do.call(rbind, .)

  if (is.null(resize_events))
    return(NULL)
  else
    resize_events$tend[is.na(resize_events$tend)] <- -1

  resize_table <- convert_to_forward(
    resize_events,
    direction = direction,
    columns = c("tresize", "tend"),
    end_time = end_time,
    generation_time = generation_time
  )

  resize_table$pop_id <- sapply(
    resize_table$pop,
    function(i) split_table[split_table$pop == i, ]$pop_id
  ) %>% as.numeric

  resize_table[, c("pop", "pop_id", "how", "N", "prev_N",
                   "tresize_orig", "tresize_gen", "tend_orig", "tend_gen")]
}

# Compile table of population resize events
compile_dispersals <- function(populations, generation_time, direction,
                               end_time, split_table, resolution,
                               competition, mating, dispersal) {
  dispersal_events <- lapply(populations, function(p) {
    lapply(attr(p, "history"), function(event) {
      if (unique(event$event) == "split") {
        event$N <- NULL
        names(event) <- c("pop", "event", "time", "competition",
                          "mating", "dispersal", "dispersal_fun")
        event$event <- "dispersal"
        event
      } else if (unique(event$event) == "dispersal") {
        event
      }
    }) %>% do.call(rbind, .)
  }) %>% do.call(rbind, .)

  if (is.null(dispersal_events))
    return(NULL)

  dispersal_events$tdispersal <- dispersal_events$time
  dispersal_events$time <- NULL

  dispersal_table <- convert_to_forward(
    dispersal_events,
    direction = direction,
    columns = "tdispersal",
    end_time = end_time,
    generation_time = generation_time
  )

  dispersal_table$pop_id <- sapply(
    dispersal_table$pop,
    function(i) split_table[split_table$pop == i, ]$pop_id
  ) %>% as.numeric

  # take care of missing interactions and offspring distances
  dispersal_table <- set_distances(dispersal_table, resolution, competition,
                                   mating, dispersal)

  dispersal_table <- dispersal_table[order(dispersal_table$tdispersal_gen, na.last = FALSE), ]

  # dispersals of ancestral populations have to be set in the first generation,
  # regardless of the specified split time
  ancestral_pops <- split_table[split_table$parent == "ancestor", ]$pop
  indices <- purrr::map(ancestral_pops, ~ which(dispersal_table$pop == .x)) %>%
    purrr::map_int(~ .x[1])
  dispersal_table[indices, ]$tdispersal_gen <- 1

  dispersal_table[, c("pop", "pop_id", "tdispersal_gen", "tdispersal_orig",
                      "competition", "mating", "dispersal",
                      "dispersal_fun")]
}


# Render population boundaries to black-and-white spatial maps
render <- function(pops, resolution) {
  raster_list <- lapply(pops, function(pop) {
    # iterate over temporal maps for the current population
    snapshots <- lapply(unique(pop$time), function(t) {
      snapshot <- pop[pop$time == t, ]
      class(snapshot) <- set_class(snapshot, "pop")

      # render the population if needed
      if (is.null(attr(pop, "intersected")))
        snapshot <- intersect_features(snapshot)

      raster_map <- rasterize(snapshot, resolution)

      # return the rendered spatial map with the population name and the
      # appropriate time stamp (unique-ing because intersecting splits
      # the spatial object into multiple disjoint features)
      list(
        pop = unique(snapshot$pop),
        time = unique(snapshot$time),
        map = raster_map
      )
    })
    snapshots
  })

  # flatten the list of ggplot objects
  rasters <- do.call(c, raster_list)

  rasters
}


# Rasterize the vector form of a population spatial boundary
rasterize <- function(x, resolution) {
  # add a dummy variable for plotting the bi-color map
  x$fill <- factor(1)

  # create a template object for rasterization (i.e. size of the final raster)
  if (inherits(x, "slendr_map"))
    bbox <- sf::st_bbox(x)
  else
    bbox <- sf::st_bbox(attr(x, "map"))

  template <- stars::st_as_stars(bbox, dx = resolution, dy = resolution)

  # perform the rasterization using the dummy single-value factor column
  raster <- stars::st_rasterize(x["fill"], template)

  if (length(table(raster$ID)) == 1) {
    stop(sprintf("
The generated raster map of %s at time %s is blank.
This would cause SLiM to crash as it would not be able to place
any individuals on the map. Please check the spatial boundary for
this population at this time point.", x$pop, x$time), call. = FALSE)
  }

  pixel_values <- unique(as.numeric(raster$fill))
  if (length(pixel_values) == 1 && pixel_values == 0)
    stop("No occupiable pixel on a rasterized map for population '",
         x$pop[1], "' at time ", x$time[1], ". Make sure that the specified",
         " population boundary has sufficient space for the population to occupy.",
         call. = FALSE)

  raster
}


# Save the rasterized stars object to a PNG file
save_png <- function(raster, path) {
  tmp_tiff <- paste0(tempfile(), ".tiff")

  # write stars raster as a TIFF format
  stars::write_stars(raster, tmp_tiff)

  # convert the stars TIFF into a PNG (the only format SLiM supports)
  img <- ijtiff::read_tif(tmp_tiff, msg = FALSE)
  unlink(tmp_tiff)

  # subset the multidimensional array only to pixel two-dimensional matrix
  img_matrix <- img[, , 1, 1]

  # binarize the matrix (st_rasterize assigns a different color to each
  # fragmented spatial feature after intersect_features() call)
  img_matrix[img_matrix > 0] <- 1

  png::writePNG(img_matrix, path)
}


# Convert times given in specified columns of a data frame into
# a SLiM forward direction given in generations
convert_to_forward <- function(df, direction, columns, end_time, generation_time) {
  for (column in columns) {
    times <- df[[column]]

    # if necessary, convert to forward direction
    if (direction == "backward")
      times[times != -1] <- end_time - times[times != -1] + generation_time

    # convert to generations
    times[times != -1] <- as.integer(round(times[times != -1] / generation_time))

    df[[paste0(column, "_gen")]] <- times
    df[[paste0(column, "_orig")]] <- df[[column]]
    df[[column]] <- NULL
  }
  df
}
