#' Calculate Entropy Based on Keywords Over Time
#'
#' Computes the normalized Shannon entropy of keyword distributions from scientific
#' publications over a specified time range. Entropy measures the diversity and
#' evenness of keyword usage within research groups or the entire network.
#'
#' @param network A network object to analyze. For `scope = "groups"`, this should be
#'   the output of `sniff_groups()`. For `scope = "network"`, this should be a
#'   `tbl_graph` or `igraph` object from `sniff_network()`. Plain `igraph` objects
#'   are converted to `tbl_graph` internally. The graph nodes must carry the
#'   attributes `DE` (semicolon-separated keywords) and `PY` (publication year);
#'   for `scope = "groups"` they must also carry `group`.
#' @param scope Character specifying the analysis scope: "groups" for multiple groups
#'   or "network" for the entire network (default: "groups").
#' @param start_year Starting year for entropy calculation (a single whole number).
#'   If NULL, uses the minimum publication year found in the network data.
#' @param end_year Ending year for entropy calculation (a single whole number,
#'   strictly greater than `start_year`). If NULL, uses the maximum publication
#'   year found in the network data.
#'
#' @return A list with three components:
#'   \item{data}{A tibble with columns `group`, `year` and `index` holding the
#'     entropy value of each group for every year in the range}
#'   \item{plots}{A named list of plot objects (one per group, as returned by
#'     `indexes_plots()`) visualizing entropy trends; groups without any data or
#'     whose plot could not be built are omitted with a warning}
#'   \item{years_range}{A vector with the start_year and end_year used in calculations}
#'
#' @details
#' The function calculates the normalized Shannon entropy (Pielou's evenness index)
#' based on Shannon's information theory (Shannon, 1948). For each year, entropy
#' is computed from the keyword distribution of publications in that year (annual mode).
#'
#' Keywords are split on `;`, lower-cased and whitespace-normalized before counting.
#'
#' The normalized entropy is calculated as:
#' \deqn{J' = \frac{H}{H_{max}} = \frac{-\sum_{i=1}^{n} p_i \log_2 p_i}{\log_2 n}}
#' where \eqn{p_i} is the relative frequency of keyword \eqn{i}, \eqn{n} is the
#' number of unique keywords, and \eqn{H_{max} = \log_2 n} is the maximum possible
#' entropy for \eqn{n} categories.
#'
#' Entropy values range from 0 to 1, where:
#' \itemize{
#'   \item 0 indicates minimal diversity (one dominant keyword)
#'   \item 1 indicates maximal diversity (all keywords equally frequent)
#' }
#'
#' Two special cases apply: a year in which a group has no keywords yields `NA`,
#' and a year with exactly one distinct keyword yields 0 (the normalization
#' \eqn{\log_2 n} would otherwise be zero).
#'
#' A sudden increase in entropy may signal the emergence of new research topics,
#' while a decrease suggests thematic convergence.
#'
#' @references
#' Shannon, C. E. (1948). A mathematical theory of communication. \emph{Bell System
#' Technical Journal}, 27(3), 379-423. \doi{10.1002/j.1538-7305.1948.tb01338.x}
#'
#' Pielou, E. C. (1966). The measurement of diversity in different types of
#' biological collections. \emph{Journal of Theoretical Biology}, 13, 131-144.
#'
#' @importFrom tidygraph activate
#' @importFrom tibble as_tibble
#' @importFrom dplyr mutate filter select pull group_by summarise n
#' @importFrom tidyr separate_rows
#' @importFrom purrr map map_dfr map2
#' @importFrom stringr str_squish
#' @importFrom igraph V
#' @importFrom stats na.omit
#' @importFrom glue glue
#' @export
#'
#' @examples
#' \dontrun{
#' # Calculate entropy for groups from sniff_groups() output
#' groups_data <- sniff_groups(your_network_data)
#' entropy_results <- sniff_entropy(groups_data, scope = "groups")
#'
#' # Calculate entropy for entire network
#' entropy_results <- sniff_entropy(network_data, scope = "network")
#'
#' # Specify custom year range
#' entropy_results <- sniff_entropy(
#'   groups_data,
#'   scope = "groups",
#'   start_year = 2010,
#'   end_year = 2020
#' )
#'
#' # Access results
#' entropy_data <- entropy_results$data
#' entropy_plots <- entropy_results$plots
#' }
#'
#' @seealso
#' \code{\link{sniff_groups}}, \code{\link{sniff_network}}, \code{\link{indexes_plots}}
sniff_entropy <- function(network, scope = "groups", start_year = NULL, end_year = NULL) {
  # --- Local helpers ----

  # Validate a user-supplied year: NULL passes through untouched; anything else
  # must be a single finite whole number (numeric, or text that parses as one).
  validate_year <- function(value, arg) {
    if (is.null(value)) {
      return(NULL)
    }
    parsed <- NA_real_
    if (length(value) == 1L) {
      parsed <- suppressWarnings(as.numeric(as.character(value)))
    }
    if (!is.finite(parsed) || parsed != round(parsed)) {
      stop(arg, " must be a single whole-number year or NULL", call. = FALSE)
    }
    # Keep the caller's numeric type (integer vs double) when it was numeric.
    if (is.numeric(value)) value else parsed
  }

  # Fail when the requested range is empty or reversed.
  check_year_order <- function(first, last) {
    if (first >= last) {
      stop("start_year must be less than end_year", call. = FALSE)
    }
  }

  # Pielou's evenness J' = H / log2(n) for a vector of keyword occurrences.
  # NA when there are no keywords; 0 when only one distinct keyword exists,
  # because log2(1) = 0 would make the normalization undefined.
  pielou_evenness <- function(keywords) {
    if (length(keywords) == 0L) {
      return(NA_real_)
    }
    counts <- as.numeric(table(keywords))
    n_keywords <- length(counts)
    if (n_keywords <= 1L) {
      return(0)
    }
    p <- counts / sum(counts)
    -sum(p * log2(p)) / log2(n_keywords)
  }

  # --- Argument validation (cheap checks before touching the graph) ----
  if (is.null(network)) {
    stop("Network data not found in groups object", call. = FALSE)
  }

  required_scope <- c("network", "groups")
  scope_ok <- is.character(scope) && length(scope) == 1L && !is.na(scope) &&
    scope %in% required_scope
  if (!scope_ok) {
    stop(
      "scope must be: ", paste(required_scope, collapse = " or "),
      call. = FALSE
    )
  }

  start_year <- validate_year(start_year, "start_year")
  end_year <- validate_year(end_year, "end_year")
  if (!is.null(start_year) && !is.null(end_year)) {
    check_year_order(start_year, end_year)
  }

  # --- Resolve the graph to analyze ----
  if (scope == "groups") {
    list_dimensions <- c("network", "pubs_by_year", "aggregate")
    if (!all(list_dimensions %in% names(network))) {
      stop("network file must be generated by sniff_groups()", call. = FALSE)
    }
    net_data <- network$network
    if (!inherits(net_data, c("tbl_graph", "igraph"))) {
      stop("network file must be generated by sniff_groups()", call. = FALSE)
    }
  } else {
    if (!inherits(network, c("tbl_graph", "igraph"))) {
      stop("Input (network) must be a network object (tbl_graph or igraph)", call. = FALSE)
    }
    net_data <- network
  }

  # activate() is only defined for tbl_graph classes, so a bare igraph object
  # has to be promoted before any tidygraph verb is applied to it.
  if (!inherits(net_data, "tbl_graph")) {
    net_data <- tidygraph::as_tbl_graph(net_data)
  }

  if (scope == "network") {
    # Treat the whole network as one pseudo-group so the code below is shared.
    net_data <- net_data |>
      tidygraph::activate(nodes) |>
      dplyr::mutate(group = "full_network")
  }

  # --- Extract the node table once; everything below works on it ----
  node_tbl <- tryCatch(
    net_data |>
      tidygraph::activate(nodes) |>
      tibble::as_tibble(),
    error = function(e) {
      stop("Error extracting groups from network: ", conditionMessage(e), call. = FALSE)
    }
  )

  missing_cols <- setdiff(c("group", "DE", "PY"), names(node_tbl))
  if (length(missing_cols) > 0) {
    stop(
      "Network nodes are missing required attribute(s): ",
      paste(missing_cols, collapse = ", "),
      call. = FALSE
    )
  }

  # --- Determine start_year and end_year if not provided ----
  if (is.null(start_year) || is.null(end_year)) {
    publication_years <- node_tbl[["PY"]]
    if (!is.numeric(publication_years)) {
      # Compare years numerically even when they are stored as text/factor.
      publication_years <- suppressWarnings(
        as.numeric(as.character(publication_years))
      )
    }
    publication_years <- publication_years[!is.na(publication_years)]

    if (length(publication_years) == 0) {
      stop("No publication years found in network data", call. = FALSE)
    }

    if (is.null(start_year)) {
      start_year <- min(publication_years)
      message("Using minimum publication year as start_year: ", start_year)
    }

    if (is.null(end_year)) {
      end_year <- max(publication_years)
      message("Using maximum publication year as end_year: ", end_year)
    }
  }

  check_year_order(start_year, end_year)

  # --- Unique, sorted group identifiers (NA groups are ignored) ----
  group_ids <- sort(unique(stats::na.omit(node_tbl[["group"]])))

  if (length(group_ids) == 0) {
    stop("No valid groups found for analysis", call. = FALSE)
  }

  # --- One row per (publication, keyword), normalized for counting ----
  all_keywords <- node_tbl |>
    dplyr::select("group", "DE", "PY") |>
    dplyr::filter(!is.na(.data$DE) & .data$DE != "") |>
    dplyr::filter(.data$group %in% !!group_ids) |>
    tidyr::separate_rows("DE", sep = ";") |>
    dplyr::mutate(DE = stringr::str_squish(tolower(.data$DE))) |>
    dplyr::filter(.data$DE != "")

  years_seq <- start_year:end_year

  # --- Entropy computation: annual, Pielou's J' ----
  entropy_list <- purrr::map(group_ids, function(grp) {
    group_keywords <- all_keywords[which(all_keywords$group == grp), ]

    yearly_index <- vapply(
      years_seq,
      function(current_year) {
        # Annual mode: only keywords of publications from this very year.
        in_year <- which(group_keywords$PY == current_year)
        pielou_evenness(group_keywords$DE[in_year])
      },
      numeric(1)
    )

    tibble::tibble(index = yearly_index, year = years_seq, group = grp)
  })

  names(entropy_list) <- group_ids

  # Combine all entropy data
  entropy_data <- dplyr::bind_rows(entropy_list) |>
    dplyr::select("group", "year", "index")

  # --- One plot per group; failures degrade to a warning, not an error ----
  plots_list <- purrr::map2(entropy_list, group_ids, function(x, y) {
    if (all(is.na(x$index))) {
      warning("No data available for group: ", y, " - skipping plot", call. = FALSE)
      return(NULL)
    }
    tryCatch(
      indexes_plots(x, group_name = y, start_year, end_year, method = "entropy"),
      error = function(e) {
        warning(
          "Error creating plot for group ", y, ": ", conditionMessage(e),
          call. = FALSE
        )
        NULL
      }
    )
  })

  valid_plots <- !vapply(plots_list, is.null, logical(1))
  plots_list <- plots_list[valid_plots]
  names(plots_list) <- group_ids[valid_plots]

  list(
    data = entropy_data,
    plots = plots_list,
    years_range = c(start_year = start_year, end_year = end_year)
  )
}
