#' Filter National Forest Inventory Data 
#' 
#' @description
#' The `filter_nfi()` function provides hierarchical and non-hierarchical filtering approaches for the complex structure of National Forest Inventory data based on user-provided condition expressions (`expr_texts`).
#' This function enables effective filtering while maintaining the relationship between plot data (parent data) and other data (child data).
#' 
#' @details
#' This function parses expressions targeting specific columns in the dataframes within the provided list.
#' 
#' Expression requirements:
#' - Each expression in `expr_texts` must start with a valid dataframe name in the list (e.g., "plot", "tree", "cwd") and combine the dataframe name, dollar sign, and condition (e.g., c("plot$OWN_CD == '5'")).
#' - Separate expressions must be provided for each dataframe being filtered (e.g., c("plot$OWN_CD == '5'", "tree$FAMILY == 'Pinaceae' | tree$WDY_PLNTS_TYP_CD == '1'")).
#'
#' Hierarchical filtering (`hier = TRUE`):
#' - Filters applied to plot table affect all connected child data (tree, CWD, stump, etc.).
#' - Filters applied to child data only operate within that dataframe and do not affect other dataframes.
#' - Example: If only coniferous forest subplots are selected in the plot table, the child data will retain only the tree, CWD, stump, etc., associated with those subplots.
#'
#' Non-hierarchical filtering (`hier = FALSE`):
#' - Filters applied to the parent dataframe (plot table) do not affect the child data.
#' - Filtering results from child data affect all other parent and child data.
#' - Example: If only certain species are selected in the tree table, the plot table, CWD table, stump table, etc., will be filtered based on the remaining subplots from this selection.
#'
#' @param data : A `list` generated by \code{\link{read_nfi}}. Each dataframe should have a 'SUB_PLOT' column.
#' @param expr_texts : A character vector; expressions specifying filtering conditions. Each expression should combine the dataframe name, dollar sign, and condition, with separate expressions for each data frame (e.g., c("plot$OWN_CD == '5'", "tree$FAMILY == 'Pinaceae'")). Conditions must be valid R expressions.
#' @param hier : A logical flag (default TRUE); indicates whether to apply hierarchical filtering (`TRUE`) or non-hierarchical filtering (`FALSE`). Hierarchical filtering ensures that connected dataframes are filtered based on the results of filters applied to the parent frame.
#' 
#' @return A `list` of dataframes.
#' 
#' @examples
#' 
#' data("nfi_donghae")
#' 
#' # Applying hierarchical filtering to select only privately owned forest subplots.
#' # Ensures all child tables' subplots match the filtered plot table's subplots.
#' # Expected results after filtering:
#' # all(nfi_donghae$tree$SUB_PLOT %in% nfi_donghae$plot$SUB_PLOT)  result: TRUE
#' # all(nfi_donghae$plot$SUB_PLOT %in% nfi_donghae$tree$SUB_PLOT)  result: FALSE
#' nfi_donghae <- filter_nfi(nfi_donghae, c("plot$OWN_CD == '5'"), hier = TRUE)
#' 
#' \donttest{ 
#' # Non-hierarchical filtering to select only privately owned forest subplots.
#' # Child tables remain unfiltered and may not correspond to the plot table subplots.
#' # Expected results after filtering:
#' # all(nfi_donghae$tree$SUB_PLOT %in% nfi_donghae$plot$SUB_PLOT)  result: FALSE
#' nfi_donghae <- filter_nfi(nfi_donghae, c("plot$OWN_CD == '5'"), hier = FALSE)
#' 
#' # Non-Hierarchical Filtering with only woody plants.
#' # Other tables are filtered so that their subplots correspond to the tree table.
#' # Expected results after filtering:
#' # all(nfi_donghae$plot$SUB_PLOT %in% nfi_donghae$tree$SUB_PLOT)  result: TRUE
#' nfi_donghae <- filter_nfi(nfi_donghae, c("tree$WDY_PLNTS_TYP_CD == '1'"), hier = FALSE)
#' 
#' # Combining multiple filters across different dataframes
#' nfi_donghae <- filter_nfi(nfi_donghae, 
#'                     c("plot$OWN_CD == '5'", 
#'                     "tree$FAMILY == 'Pinaceae' | tree$WDY_PLNTS_TYP_CD == '1'"))
#' }
#' 
#' @export
# 

filter_nfi <- function(data, expr_texts, hier = TRUE) {

  ## Validation ----------------------------------------------------------------
  # Collect every dataframe name written as `<name>$` in each expression.
  table_refs <- regmatches(
    expr_texts,
    gregexpr("\\w+(?=\\$)", expr_texts, perl = TRUE)
  )
  referenced <- unlist(table_refs)

  # An expression is valid only if it names exactly one dataframe (an
  # expression without any `<name>$` prefix yields zero names and is rejected).
  single_table <- vapply(
    table_refs,
    function(x) length(unique(x)) == 1L,
    logical(1)
  )

  if (!all(referenced %in% names(data)) || !all(single_table)) {
    data_names <- paste(names(data), collapse = ", ")
    # deparse() may split a long argument expression over several strings;
    # collapse them so the message is emitted exactly once.
    data_label <- paste(trimws(deparse(substitute(data))), collapse = " ")
    stop(paste0(
      "Each value in param 'expr_texts' must start with ", data_names,
      ". and param 'expr_texts' requires separate expressions for each item in ",
      data_label,
      ". For example: c('plot$OWN_CD == \"5\"', 'tree$FAMILY == \"Pinaceae\" | tree$WDY_PLNTS_TYP_CD == \"1\"')"
    ))
  }

  ## Helper --------------------------------------------------------------------
  # Keep, in every table of `tables`, only the rows whose SUB_PLOT (or
  # SUB_PLOT/CYCLE pair when `by_cycle` is TRUE) occurs in `kept`.
  restrict_tables <- function(tables, kept, by_cycle) {
    if (by_cycle) {
      keys <- kept[, c("SUB_PLOT", "CYCLE")]
      lapply(tables, function(df) {
        dplyr::semi_join(df, keys, by = c("SUB_PLOT", "CYCLE"))
      })
    } else {
      subplots <- unique(kept$SUB_PLOT)
      lapply(tables, function(df) df[df$SUB_PLOT %in% subplots, ])
    }
  }

  ## Filtering -----------------------------------------------------------------
  # Expressions are applied one after another; each one sees the result of the
  # previous ones.
  for (i in seq_along(expr_texts)) {
    # Name of the dataframe this expression targets (validated above).
    name <- table_refs[[i]][1]

    # Strip the `<name>$` prefixes so the condition refers to bare columns,
    # then parse it into expressions that can be spliced into filter().
    condition_text <- gsub("\\w+\\$", "", expr_texts[[i]])
    conditions <- rlang::parse_exprs(condition_text)

    # When the condition mentions CYCLE, propagation matches on the
    # (SUB_PLOT, CYCLE) pair instead of SUB_PLOT alone.
    by_cycle <- grepl("CYCLE", condition_text, ignore.case = TRUE)

    filtered <- dplyr::filter(data[[name]], !!!conditions)

    # Propagate to the other tables when
    #   - hier = TRUE  and the plot table is filtered, or
    #   - hier = FALSE and a non-plot table is filtered.
    # The plot table is identified by its exact name, not by list position.
    targets_plot <- identical(name, "plot")
    propagate <- if (hier) targets_plot else !targets_plot

    if (propagate) {
      others <- names(data) != name
      data[others] <- restrict_tables(data[others], filtered, by_cycle)
    }

    # The condition is evaluated once, on the table as it was before this
    # step, so the stored table always matches the propagated keys.
    data[[name]] <- filtered
  }

  # Return the filtered data
  data
}
