#' Estimate the Bivariate Normal Distribution from Marginal Summaries
#'
#' Estimate the correlation coefficient \eqn{\rho} (and marginal means / SDs)
#' of two normally-distributed variables using summary-level data from
#' multiple independent studies.
#'
#' @param n   Numeric vector. Sample size of each study.
#' @param xbar,ybar Numeric vectors. Sample means of the two variables.
#' @param s2x,s2y   Numeric vectors. Sample variances; required for \code{method = "proposed"}.
#' @param method Character. \code{"proposed"} uses the proposed MLE method
#'               in the paper; \code{"weighted"} replicates the weighted
#'               mean based method (Baseline) when no variances are available.
#' @param ci_method Confidence interval type: \code{"none"}, \code{"normal"},
#'                  or \code{"lr"} (likelihood ratio). Only implemented when
#'                  \code{method = "proposed"}.
#' @return A list with elements
#'   \itemize{
#'     \item \code{mu_x, mu_y} : estimated marginal means
#'     \item \code{sigma_x, sigma_y} : estimated SDs
#'     \item \code{rho} : estimated correlation
#'     \item \code{se} : standard error of \code{rho} (proposed only)
#'     \item \code{ci} : confidence interval for \code{rho} (if requested)
#'   }
#' @examples
#' data(cont_example)
#' # Example with full summaries
#' cor_cont(cont_example$Sample_Size, cont_example$Mean_X, cont_example$Mean_Y,
#'  cont_example$Variance_X, cont_example$Variance_Y, method = "proposed", ci_method = "lr")
#'
#' # Only means + n, weighted mean method
#' cor_cont(cont_example$Sample_Size, cont_example$Mean_X, cont_example$Mean_Y, method = "weighted")
#' @export
#' @importFrom stats optim qnorm qchisq
cor_cont <- function(n, xbar, ybar, s2x = NULL, s2y = NULL, method = c("proposed", "weighted"), ci_method = c("none", "normal", "lr")) {
  ##############################################################################
  ## 0.  Check
  method    <- match.arg(method)
  ci_method <- match.arg(ci_method)
  
  if (missing(n))
    stop("`n` (sample sizes) is required.")
  if (!is.numeric(n) || any(n <= 0))
    stop("`n` must be a positive numeric vector.")
  if (!is.numeric(xbar) || !is.numeric(ybar))
    stop("xbar and ybar must be numeric vectors.")
  if (length(n) != length(xbar) || length(xbar) != length(ybar))
    stop("n, xbar, and ybar must have equal length.")
  k <- length(n)
  if (method == "proposed") {
    if (is.null(s2x) || is.null(s2y))
      stop("s2x and s2y are required for method = 'proposed'.")
    if (length(s2x) != k || length(s2y) != k)
      stop("s2x/s2y must be same length as n.")
  }
  
  ##############################################################################
  ## 1. Estimated Means: weighted average
  mu_x <- sum(n * xbar) / sum(n)
  mu_y <- sum(n * ybar) / sum(n)
  
  ##############################################################################
  if (method == "weighted") {
    ##  Weighted Mean Based Method #########################################
    sx2 <- sum(n * (xbar - mu_x)^2) / k
    sy2 <- sum(n * (ybar - mu_y)^2) / k
    rho_num <- sum(n * (xbar - mu_x) * (ybar - mu_y)) / k
    rho_hat <- rho_num / sqrt(sx2 * sy2)
    
    out <- list(mu_x     = mu_x,
                mu_y     = mu_y,
                sigma_x  = sqrt(sx2),
                sigma_y  = sqrt(sy2),
                rho      = rho_hat,
                se       = NA,
                ci       = NULL)
    return(out)
  }
  
  ##############################################################################
  ## 2. Proposed MLE Method  sigma_x, sigma_y
  sx2 <- sum(n * (xbar - mu_x)^2 + (n - 1) * s2x) / sum(n)
  sy2 <- sum(n * (ybar - mu_y)^2 + (n - 1) * s2y) / sum(n)
  sigma_x <- sqrt(sx2)
  sigma_y <- sqrt(sy2)
  ####### Auxiliary functions: numerically stable logarithmic Bessel ###########
  safe_log_bessel <- function(nu, x) {
    log_val <- log(besselI(x, nu, expon.scaled = TRUE)) + x
    if (is.finite(log_val)) {
      return(log_val)
    }
    kappa <- sqrt(nu^2 + x^2)
    eta   <- kappa + nu * log(x / (nu + kappa))
    log_prefactor <- -0.5 * log(2 * pi * kappa)
    eta + log_prefactor
  }
  #######  log likelihood  ###################################################
  loglik <- function(rho) {
    ll <- 0
    for (i in seq_len(k)) {
      
      a <- (n[i] - 4) / 2
      b <- abs((n[i] - 1) * rho * sqrt(s2x[i]) * sqrt(s2y[i]) /
                 ((1 - rho^2) * sigma_x * sigma_y))
      
      ll <- ll +
        log(n[i] / (2 * pi * sigma_x * sigma_y * sqrt(1 - rho^2))) +
        (-n[i] /(2*(1 - rho^2))) * (((xbar[i]-mu_x)^2)/sigma_x^2 - 2*rho*(xbar[i]-mu_x)*(ybar[i]-mu_y)/(sigma_x*sigma_y) + ((ybar[i]-mu_y)^2)/sigma_y^2) +
        ((n[i]-4)/2)*log(s2x[i]*s2y[i]) -
        ((n[i]-1)/(2*(1-rho^2))) * (s2x[i]/sigma_x^2 + s2y[i]/sigma_y^2) -
        (n[i]-1)*log(2*sqrt(sigma_x^2*sigma_y^2*(1-rho^2))/(n[i]-1)) -
        (0.5*log(pi) + lgamma((n[i]-1)/2) + lgamma(n[i]/2 - 1)) +
        0.5*log(pi) + (a+0.5)*(log(2) - log(b)) + lgamma(a+1) + safe_log_bessel(a + 0.5, b)
    }
    ll
  }
  
  ####### Grid Search & L-BFGS-B optimization ##################################
  grid_seq <- seq(-0.99, 0.99, by = 0.001)
  start_rho <- grid_seq[ which.max(sapply(grid_seq, loglik)) ]
  
  opt <- optim(start_rho, loglik,
               method  = "L-BFGS-B",
               lower   = -0.99,
               upper   =  0.99,
               control = list(fnscale = -1))
  
  rho_hat <- opt$par
  
  ## Observed Fisher Info & SE #################################################
  fd2 <- function(f,x,h=1e-5) (f(x+h)-2*f(x)+f(x-h))/h^2
  var_hat <- -1 / fd2(loglik, rho_hat)
  if (!is.finite(var_hat) || var_hat <= 0) {
    warning("Estimated variance of rho is non positive. ",
            "Results may be unreliable due to limited data or numerical issues.")
    se_hat <- NA
  } else {
    se_hat <- sqrt(var_hat)
  }
  
  ###### Confidence Interval ###################################################
  ci <- NULL
  if (ci_method == "normal" && !is.na(se_hat)) {
    z  <- qnorm(0.975)
    ci <- c(lower = max(-1, rho_hat - z*se_hat),
            upper = min( 1, rho_hat + z*se_hat))
  } else if (ci_method == "lr") {
    bound <- -0.5 * qchisq(0.95, 1) + loglik(rho_hat)
    f     <- function(r) abs(loglik(r) - bound)
    low   <- optimize(f, c(-0.99, rho_hat))$minimum
    up    <- optimize(f, c(rho_hat, 0.99))$minimum
    ci    <- c(lower = low, upper = up)
  }
  
  #####  Return  ###################################################
  list(mu_x    = mu_x,
       mu_y    = mu_y,
       sigma_x = sigma_x,
       sigma_y = sigma_y,
       rho     = rho_hat,
       se      = se_hat,
       ci      = ci)
}



