#' Hierarchical Clustering - Centroid
#'
#' @description Performs agglomerative hierarchical clustering with centroid
#' linkage. Every observation starts as its own cluster; at each step the
#' centroid (column means) of every current cluster is recomputed from the raw
#' data, the pairwise distances between centroids are obtained with
#' [stats::dist()], and the two closest clusters are merged. The function can
#' also print the clustering steps and display a dendrogram.
#'
#' @details Ties in the minimum distance are resolved by taking the first
#' minimal entry of the centroid distance matrix in column-major order.
#' Because centroid distances are recomputed after each merge, the merge
#' heights are not guaranteed to be increasing.
#'
#' @param data Numerical matrix or data frame of observations
#'   (rows = observations, columns = variables).
#' @param metric Distance metric passed to the `method` argument of
#'   [stats::dist()] (default: "euclidean").
#' @param print.steps If TRUE, the algorithm's steps are printed.
#' @param plot If TRUE, a dendrogram is plotted.
#' @param label.names If TRUE, uses the row names as labels in the dendrogram
#'   and in the printed steps. If FALSE, the dendrogram leaves are labelled
#'   with the observation numbers and the printed steps use negated
#'   observation indices.
#'
#' @return Invisibly, an object of class "hclust" with components `merge`,
#'   `height`, `order`, `labels` (row names of `data`, or observation numbers
#'   when `data` has no row names), `method` and `call`.
#'
#' @section Errors:
#' An error is raised when `data` is not a numeric matrix or data frame, when
#' it has fewer than two rows, or when a distance between two centroids is
#' missing (NA/NaN), which makes the closest pair undefined.
#'
#' @export
#' @examples
#' y1 <- c(1, 2, 1, 0); y2 <- c(2, 1, 0, 2)
#' y3 <- c(8, 8, 9, 7); y4 <- c(6, 9, 8, 9)
#' Data <- rbind(y1, y2, y3, y4)
#' hc <- hclust_centroid(Data, metric = "euclidean",
#'                       print.steps = TRUE,
#'                       plot = TRUE,
#'                       label.names = TRUE)
hclust_centroid <- function(data, metric = "euclidean", print.steps = TRUE,
                            plot = TRUE, label.names = TRUE) {
  # Basic input checks
  if (is.data.frame(data)) data <- as.matrix(data)
  if (!is.matrix(data) || !is.numeric(data)) stop("The data must be a numeric matrix or data frame.")
  n <- nrow(data)
  if (n < 2) stop("A minimum of two observations is needed.")

  # Observation names (fall back to "1", "2", ... when there are no row names)
  obs_names <- if (!is.null(rownames(data))) rownames(data) else as.character(seq_len(n))

  # Leaf labels actually drawn in the dendrogram: row names only when
  # label.names is TRUE, otherwise plain observation numbers.
  leaf_labels <- if (label.names) obs_names else as.character(seq_len(n))

  # Each cluster is stored as the vector of observation indices it contains.
  clusters <- lapply(seq_len(n), function(i) i)
  # hclust-style ids: negative = singleton observation, positive = merge step.
  cluster_ids <- -seq_len(n)

  merge_mat <- matrix(0L, nrow = n - 1, ncol = 2)
  heights <- numeric(n - 1)

  # Helper that builds the printable label of a cluster
  cluster_label <- function(idx) {
    if (label.names) {
      paste(obs_names[idx], collapse = "+")
    } else {
      paste(-idx, collapse = ",")
    }
  }

  if (print.steps) {
    cat("Start: each observation is a cluster:\n")
    for (i in seq_along(clusters)) {
      cat(sprintf(" [%s] members: (%s)\n", cluster_label(clusters[[i]]), cluster_label(clusters[[i]])))
    }
    cat("\n")
  }

  for (k in seq_len(n - 1)) {
    # Recompute the centroid of every current cluster from the raw data
    centroids <- do.call(rbind, lapply(clusters, function(idx) colMeans(data[idx, , drop = FALSE])))
    rownames(centroids) <- vapply(clusters, cluster_label, character(1))

    # Distance matrix between centroids (its diagonal is already zero)
    dmat <- as.matrix(dist(centroids, method = metric))

    # A missing distance makes the minimum undefined; fail with a clear
    # message instead of an obscure subscript error further down.
    if (anyNA(dmat)) {
      stop("Distances between centroids contain missing values; check 'data' for NA, NaN or infinite entries.")
    }

    if (print.steps) {
      cat(sprintf("... Step %d ...\n", k))
      cat("Current clusters and their centroids:\n")
      for (i in seq_along(clusters)) {
        cat(sprintf(" [%s] Centroid: %s\n",
                    cluster_label(clusters[[i]]),
                    paste(round(centroids[i, ], 6), collapse = ", ")))
      }
      cat("\nDistance matrix between centroids:\n")
      print(round(dmat, 6))
    }

    # Pick the pair with the smallest distance; the diagonal is masked with
    # Inf so a cluster is never paired with itself. The first hit in
    # column-major order is used when there are ties.
    dmat_tmp <- dmat
    diag(dmat_tmp) <- Inf
    minpos <- which(dmat_tmp == min(dmat_tmp), arr.ind = TRUE)[1, ]
    i <- minpos[[1L]]
    j <- minpos[[2L]]

    merge_mat[k, ] <- as.integer(c(cluster_ids[i], cluster_ids[j]))
    heights[k] <- dmat[i, j]

    if (print.steps) {
      cat(sprintf(" -> Merging clusters %s and %s (dist = %g)\n",
                  cluster_label(clusters[[i]]),
                  cluster_label(clusters[[j]]),
                  round(dmat[i, j], 6)))
    }

    # Replace the two merged clusters by their union, appended at the end and
    # identified by the (positive) step number k.
    new_cluster <- c(clusters[[i]], clusters[[j]])
    clusters <- clusters[-c(i, j)]
    cluster_ids <- cluster_ids[-c(i, j)]
    clusters[[length(clusters) + 1]] <- new_cluster
    cluster_ids <- c(cluster_ids, k)

    if (print.steps) {
      cat("Clusters after merging:\n")
      for (ii in seq_along(clusters)) {
        cat(sprintf(" [%s]\n", cluster_label(clusters[[ii]])))
      }
      cat("\n")
    }
  }

  # Leaf order for the dendrogram. Members are always concatenated as
  # (first merged cluster, second merged cluster), i.e. in the same left/right
  # order as the rows of merge_mat, so the single remaining cluster already
  # lists the leaves in dendrogram order.
  order_vec <- clusters[[1L]]

  hc <- list(merge = merge_mat,
             height = heights,
             order = as.integer(order_vec),
             labels = obs_names,
             method = paste0("centroid (", metric, ")"),
             call = match.call())
  class(hc) <- "hclust"

  if (plot) {
    plot(hc, labels = leaf_labels,
         main = paste0("Dendrogram - Centroid Linkage (", metric, ")"))
  }

  invisible(hc)
}

