module RegimeChangeJulia

using Statistics
using LinearAlgebra
using SpecialFunctions
using Random
using Logging

export pelt_detect, fpop_detect, bocpd_detect, cusum_detect, 
       kernel_cpd_detect, wbs_detect, segneigh_detect,
       pelt_multivariate, validate_data, ValidationError,
       NormalGammaPrior, ChangePointResult

# Numerical tolerances and safety limits referenced by the routines in this file.
module Config
    # General "effectively zero" tolerance (e.g. default floor used by `safe_log`).
    const EPSILON = 1.0e-10
    # Default upper bound on the input length accepted by `validate_data`.
    const MAX_ARRAY_SIZE = 10_000_000
    # Multiplier applied to the problem size to derive iteration budgets.
    const MAX_ITERATIONS_FACTOR = 10
    # Presumably the fallback minimum segment length; not referenced in the
    # part of the file visible here -- TODO confirm.
    const DEFAULT_MIN_SEGMENT = 2
    # Floor substituted for vanishing variance estimates.
    const VARIANCE_REGULARIZATION = 1.0e-10
    # Presumably a limit for matrix conditioning checks; not referenced in the
    # part of the file visible here -- TODO confirm.
    const CONDITION_NUMBER_THRESHOLD = 1.0e12
    # Bounds on log-domain values, used by `safe_log` / `safe_exp`.
    const LOG_UNDERFLOW_THRESHOLD = -700.0
    const LOG_OVERFLOW_THRESHOLD = 700.0
    # Not referenced in the part of the file visible here -- TODO confirm usage.
    const WELFORD_STABILITY_THRESHOLD = 1.0e-14
end

"""
    ValidationError(msg, parameter, value)

Exception thrown by the `validate_*` helpers when an argument is rejected.

- `msg`: description of the violated requirement.
- `parameter`: name of the offending argument.
- `value`: the rejected value, or a short description of it (some call sites
  pass text such as `"length=3"` instead of the raw value).
"""
struct ValidationError <: Exception
    msg::String
    parameter::String
    value::Any
end

# Render a `ValidationError` as a single line naming the parameter, the reason
# and the rejected value.
function Base.showerror(io::IO, e::ValidationError)
    message = "ValidationError in parameter '$(e.parameter)': $(e.msg) (got: $(e.value))"
    print(io, message)
    return nothing
end

"""
    NumericalInstabilityError(msg, context)

Exception signalling that a computation produced an unusable value.

- `msg`: what went wrong (e.g. `"NaN value computed"`).
- `context`: label of the computation in which the problem was observed.
"""
struct NumericalInstabilityError <: Exception
    msg::String
    context::String
end

# Render a `NumericalInstabilityError` as "<type> in <context>: <message>".
function Base.showerror(io::IO, e::NumericalInstabilityError)
    message = "NumericalInstabilityError in $(e.context): $(e.msg)"
    print(io, message)
    return nothing
end

"""
    MaxIterationsError(iterations, context)

Exception thrown when a loop exceeds its iteration budget.

- `iterations`: the budget that was exceeded.
- `context`: label of the loop that gave up.
"""
struct MaxIterationsError <: Exception
    iterations::Int
    context::String
end

# Render a `MaxIterationsError` with the exceeded budget and the loop label.
function Base.showerror(io::IO, e::MaxIterationsError)
    message = "MaxIterationsError: Exceeded $(e.iterations) iterations in $(e.context)"
    print(io, message)
    return nothing
end

"""
    ChangePointResult

Common return type of the detectors in this file.

- `changepoints`: detected changepoint indices (1-based positions in the input).
- `cost`: objective value reported by the algorithm (`NaN` when not provided).
- `algorithm`: name of the producing algorithm, e.g. `"PELT"`.
- `converged`: flag supplied by the algorithm (defaults to `true`).
- `iterations`: iteration count reported by the algorithm.
- `metadata`: algorithm-specific extras (parameters, diagnostic series, ...).
"""
struct ChangePointResult
    changepoints::Vector{Int}
    cost::Float64
    algorithm::String
    converged::Bool
    iterations::Int
    metadata::Dict{String,Any}
end

"""
    ChangePointResult(changepoints; cost=NaN, algorithm="unknown", converged=true,
                      iterations=0, metadata=Dict{String,Any}())

Keyword-argument convenience constructor: only the changepoint vector is
required, every other field falls back to a neutral default.
"""
function ChangePointResult(changepoints::Vector{Int};
                           cost::Float64=NaN,
                           algorithm::String="unknown",
                           converged::Bool=true,
                           iterations::Int=0,
                           metadata::Dict{String,Any}=Dict{String,Any}())
    fields = (changepoints, cost, algorithm, converged, iterations, metadata)
    return ChangePointResult(fields...)
end

"""
    validate_data(data; allow_nan=false, allow_inf=false, min_length=1,
                  max_length=Config.MAX_ARRAY_SIZE)

Check that `data` is non-empty, has a length within `[min_length, max_length]`
and (unless allowed) contains no `NaN` / `Inf` entries.

Returns `true` on success and throws a `ValidationError` describing the first
violated requirement otherwise.
"""
function validate_data(data::AbstractVector{<:Real};
                       allow_nan::Bool=false,
                       allow_inf::Bool=false,
                       min_length::Int=1,
                       max_length::Int=Config.MAX_ARRAY_SIZE)
    isempty(data) &&
        throw(ValidationError("Data vector cannot be empty", "data", "empty vector"))

    n = length(data)
    n < min_length &&
        throw(ValidationError("Data length must be at least $min_length", "data", "length=$n"))
    n > max_length &&
        throw(ValidationError("Data length exceeds maximum allowed ($max_length)", "data", "length=$n"))

    if !allow_nan
        nan_idx = findfirst(isnan, data)
        isnothing(nan_idx) ||
            throw(ValidationError("Data contains NaN values", "data", "NaN at index $nan_idx"))
    end

    if !allow_inf
        inf_idx = findfirst(isinf, data)
        isnothing(inf_idx) ||
            throw(ValidationError("Data contains Inf values", "data", "Inf at index $inf_idx"))
    end

    return true
end

# Return `true` when `value > 0`; otherwise throw a `ValidationError` naming
# the parameter `name`.
function validate_positive(value::Real, name::String)
    value <= 0 && throw(ValidationError("Value must be strictly positive", name, value))
    return true
end

# Return `true` when `value >= 0`; otherwise throw a `ValidationError` naming
# the parameter `name`.
function validate_non_negative(value::Real, name::String)
    value < 0 && throw(ValidationError("Value must be non-negative", name, value))
    return true
end

# Return `true` when `min_val <= value <= max_val` (bounds inclusive);
# otherwise throw a `ValidationError` naming the parameter `name`.
function validate_in_range(value::Real, name::String, min_val::Real, max_val::Real)
    outside = value < min_val || value > max_val
    outside && throw(ValidationError("Value must be in range [$min_val, $max_val]", name, value))
    return true
end

"""
    validate_min_segment(min_segment, n, k=1)

Check that `min_segment >= 1` and that `n` observations can hold `k`
changepoints, i.e. `k + 1` segments of at least `min_segment` points each.
Returns `true`, or throws a `ValidationError`.
"""
function validate_min_segment(min_segment::Int, n::Int, k::Int=1)
    min_segment < 1 &&
        throw(ValidationError("Minimum segment must be at least 1", "min_segment", min_segment))

    needed = (k + 1) * min_segment
    if needed > n
        throw(ValidationError(
            "Data too short for $(k) changepoints with min_segment=$min_segment (need at least $((k+1)*min_segment) observations)",
            "min_segment", min_segment
        ))
    end
    return true
end

"""
    safe_log(x; threshold=Config.EPSILON)

Logarithm that never receives a non-positive or tiny argument:
`x <= 0` yields `Config.LOG_UNDERFLOW_THRESHOLD`, `0 < x < threshold` yields
`log(threshold)`, anything else yields `log(x)`.
"""
function safe_log(x::Float64; threshold::Float64=Config.EPSILON)
    x <= 0 && return Config.LOG_UNDERFLOW_THRESHOLD
    return log(x < threshold ? threshold : x)
end

"""
    safe_exp(x)

Exponential with saturation: arguments below `Config.LOG_UNDERFLOW_THRESHOLD`
give `0.0`, arguments above `Config.LOG_OVERFLOW_THRESHOLD` give
`exp(Config.LOG_OVERFLOW_THRESHOLD)`.
"""
function safe_exp(x::Float64)
    x < Config.LOG_UNDERFLOW_THRESHOLD && return 0.0
    x > Config.LOG_OVERFLOW_THRESHOLD && return exp(Config.LOG_OVERFLOW_THRESHOLD)
    return exp(x)
end

"""
    logsumexp(x)

Compute `log(sum(exp.(x)))` by shifting with the maximum before exponentiating.
Entries that are not greater than `-Inf` (i.e. `-Inf` and `NaN`) are ignored;
the result is `-Inf` when nothing remains and `Inf` when any entry is `Inf`.
"""
function logsumexp(x::AbstractVector{Float64})
    # `v > -Inf` is false for both -Inf and NaN, so neither survives the filter.
    kept = filter(>(-Inf), x)
    isempty(kept) && return -Inf

    peak = maximum(kept)
    peak == Inf && return Inf

    shifted = safe_exp.(kept .- peak)
    return peak + log(sum(shifted))
end

"""
    welford_mean_var(data)

Single-pass (Welford) computation of the mean and the population variance
(sum of squared deviations divided by `n`, not `n - 1`).

Returns a named tuple `(mean, var, n)`; `var` is `0.0` for fewer than two
observations.
"""
function welford_mean_var(data::AbstractVector{<:Real})
    seen = 0
    avg = 0.0
    sq_dev = 0.0

    for value in data
        seen += 1
        before = value - avg          # deviation from the previous mean
        avg += before / seen
        sq_dev += before * (value - avg)
    end

    seen < 2 && return (mean=avg, var=0.0, n=seen)
    return (mean=avg, var=max(0.0, sq_dev / seen), n=seen)
end

"""
    welford_update(state, x)

Fold one observation `x` into a Welford accumulator. `state` must provide the
fields `n`, `mean` and `M2`; a new named tuple `(n, mean, M2)` is returned.
"""
function welford_update(state::NamedTuple, x::Float64)
    count = state.n + 1
    offset = x - state.mean
    updated_mean = state.mean + offset / count
    updated_m2 = state.M2 + offset * (x - updated_mean)
    return (n=count, mean=updated_mean, M2=updated_m2)
end

"""
    robust_variance(data; regularize=true)

Variance estimate based on the squared value returned by `calculate_mad`
(defined elsewhere in the file), falling back to the Welford population
variance when that value is below `Config.EPSILON` (e.g. constant data).

With `regularize=true` the result is never smaller than
`Config.VARIANCE_REGULARIZATION`. Inputs with fewer than two points return the
floor (or `0.0` when `regularize=false`).
"""
function robust_variance(data::AbstractVector{<:Real}; regularize::Bool=true)
    floor_value = Config.VARIANCE_REGULARIZATION
    if length(data) < 2
        return regularize ? floor_value : 0.0
    end

    # Primary estimate: squared MAD.
    variance = calculate_mad(data)^2

    # Fallback when the MAD is (numerically) zero.
    if variance < Config.EPSILON
        variance = welford_mean_var(data).var
    end

    return (regularize && variance < floor_value) ? floor_value : variance
end

"""
    robust_std(data; regularize=true)

Noise scale estimate that is insensitive to trends and level shifts: it applies
`calculate_mad` (defined elsewhere in the file) to the first differences of
`data` and divides by an approximation of `sqrt(2)`.

With `regularize=true`, a result below `Config.EPSILON` (e.g. a constant
series) is replaced by `Config.VARIANCE_REGULARIZATION`. Inputs with fewer than
two points return that floor (or `0.0` when `regularize=false`).
"""
function robust_std(data::AbstractVector{<:Real}; regularize::Bool=true)
    if length(data) < 2
        return regularize ? Config.VARIANCE_REGULARIZATION : 0.0
    end

    # Differencing consecutive points (x_t - x_{t-1}) removes trend and mean
    # jumps, leaving only the noise.
    mad_of_steps = calculate_mad(diff(data))

    # Var(A - B) = Var(A) + Var(B) = 2 * Var(noise), so the spread of the
    # differences is sqrt(2) times that of the noise; divide to compensate.
    # NOTE(review): the literal is sqrt(2) truncated to 9 significant digits.
    sigma = mad_of_steps / 1.41421356

    # A perfectly constant series yields zero; substitute the minimum value.
    if regularize && sigma < Config.EPSILON
        sigma = Config.VARIANCE_REGULARIZATION
    end

    return sigma
end

"""
    check_numerical_stability(value, context; warn_threshold=1e10)

Pass `value` through unchanged after verifying it is usable: `NaN` and `Inf`
raise a `NumericalInstabilityError` tagged with `context`; a magnitude above
`warn_threshold` only logs a warning.
"""
function check_numerical_stability(value::Float64, context::String;
                                   warn_threshold::Float64=1e10)
    isnan(value) && throw(NumericalInstabilityError("NaN value computed", context))
    isinf(value) && throw(NumericalInstabilityError("Inf value computed", context))

    if abs(value) > warn_threshold
        @warn "Large value detected in $context" value=value
    end
    return value
end

# Supertype of the segment cost models below; each concrete subtype provides a
# `segment_cost(cost, start, stop)` method.
abstract type AbstractCostFunction end

"""
    GaussianCost

Precomputed prefix sums for the squared-error (change in mean) segment cost.

- `cumsum`: prefix sums of the data.
- `cumsum_sq`: prefix sums of the squared data.
- `cumsum_compensated`: compensation term of the compensated summation of the
  data after each element (stored by the constructor; not read by
  `segment_cost`).
- `n`: number of observations.
- `data_range`: `(minimum, maximum)` of the data.
"""
struct GaussianCost <: AbstractCostFunction
    cumsum::Vector{Float64}
    cumsum_sq::Vector{Float64}
    cumsum_compensated::Vector{Float64}
    n::Int
    data_range::Tuple{Float64, Float64}
end

"""
    GaussianCost(data)

Validate `data` and build the prefix sums of the values and of their squares
using compensated (Kahan) summation, so that segment sums can later be obtained
by differencing.
"""
function GaussianCost(data::Vector{Float64})
    validate_data(data)
    n = length(data)

    prefix = zeros(n)
    prefix_sq = zeros(n)
    residuals = zeros(n)

    running = 0.0
    running_sq = 0.0
    err = 0.0       # compensation for the plain sum
    err_sq = 0.0    # compensation for the sum of squares

    for (i, x) in enumerate(data)
        adj = x - err
        next = running + adj
        err = (next - running) - adj
        running = next

        adj_sq = x^2 - err_sq
        next_sq = running_sq + adj_sq
        err_sq = (next_sq - running_sq) - adj_sq
        running_sq = next_sq

        prefix[i] = running
        prefix_sq[i] = running_sq
        residuals[i] = err
    end

    bounds = (minimum(data), maximum(data))
    return GaussianCost(prefix, prefix_sq, residuals, n, bounds)
end

"""
    segment_cost(gc::GaussianCost, start, stop)

Sum of squared deviations from the segment mean for `data[start:stop]`,
computed from the prefix sums. Returns `Inf` for bounds outside `1:gc.n` or
`stop < start`. Slightly negative values caused by cancellation are clipped to
zero (with a warning when the deficit is not negligible).
"""
function segment_cost(gc::GaussianCost, start::Int, stop::Int)
    if !(1 <= start <= stop <= gc.n)
        @debug "Invalid segment bounds" start=start stop=stop n=gc.n
        return Inf
    end

    len = stop - start + 1
    sum_x, sum_x2 = if start == 1
        (gc.cumsum[stop], gc.cumsum_sq[stop])
    else
        (gc.cumsum[stop] - gc.cumsum[start - 1],
         gc.cumsum_sq[stop] - gc.cumsum_sq[start - 1])
    end

    # sum(x^2) - (sum x)^2 / len, written as sum_x * mean.
    cost = sum_x2 - sum_x * (sum_x / len)

    if cost < -Config.EPSILON * abs(sum_x2)
        @warn "Negative cost detected, likely numerical instability" cost=cost sum_x2=sum_x2 start=start stop=stop
        cost = 0.0
    end

    return max(0.0, cost)
end

"""
    GaussianMeanVarCost

Precomputed prefix sums for the Gaussian change-in-mean-and-variance cost.

- `cumsum`: prefix sums of the data.
- `cumsum_sq`: prefix sums of the squared data.
- `n`: number of observations.
- `min_variance`: floor applied to each segment's variance estimate.
"""
struct GaussianMeanVarCost <: AbstractCostFunction
    cumsum::Vector{Float64}
    cumsum_sq::Vector{Float64}
    n::Int
    min_variance::Float64
end

"""
    GaussianMeanVarCost(data; min_variance=Config.VARIANCE_REGULARIZATION)

Validate `data` and precompute the prefix sums of the values and of their
squares.
"""
function GaussianMeanVarCost(data::Vector{Float64};
                             min_variance::Float64=Config.VARIANCE_REGULARIZATION)
    validate_data(data)
    prefix_sums = cumsum(data)
    prefix_squares = cumsum(data .^ 2)
    return GaussianMeanVarCost(prefix_sums, prefix_squares, length(data), min_variance)
end

"""
    segment_cost(gc::GaussianMeanVarCost, start, stop)

Return `len * log(σ²)` for `data[start:stop]`, where `σ²` is the segment's
population variance floored at `gc.min_variance`. Segments of length one cost
`0.0`; invalid bounds cost `Inf`.
"""
function segment_cost(gc::GaussianMeanVarCost, start::Int, stop::Int)
    (1 <= start <= stop <= gc.n) || return Inf

    len = stop - start + 1
    len < 2 && return 0.0

    sum_x, sum_x2 = if start == 1
        (gc.cumsum[stop], gc.cumsum_sq[stop])
    else
        (gc.cumsum[stop] - gc.cumsum[start - 1],
         gc.cumsum_sq[stop] - gc.cumsum_sq[start - 1])
    end

    μ = sum_x / len
    σ2 = (sum_x2 - len * μ^2) / len
    σ2 = σ2 <= gc.min_variance ? gc.min_variance : σ2

    return len * safe_log(σ2)
end

"""
    PoissonCost

Precomputed prefix sums for the Poisson segment cost.

- `cumsum`: prefix sums of the (non-negative) data.
- `n`: number of observations.
"""
struct PoissonCost <: AbstractCostFunction
    cumsum::Vector{Float64}
    n::Int
end

"""
    PoissonCost(data)

Validate `data`, reject negative entries with a `ValidationError`, and
precompute the prefix sums.
"""
function PoissonCost(data::Vector{Float64})
    validate_data(data)

    any(<(0), data) &&
        throw(ValidationError("Poisson cost requires non-negative data", "data", "contains negative values"))

    return PoissonCost(cumsum(data), length(data))
end

"""
    segment_cost(pc::PoissonCost, start, stop)

Poisson cost `-Σx * log(λ) + len * λ` with `λ` the segment mean. Segments whose
total does not exceed `Config.EPSILON` cost `0.0`; invalid bounds cost `Inf`.
"""
function segment_cost(pc::PoissonCost, start::Int, stop::Int)
    (1 <= start <= stop <= pc.n) || return Inf

    len = stop - start + 1
    total = start == 1 ? pc.cumsum[stop] : pc.cumsum[stop] - pc.cumsum[start - 1]

    total <= Config.EPSILON && return 0.0

    rate = total / len
    return -total * safe_log(rate) + len * rate
end

"""
    ExponentialCost

Precomputed prefix sums for the exponential segment cost.

- `cumsum`: prefix sums of the (strictly positive) data.
- `n`: number of observations.
"""
struct ExponentialCost <: AbstractCostFunction
    cumsum::Vector{Float64}
    n::Int
end

"""
    ExponentialCost(data)

Validate `data`, reject non-positive entries with a `ValidationError`, and
precompute the prefix sums.
"""
function ExponentialCost(data::Vector{Float64})
    validate_data(data)

    any(<=(0), data) &&
        throw(ValidationError("Exponential cost requires strictly positive data", "data", "contains non-positive values"))

    return ExponentialCost(cumsum(data), length(data))
end

"""
    segment_cost(ec::ExponentialCost, start, stop)

Exponential cost `len * log(mean) + len` for `data[start:stop]`. Returns `Inf`
for invalid bounds or when the segment total does not exceed `Config.EPSILON`.
"""
function segment_cost(ec::ExponentialCost, start::Int, stop::Int)
    (1 <= start <= stop <= ec.n) || return Inf

    len = stop - start + 1
    total = start == 1 ? ec.cumsum[stop] : ec.cumsum[stop] - ec.cumsum[start - 1]

    total <= Config.EPSILON && return Inf

    return len * safe_log(total / len) + len
end

"""
    create_cost_function(data, cost_type)

Build the cost model selected by `cost_type` (case-insensitive):

- `"mean"`, `"gaussian"`, `"normal"`, `"l2"` → `GaussianCost`
- `"meanvar"`, `"gaussian_meanvar"`, `"normal_meanvar"` → `GaussianMeanVarCost`
- `"poisson"`, `"count"` → `PoissonCost`
- `"exponential"`, `"exp"` → `ExponentialCost`

Any other name logs a warning and falls back to `GaussianCost`.
"""
function create_cost_function(data::Vector{Float64}, cost_type::String)
    key = lowercase(cost_type)

    key in ("mean", "gaussian", "normal", "l2") && return GaussianCost(data)
    key in ("meanvar", "gaussian_meanvar", "normal_meanvar") && return GaussianMeanVarCost(data)
    key in ("poisson", "count") && return PoissonCost(data)
    key in ("exponential", "exp") && return ExponentialCost(data)

    @warn "Unknown cost type '$cost_type', defaulting to Gaussian mean"
    return GaussianCost(data)
end

"""
    pelt_detect(data, penalty, min_segment; cost_type="mean", max_changepoints=nothing)

Penalised changepoint detection by dynamic programming with candidate pruning.

# Arguments
- `data`: observations (at least two, no `NaN`/`Inf`).
- `penalty`: non-negative cost added per segment.
- `min_segment`: minimum number of observations per segment.

# Keywords
- `cost_type`: name passed to `create_cost_function`.
- `max_changepoints`: optional positive cap; a candidate is only accepted as the
  best predecessor while its own changepoint count is below the cap.

# Returns
A `ChangePointResult` whose `changepoints` are the (sorted) indices of the last
observation of every segment except the final one.

# Throws
`ValidationError` for invalid arguments, including data shorter than
`2 * min_segment` (raised by `validate_min_segment`).
"""
function pelt_detect(data::Vector{Float64}, penalty::Float64, min_segment::Int;
                     cost_type::String="mean",
                     max_changepoints::Union{Nothing,Int}=nothing)
    
    validate_data(data; min_length=2)
    validate_non_negative(penalty, "penalty")
    validate_min_segment(min_segment, length(data))
    
    if !isnothing(max_changepoints)
        validate_positive(max_changepoints, "max_changepoints")
    end
    
    n = length(data)
    
    @debug "PELT initialized" n=n penalty=penalty min_segment=min_segment cost_type=cost_type
    
    # NOTE: `validate_min_segment` above (with its default k=1) already throws
    # when 2 * min_segment > n, so this early return cannot be reached.
    if n < 2 * min_segment
        @info "Data too short for changepoint detection" n=n min_required=2*min_segment
        return ChangePointResult(Int[]; 
                                 cost=0.0, 
                                 algorithm="PELT",
                                 metadata=Dict{String,Any}("reason" => "data_too_short"))
    end
    
    cost_func = create_cost_function(data, cost_type)
    
    # F[i + 1] holds the best penalised cost found for data[1:i]. Starting at
    # -penalty cancels the penalty charged for the first segment.
    F = fill(Inf, n + 1)
    F[1] = -penalty
    
    # cp_prev[i + 1]: start index of the last segment in the best
    # segmentation of data[1:i].
    cp_prev = zeros(Int, n + 1)
    
    # n_cps[i + 1]: number of changepoints in that segmentation.
    n_cps = zeros(Int, n + 1)
    
    # Candidate start indices for the last segment.
    R = [1]
    
    total_iterations = 0
    # The loop below runs at most n times, so this budget is never exceeded.
    max_iterations = Config.MAX_ITERATIONS_FACTOR * n
    
    for t in min_segment:n
        total_iterations += 1
        
        if total_iterations > max_iterations
            throw(MaxIterationsError(max_iterations, "PELT main loop"))
        end
        
        best_cost = Inf
        best_s = 1
        # Cost of every candidate, in the same order as R (Inf for candidates
        # whose segment would be too short); reused by the pruning step.
        costs_for_pruning = Float64[]
        
        for s in R
            if t - s + 1 >= min_segment
                seg_cost = segment_cost(cost_func, s, t)
                total_cost = F[s] + seg_cost + penalty
                
                if isnan(total_cost)
                    @warn "NaN cost encountered" s=s t=t F_s=F[s] seg_cost=seg_cost
                    total_cost = Inf
                end
                
                push!(costs_for_pruning, total_cost)
                
                if total_cost < best_cost
                    # The changepoint cap only restricts which candidate may
                    # become the predecessor; it does not affect pruning.
                    if isnothing(max_changepoints) || n_cps[s] < max_changepoints
                        best_cost = total_cost
                        best_s = s
                    end
                end
            else
                push!(costs_for_pruning, Inf)
            end
        end
        
        F[t + 1] = best_cost
        cp_prev[t + 1] = best_s
        # Starting the last segment after index 1 adds one changepoint.
        n_cps[t + 1] = n_cps[best_s] + (best_s > 1 ? 1 : 0)
        
        # Pruning: keep candidates that are still too recent to form a full
        # segment, and those whose cost (penalty included) does not exceed the
        # new optimum plus the penalty.
        R_new = Int[]
        for (i, s) in enumerate(R)
            keep = false
            
            if t - s + 1 < min_segment
                keep = true
            elseif i <= length(costs_for_pruning) && costs_for_pruning[i] <= F[t + 1] + penalty
                keep = true
            end
            
            if keep
                push!(R_new, s)
            end
        end
        
        # A new segment may start right after t.
        push!(R_new, t + 1)
        R = R_new
        
        @debug "PELT iteration" t=t candidates=length(R) best_cost=best_cost
    end
    
    # Backtrack from the end: each predecessor `prev > 1` marks a segment
    # starting at `prev`, i.e. a changepoint after observation `prev - 1`.
    changepoints = Int[]
    t = n + 1
    backtrack_iterations = 0
    
    while t > 1
        backtrack_iterations += 1
        if backtrack_iterations > n + 1
            throw(NumericalInstabilityError("Infinite loop in backtracking", "PELT backtrack"))
        end
        
        prev = cp_prev[t]
        if prev > 1
            push!(changepoints, prev - 1)
        end
        t = prev
    end
    
    changepoints = sort(changepoints)
    
    @info "PELT completed" n_changepoints=length(changepoints) final_cost=F[n+1] iterations=total_iterations
    
    return ChangePointResult(
        changepoints;
        cost=F[n + 1],
        algorithm="PELT",
        iterations=total_iterations,
        metadata=Dict{String,Any}(
            "penalty" => penalty,
            "cost_type" => cost_type,
            "min_segment" => min_segment,
            "final_candidates" => length(R)
        )
    )
end

"""
    FunctionalPiece

Quadratic `a * μ^2 + b * μ + c` restricted to the interval `[left, right]`.

- `a`, `b`, `c`: polynomial coefficients.
- `left`, `right`: interval bounds for `μ`.
- `origin`: integer tag carried with the piece; presumably the index of the
  changepoint that generated it -- TODO confirm against its users.
"""
struct FunctionalPiece
    a::Float64
    b::Float64
    c::Float64
    left::Float64
    right::Float64
    origin::Int
end

# Value of the quadratic piece at `μ` (the interval bounds are not checked).
evaluate(fp::FunctionalPiece, μ::Float64) = fp.a * μ^2 + fp.b * μ + fp.c

"""
    minimum_in_interval(fp)

Return `(argmin, min)` of the piece over `[fp.left, fp.right]`.

When the leading coefficient does not exceed `Config.EPSILON` the piece is
treated as having no interior minimum and the better endpoint is returned
(the right one on ties). Otherwise the vertex is clamped into the interval.
"""
function minimum_in_interval(fp::FunctionalPiece)
    if fp.a <= Config.EPSILON
        at_left = evaluate(fp, fp.left)
        at_right = evaluate(fp, fp.right)
        return at_left < at_right ? (fp.left, at_left) : (fp.right, at_right)
    end

    vertex = -fp.b / (2 * fp.a)
    arg = vertex < fp.left ? fp.left : (vertex > fp.right ? fp.right : vertex)
    return (arg, evaluate(fp, arg))
end

"""
    fpop_detect(data, penalty, min_segment; max_changepoints=nothing)

Penalised change-in-mean detection with a squared-error segment cost.

The search is an exact optimal-partitioning dynamic programme: for every end
position `t` all admissible start positions of the last segment are examined
(`O(n^2)` segment evaluations). No functional pruning is performed.

# Arguments
- `data`: observations (at least two, no `NaN`/`Inf`).
- `penalty`: non-negative cost added per segment.
- `min_segment`: minimum number of observations per segment.

# Keywords
- `max_changepoints`: optional positive cap on the number of changepoints. It
  is enforced greedily, in the same way as in `pelt_detect`: a start position
  is only accepted as predecessor while the segmentation ending just before it
  has fewer changepoints than the cap.

# Returns
A `ChangePointResult` (`algorithm == "FPOP"`) whose `changepoints` are the
sorted indices of the last observation of every segment except the final one.

# Throws
`ValidationError` for invalid arguments.
"""
function fpop_detect(data::Vector{Float64}, penalty::Float64, min_segment::Int;
                     max_changepoints::Union{Nothing,Int}=nothing)

    validate_data(data; min_length=2)
    validate_non_negative(penalty, "penalty")
    validate_min_segment(min_segment, length(data))

    # Previously this keyword was accepted but silently ignored.
    if !isnothing(max_changepoints)
        validate_positive(max_changepoints, "max_changepoints")
    end

    n = length(data)

    # Defensive only: `validate_min_segment` already rejects such inputs.
    if n < 2 * min_segment
        return ChangePointResult(Int[];
                                 cost=0.0,
                                 algorithm="FPOP",
                                 metadata=Dict{String,Any}("reason" => "data_too_short"))
    end

    # Prefix sums of x and x^2 with compensated (Kahan) summation.
    cumsum_data = zeros(n)
    cumsum_sq = zeros(n)

    sum_val = 0.0
    sum_sq = 0.0
    comp = 0.0
    comp_sq = 0.0

    for i in 1:n
        y = data[i] - comp
        acc = sum_val + y
        comp = (acc - sum_val) - y
        sum_val = acc

        y_sq = data[i]^2 - comp_sq
        acc_sq = sum_sq + y_sq
        comp_sq = (acc_sq - sum_sq) - y_sq
        sum_sq = acc_sq

        cumsum_data[i] = sum_val
        cumsum_sq[i] = sum_sq
    end

    # F[i + 1]: best penalised cost of data[1:i]; -penalty cancels the penalty
    # charged for the first segment.
    F = fill(Inf, n + 1)
    F[1] = -penalty

    cp_prev = zeros(Int, n + 1)   # start of the last segment for each prefix
    n_cps = zeros(Int, n + 1)     # changepoints used by each prefix optimum

    # The former O(n log n) iteration budget was removed: the scan below needs
    # about n^2 / 2 steps, so the budget was exhausted for n of a few hundred,
    # after which every remaining prefix was left at cost Inf and the result
    # contained no changepoints.
    total_iterations = 0

    for t in min_segment:n
        best_cost = Inf
        best_s = 1

        for s in 1:(t - min_segment + 1)
            total_iterations += 1

            len = t - s + 1
            if s == 1
                sum_x = cumsum_data[t]
                sum_x2 = cumsum_sq[t]
            else
                sum_x = cumsum_data[t] - cumsum_data[s - 1]
                sum_x2 = cumsum_sq[t] - cumsum_sq[s - 1]
            end

            # Segment cost as a quadratic in the mean, a*μ^2 + b*μ + c,
            # evaluated at its minimiser μ = -b / (2a) (the segment mean).
            a = Float64(len)
            b = -2.0 * sum_x
            μ_opt = -b / (2 * a)
            segment_min_cost = max(0.0, a * μ_opt^2 + b * μ_opt + sum_x2)

            total_cost = F[s] + segment_min_cost + penalty

            allowed = isnothing(max_changepoints) || n_cps[s] < max_changepoints
            if allowed && total_cost < best_cost
                best_cost = total_cost
                best_s = s
            end
        end

        F[t + 1] = best_cost
        cp_prev[t + 1] = best_s
        n_cps[t + 1] = n_cps[best_s] + (best_s > 1 ? 1 : 0)
    end

    # Backtrack: a predecessor `prev > 1` means a segment starts at `prev`,
    # i.e. there is a changepoint after observation `prev - 1`.
    changepoints = Int[]
    pos = n + 1
    while pos > 1
        prev = cp_prev[pos]
        if prev > 1
            push!(changepoints, prev - 1)
        end
        pos = prev
    end

    return ChangePointResult(
        sort(changepoints);
        cost=F[n + 1],
        algorithm="FPOP",
        iterations=total_iterations,
        metadata=Dict{String,Any}(
            "penalty" => penalty,
            "min_segment" => min_segment
        )
    )
end

"""
    NormalGammaPrior(μ0, κ0, α0, β0)
    NormalGammaPrior()

Hyperparameters of the Normal-Gamma prior used by `bocpd_detect`.

- `μ0`: prior mean.
- `κ0`: prior pseudo-count attached to the mean (must be positive).
- `α0`, `β0`: remaining hyperparameters, both required to be positive; they
  enter the predictive scale as `β (κ + 1) / (α κ)` in `bocpd_detect`.

The inner constructor throws a `ValidationError` when `κ0`, `α0` or `β0` is not
strictly positive. The zero-argument form uses `(0.0, 0.01, 1.0, 1.0)`.
"""
mutable struct NormalGammaPrior
    μ0::Float64
    κ0::Float64
    α0::Float64
    β0::Float64
    
    function NormalGammaPrior(μ0::Float64, κ0::Float64, α0::Float64, β0::Float64)
        if κ0 <= 0
            throw(ValidationError("κ0 must be positive", "κ0", κ0))
        end
        if α0 <= 0
            throw(ValidationError("α0 must be positive", "α0", α0))
        end
        if β0 <= 0
            throw(ValidationError("β0 must be positive", "β0", β0))
        end
        new(μ0, κ0, α0, β0)
    end
end

# Default prior: zero mean with a very small pseudo-count (κ0 = 0.01).
NormalGammaPrior() = NormalGammaPrior(0.0, 0.01, 1.0, 1.0)

# Posterior Normal-Gamma parameters maintained by `bocpd_detect`, one entry per
# run length (index r + 1 holds the parameters for run length r). The struct is
# mutable because the detector replaces all four vectors after every
# observation.
mutable struct BOCPDState
    μ_post::Vector{Float64}
    κ_post::Vector{Float64}
    α_post::Vector{Float64}
    β_post::Vector{Float64}
end

"""
    bocpd_detect(data, hazard_rate; prior=NormalGammaPrior(), threshold=0.5,
                 max_runlen=500, return_full=false)

Bayesian online changepoint detection with a constant hazard and a
Normal-Gamma conjugate model (Student-t predictive distribution).

# Arguments
- `data`: observations (no `NaN`/`Inf`).
- `hazard_rate`: per-step changepoint probability, within
  `[Config.EPSILON, 1 - Config.EPSILON]`.

# Keywords
- `prior`: Normal-Gamma hyperparameters.
- `threshold`: a time `t > 1` is reported when the posterior probability of
  run length zero exceeds this value.
- `max_runlen`: run lengths are truncated at `min(length(data), max_runlen)`.
- `return_full`: also store the full log run-length posterior under
  `metadata["log_R"]`.

# Returns
A `ChangePointResult` (`algorithm == "BOCPD"`). `metadata["prob_change"]` holds
the run-length-zero posterior per time step and `metadata["map_runlen"]` the
most probable run length.

# Notes
- The log predictive density is limited to
  `[Config.LOG_UNDERFLOW_THRESHOLD, Config.LOG_OVERFLOW_THRESHOLD]`. It is no
  longer capped at `0.0`: a continuous density may exceed one, and capping it
  flattened the evidence of well-fitting run lengths.
- NOTE(review): with a constant hazard the changepoint mass and the growth mass
  are `hazard_rate` and `1 - hazard_rate` times the same sum, so
  `prob_change[t]` stays at about `hazard_rate` until truncation at
  `max_runlen` drops growth mass. Thresholding it therefore rarely carries
  information; `map_runlen` is the more informative output. The detection rule
  is left unchanged here -- confirm the intended criterion.
"""
function bocpd_detect(data::Vector{Float64}, hazard_rate::Float64;
                      prior::NormalGammaPrior=NormalGammaPrior(),
                      threshold::Float64=0.5,
                      max_runlen::Int=500,
                      return_full::Bool=false)

    validate_data(data)
    validate_in_range(hazard_rate, "hazard_rate", Config.EPSILON, 1.0 - Config.EPSILON)
    validate_in_range(threshold, "threshold", 0.0, 1.0)
    validate_positive(max_runlen, "max_runlen")

    n = length(data)
    max_r = min(n, max_runlen)

    @debug "BOCPD initialized" n=n hazard_rate=hazard_rate max_runlen=max_r

    # log_R[r + 1, t + 1]: log posterior of run length r after t observations.
    log_R = fill(-Inf, max_r + 1, n + 1)
    log_R[1, 1] = 0.0

    state = BOCPDState(
        fill(prior.μ0, max_r + 1),
        fill(prior.κ0, max_r + 1),
        fill(prior.α0, max_r + 1),
        fill(prior.β0, max_r + 1)
    )

    log_H = safe_log(hazard_rate)
    log_1mH = safe_log(1 - hazard_rate)

    changepoints = Int[]
    prob_change = zeros(n)
    map_runlen = zeros(Int, n)

    # Counts predictive evaluations. The former iteration budget could never be
    # exceeded (at most n * (max_r + 1) evaluations) and was removed.
    total_iterations = 0

    for t in 1:n
        x = data[t]

        # Student-t log predictive density of x under each current run length.
        log_pred = fill(-Inf, max_r + 1)
        for r in 0:min(t - 1, max_r)
            total_iterations += 1

            μ = state.μ_post[r + 1]
            κ = state.κ_post[r + 1]
            α = state.α_post[r + 1]
            β = state.β_post[r + 1]
            df = 2 * α

            scale_sq = β * (κ + 1) / (α * κ)
            if scale_sq <= 0
                scale_sq = Config.VARIANCE_REGULARIZATION
            end
            scale = sqrt(scale_sq)

            z = (x - μ) / scale

            lp = loggamma((df + 1) / 2) - loggamma(df / 2) -
                 0.5 * log(π * df) - safe_log(scale) -
                 ((df + 1) / 2) * log1p(z^2 / df)

            log_pred[r + 1] = clamp(lp, Config.LOG_UNDERFLOW_THRESHOLD,
                                    Config.LOG_OVERFLOW_THRESHOLD)
        end

        # Growth: run length r continues to r + 1 (mass at max_r is dropped).
        log_growth = fill(-Inf, max_r + 1)
        for r in 0:min(t - 1, max_r - 1)
            if log_R[r + 1, t] > Config.LOG_UNDERFLOW_THRESHOLD &&
               log_pred[r + 1] > Config.LOG_UNDERFLOW_THRESHOLD
                log_growth[r + 2] = log_R[r + 1, t] + log_pred[r + 1] + log_1mH
            end
        end

        # Changepoint: every run length may collapse to zero.
        log_cp_contributions = Float64[]
        for r in 0:min(t - 1, max_r)
            if log_R[r + 1, t] > Config.LOG_UNDERFLOW_THRESHOLD &&
               log_pred[r + 1] > Config.LOG_UNDERFLOW_THRESHOLD
                push!(log_cp_contributions, log_R[r + 1, t] + log_pred[r + 1] + log_H)
            end
        end

        if !isempty(log_cp_contributions)
            log_growth[1] = logsumexp(log_cp_contributions)
        end

        # Normalise; if all mass underflowed, restart with run length zero.
        log_total = logsumexp(log_growth)
        if log_total > Config.LOG_UNDERFLOW_THRESHOLD
            log_R[:, t + 1] .= log_growth .- log_total
        else
            log_R[1, t + 1] = 0.0
        end

        prob_change[t] = safe_exp(log_R[1, t + 1])
        map_runlen[t] = argmax(@view log_R[:, t + 1]) - 1

        if prob_change[t] > threshold && t > 1
            push!(changepoints, t)
        end

        # Conjugate update: the parameters for run length r derive from those
        # of run length r - 1; run length zero is reset to the prior.
        μ_new = fill(prior.μ0, max_r + 1)
        κ_new = fill(prior.κ0, max_r + 1)
        α_new = fill(prior.α0, max_r + 1)
        β_new = fill(prior.β0, max_r + 1)

        for r in 1:min(t, max_r)
            κ_old = state.κ_post[r]
            μ_old = state.μ_post[r]
            α_old = state.α_post[r]
            β_old = state.β_post[r]

            κ_new[r + 1] = κ_old + 1
            μ_new[r + 1] = (κ_old * μ_old + x) / κ_new[r + 1]
            α_new[r + 1] = α_old + 0.5

            resid = x - μ_old
            β_new[r + 1] = β_old + 0.5 * κ_old / κ_new[r + 1] * resid^2

            β_new[r + 1] = max(β_new[r + 1], Config.EPSILON)
        end

        state.μ_post = μ_new
        state.κ_post = κ_new
        state.α_post = α_new
        state.β_post = β_new
    end

    metadata = Dict{String,Any}(
        "hazard_rate" => hazard_rate,
        "threshold" => threshold,
        "prob_change" => prob_change,
        "map_runlen" => map_runlen
    )

    if return_full
        metadata["log_R"] = log_R
    end

    return ChangePointResult(
        changepoints;
        algorithm="BOCPD",
        iterations=total_iterations,
        metadata=metadata
    )
end

"""
    cusum_detect(data, threshold; baseline_n=50, two_sided=true,
                 reset_after_detection=true, drift=0.0)

CUSUM detector on standardised observations.

The first `baseline_n` points (after the adjustment described below) provide
the reference mean and standard deviation. For each point the statistics
`S⁺ = max(0, S⁺ + z - drift)` and `S⁻ = max(0, S⁻ - z - drift)` are updated and
a changepoint is reported when the monitored statistic exceeds `threshold`.

# Arguments
- `data`: observations (no `NaN`/`Inf`).
- `threshold`: positive alarm level, in units of baseline standard deviations.

# Keywords
- `baseline_n`: requested baseline length; it is limited to `n ÷ 4` and then
  raised to at least 10. Inputs shorter than the resulting value yield an empty
  result tagged `"data_too_short"`.
- `two_sided`: monitor `max(S⁺, S⁻)` instead of `S⁺` only.
- `reset_after_detection`: zero both statistics after an alarm.
- `drift`: non-negative allowance subtracted at every step (the CUSUM reference
  value). The default `0.0` reproduces the behaviour without allowance.

# Returns
A `ChangePointResult` (`algorithm == "CUSUM"`); the metadata contains both
statistic paths and the baseline estimates. Alarms are at least
`baseline_n + 1` steps apart.

# Throws
`ValidationError` for invalid arguments.
"""
function cusum_detect(data::Vector{Float64}, threshold::Float64;
                      baseline_n::Int=50,
                      two_sided::Bool=true,
                      reset_after_detection::Bool=true,
                      drift::Float64=0.0)

    validate_data(data)
    validate_positive(threshold, "threshold")
    validate_positive(baseline_n, "baseline_n")
    if isnan(drift) || drift < 0
        throw(ValidationError("Value must be non-negative", "drift", drift))
    end

    n = length(data)

    baseline_n = min(baseline_n, n ÷ 4)
    baseline_n = max(baseline_n, 10)

    if n < baseline_n
        return ChangePointResult(Int[];
                                 algorithm="CUSUM",
                                 metadata=Dict{String,Any}("reason" => "data_too_short"))
    end

    baseline_stats = welford_mean_var(data[1:baseline_n])
    μ0 = baseline_stats.mean
    σ0 = sqrt(max(baseline_stats.var, Config.VARIANCE_REGULARIZATION))

    @debug "CUSUM baseline estimated" μ0=μ0 σ0=σ0 baseline_n=baseline_n

    S_plus = zeros(n)
    S_minus = zeros(n)

    changepoints = Int[]
    last_cp = 0
    min_gap = baseline_n   # minimum spacing between consecutive alarms

    for t in 1:n
        z = (data[t] - μ0) / σ0

        prev_plus = t > 1 ? S_plus[t - 1] : 0.0
        prev_minus = t > 1 ? S_minus[t - 1] : 0.0

        # Cap the statistics so that a long excursion cannot overflow.
        S_plus[t] = min(max(0.0, prev_plus + z - drift), 1e10)
        S_minus[t] = min(max(0.0, prev_minus - z - drift), 1e10)

        S_max = two_sided ? max(S_plus[t], S_minus[t]) : S_plus[t]

        if S_max > threshold && t > last_cp + min_gap
            push!(changepoints, t)
            last_cp = t

            if reset_after_detection
                S_plus[t] = 0.0
                S_minus[t] = 0.0
            end
        end
    end

    return ChangePointResult(
        changepoints;
        algorithm="CUSUM",
        metadata=Dict{String,Any}(
            "S_plus" => S_plus,
            "S_minus" => S_minus,
            "baseline_mean" => μ0,
            "baseline_sd" => σ0,
            "threshold" => threshold,
            "two_sided" => two_sided,
            "drift" => drift
        )
    )
end

"""
    kernel_cpd_detect(data, penalty; kernel="rbf", bandwidth=0.0,
                      min_segment=10, max_points=5000)

Penalised kernel changepoint detection with pruned dynamic programming.

# Arguments
- `data`: observations (no `NaN`/`Inf`).
- `penalty`: non-negative cost added per segment.

# Keywords
- `kernel`: `"rbf"`/`"gaussian"`, `"laplacian"`/`"laplace"` or `"linear"`
  (case-insensitive); unknown names fall back to RBF with a warning.
- `bandwidth`: kernel bandwidth. A non-positive value triggers the median
  heuristic on (at most 500 randomly chosen) observations, then `robust_std`,
  then `1.0` as successive fallbacks.
- `min_segment`: minimum number of observations per segment.
- `max_points`: inputs longer than this are delegated to
  `_kernel_cpd_nystrom`.

# Returns
A `ChangePointResult` (`algorithm == "KernelCPD"`); inputs shorter than
`2 * min_segment` yield an empty result tagged `"data_too_short"`.

# Notes
- The segment cost is `-(Σᵢⱼ k(xᵢ, xⱼ)) / m` for a segment of `m` points. This
  matches the expression used by the Nyström path (`-m * ‖mean feature‖²`) and
  differs from the usual kernel least-squares cost only by `Σᵢ k(xᵢ, xᵢ)`,
  which is the same for every segmentation. The earlier `/ m^2` normalisation
  made each segment's cost independent of its length, so the preferred number
  of segments was driven by the penalty alone.
- Subsampling uses `Random.randperm`; no `sample` function is provided by the
  imports visible in this part of the file.
- The exact path stores two dense matrices of about `n^2` entries.
"""
function kernel_cpd_detect(data::Vector{Float64}, penalty::Float64;
                           kernel::String="rbf",
                           bandwidth::Float64=0.0,
                           min_segment::Int=10,
                           max_points::Int=5000)

    validate_data(data)
    validate_non_negative(penalty, "penalty")
    validate_positive(min_segment, "min_segment")
    validate_positive(max_points, "max_points")

    n = length(data)

    if n < 2 * min_segment
        return ChangePointResult(Int[];
                                 algorithm="KernelCPD",
                                 metadata=Dict{String,Any}("reason" => "data_too_short"))
    end

    # `bw` is bound exactly once so the kernel closures capture a plain value.
    bw = if bandwidth <= 0
        sample_size = min(500, n)
        sample_data = if sample_size < n
            # Random subset without replacement, kept in time order.
            data[sort!(randperm(n)[1:sample_size])]
        else
            data
        end

        dists = Float64[]
        sizehint!(dists, (length(sample_data) * (length(sample_data) - 1)) ÷ 2)
        for i in 1:length(sample_data)
            for j in (i + 1):length(sample_data)
                push!(dists, abs(sample_data[i] - sample_data[j]))
            end
        end

        estimate = isempty(dists) ? bandwidth : median(dists)

        if estimate < Config.EPSILON
            estimate = robust_std(data)
        end
        if estimate < Config.EPSILON
            estimate = 1.0
            @warn "Could not estimate bandwidth, using default" bandwidth=estimate
        end

        @debug "Automatic bandwidth selected" bandwidth=estimate
        estimate
    else
        bandwidth
    end

    kernel_lower = lowercase(kernel)
    kernel_func = if kernel_lower == "rbf" || kernel_lower == "gaussian"
        (x, y) -> exp(-(x - y)^2 / (2 * bw^2))
    elseif kernel_lower == "laplacian" || kernel_lower == "laplace"
        (x, y) -> exp(-abs(x - y) / bw)
    elseif kernel_lower == "linear"
        (x, y) -> x * y
    else
        @warn "Unknown kernel '$kernel', defaulting to RBF"
        (x, y) -> exp(-(x - y)^2 / (2 * bw^2))
    end

    if n > max_points
        @info "Using Nyström approximation for large dataset" n=n max_points=max_points
        return _kernel_cpd_nystrom(data, penalty, kernel_func, min_segment, max_points)
    end

    # Symmetric Gram matrix.
    K = zeros(n, n)
    for i in 1:n
        K[i, i] = kernel_func(data[i], data[i])
        for j in (i + 1):n
            K[i, j] = kernel_func(data[i], data[j])
            K[j, i] = K[i, j]
        end
    end

    # 2-D prefix sums: cumsum_K[i + 1, j + 1] = sum(K[1:i, 1:j]).
    cumsum_K = zeros(n + 1, n + 1)
    for i in 1:n
        for j in 1:n
            cumsum_K[i + 1, j + 1] = cumsum_K[i, j + 1] + cumsum_K[i + 1, j] -
                                     cumsum_K[i, j] + K[i, j]
        end
    end

    function kernel_cost(start::Int, stop::Int)
        m = stop - start + 1
        if m < min_segment
            return Inf
        end

        # Sum of the Gram block K[start:stop, start:stop].
        K_sum = cumsum_K[stop + 1, stop + 1] - cumsum_K[start, stop + 1] -
                cumsum_K[stop + 1, start] + cumsum_K[start, start]

        if K_sum < 0
            K_sum = 0.0
        end

        return -K_sum / m
    end

    # F[i + 1]: best penalised cost of data[1:i]; -penalty cancels the penalty
    # charged for the first segment.
    F = fill(Inf, n + 1)
    F[1] = -penalty
    cp_prev = zeros(Int, n + 1)

    R = [1]   # candidate start indices of the last segment
    # Counts candidate visits. The former budget of
    # MAX_ITERATIONS_FACTOR * n^2 could not be reached (at most n candidates
    # for each of at most n end positions) and was removed.
    total_iterations = 0

    for t in min_segment:n
        best_cost = Inf
        best_s = 1

        for s in R
            total_iterations += 1

            if t - s + 1 >= min_segment
                cost = F[s] + kernel_cost(s, t) + penalty
                if !isnan(cost) && cost < best_cost
                    best_cost = cost
                    best_s = s
                end
            end
        end

        F[t + 1] = best_cost
        cp_prev[t + 1] = best_s

        # Keep candidates that are too recent to form a full segment and those
        # whose cost is within `penalty` of the new optimum.
        R = filter(s -> t - s + 1 < min_segment ||
                        F[s] + kernel_cost(s, t) <= F[t + 1] + penalty, R)
        push!(R, t + 1)
    end

    # Backtrack: a predecessor `prev > 1` marks a changepoint after `prev - 1`.
    changepoints = Int[]
    pos = n + 1
    while pos > 1
        prev = cp_prev[pos]
        if prev > 1
            push!(changepoints, prev - 1)
        end
        pos = prev
    end

    return ChangePointResult(
        sort(changepoints);
        cost=F[n + 1],
        algorithm="KernelCPD",
        iterations=total_iterations,
        metadata=Dict{String,Any}(
            "kernel" => kernel,
            "bandwidth" => bw,
            "penalty" => penalty,
            "used_approximation" => false
        )
    )
end

"""
    _kernel_cpd_nystrom(data, penalty, kernel_func, min_segment, n_landmarks;
                        rng=Random.default_rng())

Kernel change-point detection on a low-rank (Nyström-style) feature map.

`min(n_landmarks, n)` observations are drawn without replacement as landmarks.
The regularised landmark kernel matrix is eigendecomposed and every observation
is mapped to the feature row `K_nm * V * Λ^(-1/2)`. The cost of a segment is
`-len * ‖mean feature‖²`, which is evaluated from prefix sums of the feature
rows, and the optimal segmentation is found with an exhaustive (unpruned)
dynamic program over all admissible last-segment starts.

# Arguments
- `data`: observations.
- `penalty`: cost added for every segment (the first one is cancelled by the
  `F[1] = -penalty` initialisation).
- `kernel_func`: two-argument kernel evaluated on pairs of observations.
- `min_segment`: minimum admissible segment length (must be at least 1).
- `n_landmarks`: requested number of landmarks (capped at `length(data)`).

# Keywords
- `rng`: random number generator used to pick the landmarks.

# Returns
A `ChangePointResult` with algorithm `"KernelCPD"`; each changepoint is the
last index of a segment. `cost` is `Inf` when no admissible segmentation exists.

# Throws
- `ValidationError` if no landmark can be drawn or `min_segment < 1`.
"""
function _kernel_cpd_nystrom(data::Vector{Float64}, penalty::Float64,
                             kernel_func::Function, min_segment::Int, 
                             n_landmarks::Int;
                             rng::AbstractRNG=Random.default_rng())
    n = length(data)
    m = min(n_landmarks, n)

    if m < 1
        throw(ValidationError("At least one landmark and one observation are required",
                              "n_landmarks", n_landmarks))
    end
    if min_segment < 1
        throw(ValidationError("Minimum segment length must be at least 1",
                              "min_segment", min_segment))
    end

    # Uniform draw without replacement using only the Random stdlib.
    landmark_idx = sort!(randperm(rng, n)[1:m])
    landmarks = data[landmark_idx]

    # Cross-kernel between every observation and every landmark (n × m).
    K_nm = zeros(n, m)
    for j in 1:m
        for i in 1:n
            K_nm[i, j] = kernel_func(data[i], landmarks[j])
        end
    end

    # Landmark kernel: evaluate the upper triangle once and mirror it.
    K_mm = zeros(m, m)
    for i in 1:m
        K_mm[i, i] = kernel_func(landmarks[i], landmarks[i])
        for j in (i + 1):m
            K_mm[i, j] = kernel_func(landmarks[i], landmarks[j])
            K_mm[j, i] = K_mm[i, j]
        end
    end

    # A small ridge plus the eigenvalue floor keeps the inverse square root finite.
    K_mm += Config.VARIANCE_REGULARIZATION * I

    eigen_result = eigen(Symmetric(K_mm))
    eigenvalues = max.(eigen_result.values, Config.EPSILON)
    sqrt_inv_eigenvalues = 1.0 ./ sqrt.(eigenvalues)

    Φ = K_nm * (eigen_result.vectors * Diagonal(sqrt_inv_eigenvalues))

    # Row r holds the sum of the first r - 1 feature rows, so the feature sum of
    # data[start:stop] is feature_cumsum[stop + 1, :] - feature_cumsum[start, :].
    feature_cumsum = vcat(zeros(1, m), cumsum(Φ, dims=1))

    function approx_kernel_cost(start::Int, stop::Int)
        len = stop - start + 1
        if len < min_segment
            return Inf
        end

        sq_norm = 0.0
        for j in 1:m
            delta = feature_cumsum[stop + 1, j] - feature_cumsum[start, j]
            sq_norm += delta * delta
        end
        # ‖sum‖² / len == len * ‖mean‖²
        return -sq_norm / len
    end

    # F[t + 1] is the best penalised cost of data[1:t]; cp_prev[t + 1] is the
    # start index of the last segment in that solution.
    F = fill(Inf, n + 1)
    F[1] = -penalty
    cp_prev = zeros(Int, n + 1)
    total_iterations = 0

    for t in min_segment:n
        best_cost = Inf
        best_s = 1

        for s in 1:(t - min_segment + 1)
            total_iterations += 1
            cost = F[s] + approx_kernel_cost(s, t) + penalty
            if cost < best_cost
                best_cost = cost
                best_s = s
            end
        end

        F[t + 1] = best_cost
        cp_prev[t + 1] = best_s
    end

    # Walk the back-pointers from the end; a segment starting at index 1 closes the path.
    changepoints = Int[]
    t = n + 1
    while t > 1
        prev = cp_prev[t]
        if prev > 1
            push!(changepoints, prev - 1)
        end
        t = prev
    end

    return ChangePointResult(
        sort(changepoints);
        cost=F[n + 1],
        algorithm="KernelCPD",
        iterations=total_iterations,
        metadata=Dict{String,Any}(
            "used_approximation" => true,
            "n_landmarks" => m,
            "penalty" => penalty
        )
    )
end

"""
    wbs_detect(data, penalty; n_intervals=2000, min_segment=5, threshold=0.0,
               rng=MersenneTwister(42))

Detect mean shifts by recursive binary segmentation over random sub-intervals
(Wild-Binary-Segmentation style).

Up to `n_intervals` random intervals of length at least `2 * min_segment` are
drawn, and the full range `(1, n)` is always added. For the current range every
interval is clipped to that range, the scaled CUSUM statistic is maximised over
all admissible split points, and the range is split recursively while the best
statistic exceeds the threshold.

# Arguments
- `data`: observations.
- `penalty`: when positive (and `threshold` is not), `sqrt(penalty)` is the threshold.

# Keywords
- `n_intervals`: number of random intervals to draw.
- `min_segment`: minimum number of points on each side of a split (must be positive).
- `threshold`: explicit threshold; takes precedence over `penalty` when positive.
  If neither is positive, `sqrt(2 * log(n))` is used.
- `rng`: random number generator for the interval draws.

# Returns
A `ChangePointResult` with algorithm `"WBS"`. Each changepoint is the last index
of the left part of a split. `converged` is `false` when the iteration budget
was exhausted before the search finished.
"""
function wbs_detect(data::Vector{Float64}, penalty::Float64;
                    n_intervals::Int=2000,
                    min_segment::Int=5,
                    threshold::Float64=0.0,
                    rng::AbstractRNG=MersenneTwister(42))
    validate_data(data)
    validate_positive(min_segment, "min_segment")
    n = length(data)

    if n < 2 * min_segment
        return ChangePointResult(Int[]; algorithm="WBS", metadata=Dict{String,Any}("reason" => "data_too_short"))
    end

    # Noise scale used to standardise the CUSUM statistic.
    # NOTE(review): robust_std is presumably MAD-based and may be zero when more
    # than half of the observations are identical - confirm. Fall back to the
    # sample standard deviation so the statistic never divides by zero.
    sigma_robust = robust_std(data)
    sigma = (isfinite(sigma_robust) && sigma_robust > 0) ? sigma_robust : std(data)
    if !(isfinite(sigma) && sigma > 0)
        # Constant data: there is nothing to detect.
        return ChangePointResult(Int[]; algorithm="WBS",
                                 metadata=Dict{String,Any}("reason" => "zero_variance"))
    end

    # Rejection-sample intervals long enough to hold a split; the attempt cap
    # bounds the loop when such intervals are rare.
    intervals = Tuple{Int, Int}[]
    attempts = 0
    max_attempts = n_intervals * 10

    while length(intervals) < n_intervals && attempts < max_attempts
        attempts += 1
        s = rand(rng, 1:n)
        e = rand(rng, s:n)
        if e - s + 1 >= 2 * min_segment
            push!(intervals, (s, e))
        end
    end
    push!(intervals, (1, n))

    # prefix[i + 1] == sum(data[1:i]), so sum(data[a:b]) == prefix[b + 1] - prefix[a].
    prefix = [0.0; cumsum(data)]

    # Scaled CUSUM statistic for splitting data[s:e] after index t.
    function cusum_stat(s::Int, e::Int, t::Int)
        n_left = t - s + 1
        n_right = e - t
        n_total = e - s + 1

        if n_left < min_segment || n_right < min_segment
            return -Inf
        end

        mean_left = (prefix[t + 1] - prefix[s]) / n_left
        mean_right = (prefix[e + 1] - prefix[t + 1]) / n_right

        raw_stat = sqrt(n_left * n_right / n_total) * abs(mean_left - mean_right)
        return raw_stat / sigma
    end

    # Threshold precedence: explicit threshold, then sqrt(penalty), then the
    # universal sqrt(2 log n) default.
    effective_threshold = if threshold > 0
        threshold
    elseif penalty > 0
        sqrt(penalty)
    else
        sqrt(2 * log(n))
    end

    changepoints = Int[]
    max_recursion = ceil(Int, 2 * log2(n + 1))
    # The budget counts the always-present full interval as well, so that
    # n_intervals == 0 still leaves room for one pass over the data.
    max_iterations = Config.MAX_ITERATIONS_FACTOR * n * (max(n_intervals, 0) + 1)
    # Refs keep the counters concretely typed inside the recursive closure.
    iteration_count = Ref(0)
    budget_exhausted = Ref(false)

    function wbs_recursive!(start::Int, stop::Int, depth::Int)
        if budget_exhausted[] || depth > max_recursion ||
           stop - start + 1 < 2 * min_segment
            return nothing
        end

        best_stat = -Inf
        best_t = -1

        for (s, e) in intervals
            # Clip the random interval to the range currently being split.
            s_int = max(s, start)
            e_int = min(e, stop)
            if e_int - s_int + 1 < 2 * min_segment
                continue
            end

            for t in (s_int + min_segment - 1):(e_int - min_segment)
                iteration_count[] += 1
                if iteration_count[] > max_iterations
                    budget_exhausted[] = true
                    @warn "WBS: Maximum iterations reached"
                    return nothing
                end

                stat = cusum_stat(s_int, e_int, t)
                if stat > best_stat
                    best_stat = stat
                    best_t = t
                end
            end
        end

        if best_stat > effective_threshold && best_t > 0
            push!(changepoints, best_t)
            wbs_recursive!(start, best_t, depth + 1)
            wbs_recursive!(best_t + 1, stop, depth + 1)
        end
        return nothing
    end

    wbs_recursive!(1, n, 0)

    return ChangePointResult(sort(changepoints); cost=0.0, algorithm="WBS",
        converged=!budget_exhausted[], iterations=iteration_count[],
        metadata=Dict{String,Any}("penalty" => penalty, "threshold" => effective_threshold, "sigma" => sigma))
end

"""
    segneigh_detect(data, n_changepoints; min_segment=2, cost_type="mean")

Segment-neighbourhood search: find the minimum-cost segmentation of `data` with
exactly `n_changepoints` changepoints by dynamic programming.

`F[t + 1, j]` holds the best cost of splitting `data[1:t]` into `j - 1` segments,
each at least `min_segment` long, so the answer sits in `F[n + 1, k + 2]`. The
triple loop over segment count, end index and previous boundary makes the work
proportional to `k * n^2` segment-cost evaluations.

# Arguments
- `data`: observations.
- `n_changepoints`: number of changepoints to place (non-negative).

# Keywords
- `min_segment`: minimum segment length (positive).
- `cost_type`: forwarded to `create_cost_function`.

# Returns
A `ChangePointResult` with algorithm `"SegNeigh"`; each changepoint is the last
index of a segment. When zero changepoints are requested, or no admissible
segmentation exists, the result is empty and `metadata["reason"]` says why
(`converged` is `false` in the latter case).

# Throws
- `NumericalInstabilityError` if the back-pointer walk does not terminate.
- Whatever the `validate_*` helpers throw for invalid arguments.
"""
function segneigh_detect(data::Vector{Float64}, n_changepoints::Int;
                         min_segment::Int=2, 
                         cost_type::String="mean")
    
    validate_data(data)
    validate_non_negative(n_changepoints, "n_changepoints")
    validate_positive(min_segment, "min_segment")
    
    n = length(data)
    k = n_changepoints
    
    if k == 0
        return ChangePointResult(Int[];
                                 algorithm="SegNeigh",
                                 metadata=Dict{String,Any}("reason" => "zero_changepoints_requested"))
    end
    
    validate_min_segment(min_segment, n, k)
    
    cost_func = create_cost_function(data, cost_type)
    
    # Column 1 is the "zero segments" state: only the empty prefix is reachable.
    F = fill(Inf, n + 1, k + 2)
    F[1, 1] = 0.0
    
    # cp_prev[t + 1, j] is the row of F from which the optimum was reached,
    # i.e. (end of the previous segment) + 1.
    cp_prev = zeros(Int, n + 1, k + 2)
    
    total_iterations = 0
    max_iterations = Config.MAX_ITERATIONS_FACTOR * n^2 * (k + 1)
    budget_exhausted = false
    
    for j in 2:(k + 2)
        n_segments = j - 1
        # data[1:t] must be long enough for n_segments segments ...
        for t in (n_segments * min_segment):n
            best_cost = Inf
            best_s = -1
            
            # ... and the previous boundary s must leave room for n_segments - 1
            # segments before it and one full segment (s + 1):t after it. For
            # the first segment this range starts at s = 0, which is the only
            # state seeded in column 1.
            for s in ((n_segments - 1) * min_segment):(t - min_segment)
                total_iterations += 1
                if total_iterations > max_iterations
                    @warn "SegNeigh: Maximum iterations reached"
                    budget_exhausted = true
                    break
                end
                
                if F[s + 1, j - 1] < Inf
                    seg_cost = segment_cost(cost_func, s + 1, t)
                    cost = F[s + 1, j - 1] + seg_cost
                    
                    if cost < best_cost
                        best_cost = cost
                        best_s = s
                    end
                end
            end
            
            F[t + 1, j] = best_cost
            cp_prev[t + 1, j] = best_s + 1
        end
    end
    
    if F[n + 1, k + 2] == Inf
        @warn "No valid segmentation found with $k changepoints"
        return ChangePointResult(Int[];
                                 algorithm="SegNeigh",
                                 converged=false,
                                 metadata=Dict{String,Any}("reason" => "no_valid_segmentation"))
    end
    
    # Follow the back-pointers from the full data set down to the empty prefix.
    changepoints = Int[]
    t = n + 1
    j = k + 2
    
    backtrack_iterations = 0
    while j > 1 && t > 1
        backtrack_iterations += 1
        if backtrack_iterations > n + k + 2
            throw(NumericalInstabilityError("Infinite loop in backtracking", "SegNeigh backtrack"))
        end
        
        prev = cp_prev[t, j]
        if prev > 1
            push!(changepoints, prev - 1)
        end
        t = prev
        j -= 1
    end
    
    return ChangePointResult(
        sort(changepoints);
        cost=F[n + 1, k + 2],
        algorithm="SegNeigh",
        converged=!budget_exhausted,
        iterations=total_iterations,
        metadata=Dict{String,Any}(
            "n_changepoints" => k,
            "cost_type" => cost_type,
            "min_segment" => min_segment
        )
    )
end

"""
    pelt_multivariate(data, penalty, min_segment; regularization=1e-6)

PELT-style change-point detection for multivariate data with a Gaussian
covariance cost.

Rows of `data` are observations and columns are dimensions. The cost of a
segment is `len * logdet(Σ)`, where `Σ` is the segment's sample covariance plus
`regularization * I`. When `Σ` is ill-conditioned the cost falls back to the
sum of log-variances on the diagonal. Segments must contain at least
`max(min_segment, d + 1)` rows.

# Arguments
- `data`: `n × d` matrix of finite values.
- `penalty`: non-negative cost per segment.
- `min_segment`: positive minimum segment length.

# Keywords
- `regularization`: positive ridge added to every covariance matrix.

# Returns
A `ChangePointResult` with algorithm `"PELT-MV"`; each changepoint is the last
row index of a segment. If `n` is too short for two segments the result is
empty with `metadata["reason"] == "data_too_short"`. `converged` is `false`
when the iteration budget was exhausted.

# Throws
- `ValidationError` for an empty matrix or for NaN/Inf entries, plus whatever
  the `validate_*` helpers throw for invalid scalar arguments.
"""
function pelt_multivariate(data::Matrix{Float64}, penalty::Float64, 
                           min_segment::Int;
                           regularization::Float64=1e-6)
    
    n, d = size(data)
    
    if n < 1
        throw(ValidationError("Data matrix cannot be empty", "data", "empty matrix"))
    end
    if d < 1
        throw(ValidationError("Data must have at least one dimension", "data", "d=$d"))
    end
    
    if any(isnan, data)
        throw(ValidationError("Data contains NaN values", "data", "contains NaN"))
    end
    if any(isinf, data)
        throw(ValidationError("Data contains Inf values", "data", "contains Inf"))
    end
    
    validate_non_negative(penalty, "penalty")
    validate_positive(min_segment, "min_segment")
    validate_positive(regularization, "regularization")
    
    # A d-dimensional covariance needs more than d rows to be estimated.
    min_for_cov = max(min_segment, d + 1)
    
    if n < 2 * min_for_cov
        return ChangePointResult(Int[];
                                 algorithm="PELT-MV",
                                 metadata=Dict{String,Any}("reason" => "data_too_short"))
    end
    
    # Gaussian covariance cost of rows start:stop.
    function mv_segment_cost(start::Int, stop::Int)
        len = stop - start + 1
        if len < min_for_cov
            return Inf
        end
        
        Σ = cov(view(data, start:stop, :))
        Σ += regularization * I
        
        cond_num = cond(Σ)
        if cond_num > Config.CONDITION_NUMBER_THRESHOLD
            @warn "Ill-conditioned covariance matrix" condition_number=cond_num start=start stop=stop
            # Ignore the cross terms and use the per-dimension variances only.
            vars = max.(diag(Σ), regularization)
            return len * sum(log.(vars))
        end
        
        return len * logdet(Σ)
    end
    
    # F[t + 1] is the best penalised cost of rows 1:t; F[1] = -penalty cancels
    # the penalty charged for the first segment.
    F = fill(Inf, n + 1)
    F[1] = -penalty
    cp_prev = zeros(Int, n + 1)
    R = [1]
    
    total_iterations = 0
    max_iterations = Config.MAX_ITERATIONS_FACTOR * n^2
    budget_exhausted = false
    
    for t in min_for_cov:n
        best_cost = Inf
        best_s = 1
        
        # Segment cost of each candidate start, kept for the pruning step so
        # the covariance is not recomputed. NaN marks "not evaluated".
        candidate_costs = fill(NaN, length(R))
        
        for (idx, s) in enumerate(R)
            total_iterations += 1
            if total_iterations > max_iterations
                @warn "PELT-MV: Maximum iterations reached"
                budget_exhausted = true
                break
            end
            
            if t - s + 1 >= min_for_cov
                seg_cost = mv_segment_cost(s, t)
                candidate_costs[idx] = seg_cost
                cost = F[s] + seg_cost + penalty
                if !isnan(cost) && !isinf(cost) && cost < best_cost
                    best_cost = cost
                    best_s = s
                end
            end
        end
        
        F[t + 1] = best_cost
        cp_prev[t + 1] = best_s
        
        # Pruning must compare F[s] plus the segment cost: the log-determinant
        # cost is negative for low-variance data, so comparing F[s] alone can
        # discard optimal starts. Candidates that were not evaluated (too short
        # so far) are always kept.
        R_new = Int[]
        for (idx, s) in enumerate(R)
            seg_cost = candidate_costs[idx]
            if isnan(seg_cost) || F[s] + seg_cost <= F[t + 1] + penalty
                push!(R_new, s)
            end
        end
        push!(R_new, t + 1)
        R = R_new
    end
    
    # Walk the back-pointers; a segment starting at row 1 closes the path.
    changepoints = Int[]
    t = n + 1
    while t > 1
        prev = cp_prev[t]
        if prev > 1
            push!(changepoints, prev - 1)
        end
        t = prev
    end
    
    return ChangePointResult(
        sort(changepoints);
        cost=F[n + 1],
        algorithm="PELT-MV",
        converged=!budget_exhausted,
        iterations=total_iterations,
        metadata=Dict{String,Any}(
            "dimensions" => d,
            "penalty" => penalty,
            "regularization" => regularization
        )
    )
end

"""
    suggest_penalty(n, method="BIC")

Return a penalty value for a series of length `n`.

`method` is matched case-insensitively:
- `"BIC"`: `2 * log(n)`
- `"MBIC"` / `"MBIC1"`: `log(n)^1.5`
- `"MBIC2"`: `3 * log(n)`
- `"AIC"`: `2.0` (a warning is logged)
- `"HANNAN-QUINN"` / `"HQ"`: `2 * log(log(n))`, floored at `0.0`
- anything else: a warning is logged and the BIC value is returned.

The result is always a non-negative `Float64`.
"""
function suggest_penalty(n::Int, method::String="BIC")
    validate_positive(n, "n")
    
    method_upper = uppercase(method)
    log_n = log(n)
    
    if method_upper == "BIC"
        return 2 * log_n
    elseif method_upper == "MBIC" || method_upper == "MBIC1"
        return log_n^1.5
    elseif method_upper == "MBIC2"
        return 3 * log_n
    elseif method_upper == "AIC"
        @warn "AIC penalty often leads to overfitting in changepoint detection"
        return 2.0
    elseif method_upper == "HANNAN-QUINN" || method_upper == "HQ"
        # log(log(n)) is -Inf for n == 1 and negative for n == 2; a negative
        # penalty is not meaningful, so floor it at zero.
        return n >= 3 ? 2 * log(log_n) : 0.0
    else
        @warn "Unknown penalty method '$method', using BIC"
        return 2 * log_n
    end
end

"""
    evaluate_segmentation(data, changepoints; cost_type="mean")

Score a given segmentation of `data`.

Each changepoint is the last index of a segment. Duplicates are collapsed, and
the values `0` and `length(data)` are ignored because they coincide with the
ends of the data and do not split anything.

# Keywords
- `cost_type`: forwarded to `create_cost_function`.

# Returns
A named tuple with
- `total_cost`: sum of the segment costs,
- `segment_costs`: cost of each segment, in order,
- `bic`: `total_cost + k * log(n)`,
- `n_changepoints`: number `k` of distinct interior changepoints,
- `n_segments`: `k + 1`.

# Throws
- `ValidationError` if a changepoint is negative or larger than `length(data)`.
"""
function evaluate_segmentation(data::Vector{Float64}, changepoints::Vector{Int};
                               cost_type::String="mean")
    validate_data(data)
    n = length(data)
    
    for cp in changepoints
        if cp < 0 || cp > n
            throw(ValidationError("Changepoint lies outside the data range",
                                  "changepoints", cp))
        end
    end
    
    # Only distinct changepoints strictly inside the data split it; counting
    # anything else would inflate k and make n_segments disagree with
    # segment_costs.
    interior = unique(sort(filter(cp -> 0 < cp < n, changepoints)))
    k = length(interior)
    boundaries = [0; interior; n]
    
    cost_func = create_cost_function(data, cost_type)
    
    segment_costs = Float64[]
    total_cost = 0.0
    
    for i in 1:(length(boundaries) - 1)
        start = boundaries[i] + 1
        stop = boundaries[i + 1]
        
        if stop >= start
            seg_cost = segment_cost(cost_func, start, stop)
            push!(segment_costs, seg_cost)
            total_cost += seg_cost
        end
    end
    
    bic = total_cost + k * log(n)
    
    return (total_cost=total_cost, segment_costs=segment_costs, bic=bic, 
            n_changepoints=k, n_segments=k+1)
end

# Module load hook: announce the package version and the available algorithms
# through the logging system.
function __init__()
    banner = (
        "RegimeChangeJulia v2.0.0 (Hardened Edition) loaded",
        "Available algorithms: PELT, FPOP, BOCPD, CUSUM, KernelCPD, WBS, SegNeigh",
    )
    for message in banner
        @info message
    end
end

end