## ----setup, include = FALSE---------------------------------------------------
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>",
  fig.width = 7,
  fig.height = 5
)

## ----eval=FALSE---------------------------------------------------------------
# # Install from github
# devtools::install_github('klintkanopka/mixedsubjects')

## ----load-package, echo=FALSE, message=FALSE----------------------------------
# For the vignette, we source the files directly

library(mixedsubjects)

pkg_dir <- system.file(package = "mixedsubjects")
if (pkg_dir == "") {
  # Running from source
  pkg_dir <- "../R"
  for (f in list.files(pkg_dir, pattern = "\\.R$", full.names = TRUE)) {
    source(f)
  }
}

## ----simulate-data------------------------------------------------------------
set.seed(123)

# True treatment effect
true_ate <- 0.3

# Human subjects data (observed)
n_observed <- 200
observed_df <- data.frame(
  # Treatment assignment (balanced)
  D = rep(c(1, 0), each = n_observed / 2)
)

# Generate outcomes: Y(0) ~ N(0, 1), Y(1) ~ N(0.3, 1)
observed_df$Y <- ifelse(
  observed_df$D == 1,
  rnorm(n_observed / 2, mean = true_ate, sd = 1),
  rnorm(n_observed / 2, mean = 0, sd = 1)
)

# LLM predictions (correlated with true outcomes, but with some error)
# S^(1) predicts Y(1), S^(0) predicts Y(0)
observed_df$S1 <- 0.6 * observed_df$Y + rnorm(n_observed, 0, 0.5)
observed_df$S0 <- 0.5 * observed_df$Y + rnorm(n_observed, 0, 0.6)

# Unobserved units (only have predictions, no actual Y)
# Predictions must come from the same pipeline as observed (Assumption C: Random Labeling)
n_unobserved <- 1000
D_unobs <- rep(c(1, 0), each = n_unobserved / 2)
latent_Y <- ifelse(D_unobs == 1,
                   rnorm(n_unobserved / 2, mean = true_ate, sd = 1),
                   rnorm(n_unobserved / 2, mean = 0, sd = 1))
unobserved_df <- data.frame(
  D = D_unobs,
  S1 = 0.6 * latent_Y + rnorm(n_unobserved, 0, 0.5),
  S0 = 0.5 * latent_Y + rnorm(n_unobserved, 0, 0.6)
)

## ----create-msd-data----------------------------------------------------------
msd <- msd_data(observed = observed_df, unobserved = unobserved_df)
print(msd)

## ----estimate-ate-------------------------------------------------------------
# Classical difference-in-means (ignores predictions)
result_dim <- msd_dim(msd)

# D-T DiP (uses both predictions with arm-specific tuning)
result_dt_dip <- msd_dt_dip(msd)

# Compare
cat("Difference-in-Means:\n")
cat("  Estimate:", round(result_dim$estimate, 3), "\n")
cat("  SE:", round(result_dim$se, 3), "\n")
cat("  95% CI: [", round(result_dim$ci_lower, 3), ", ",
    round(result_dim$ci_upper, 3), "]\n\n")

cat("D-T DiP:\n")
cat("  Estimate:", round(result_dt_dip$estimate, 3), "\n")
cat("  SE:", round(result_dt_dip$se, 3), "\n")
cat("  95% CI: [", round(result_dt_dip$ci_lower, 3), ", ",
    round(result_dt_dip$ci_upper, 3), "]\n")

## ----data-format-1, eval=FALSE------------------------------------------------
# msd <- msd_data(
#   observed = observed_df,    # Must have Y, D, and optionally S0, S1
#   unobserved = unobserved_df # Must have D and S0, S1 (no Y)
# )

## ----data-format-2------------------------------------------------------------
# Combine into single dataframe
combined_df <- rbind(
  observed_df,
  data.frame(Y = NA, D = unobserved_df$D,
             S0 = unobserved_df$S0, S1 = unobserved_df$S1)
)

# Create msd_data object
msd_combined <- msd_data(data = combined_df)
print(msd_combined)

## ----custom-columns, eval=FALSE-----------------------------------------------
# # Custom column names
# my_data <- data.frame(
#   response_var = rnorm(100),
#   is_treated = rep(c(1, 0), each = 50),
#   claude_pred_ctrl = rnorm(100),
#   claude_pred_trt = rnorm(100)
# )
# 
# msd <- msd_data(
#   observed = my_data,
#   outcome = "response_var",
#   treatment = "is_treated",
#   pred_control = "claude_pred_ctrl",
#   pred_treated = "claude_pred_trt"
# )

## ----different-columns, eval=FALSE--------------------------------------------
# msd <- msd_data(
#   observed = obs_df,
#   unobserved = unobs_df,
#   obs_outcome = "Y",
#   obs_treatment = "treated",
#   obs_pred_control = "gpt_pred_0",
#   obs_pred_treated = "gpt_pred_1",
#   unobs_treatment = "D",
#   unobs_pred_control = "S0",
#   unobs_pred_treated = "S1"
# )

## ----formula-example, eval=FALSE----------------------------------------------
# # Formula: outcome ~ treatment | predictions
# # Using custom column names directly in the estimator
# 
# # For GREG-type estimators (single prediction per arm)
# result <- msd_greg(response ~ treatment | pred_1 + pred_0,
#                    observed = obs_df, unobserved = unobs_df)
# 
# # For DiP-type estimators (both predictions)
# result <- msd_dt_dip(Y ~ D | S1 + S0,
#                      observed = obs_df, unobserved = unobs_df)

## ----formula-dim, eval=FALSE--------------------------------------------------
# result <- msd_dim(response ~ treated, observed = obs_df)

## ----dim-example--------------------------------------------------------------
result <- msd_dim(msd)
print(result)

## ----greg-example-------------------------------------------------------------
result <- msd_greg(msd)
print(result)

## ----ppi-example--------------------------------------------------------------
result <- msd_ppi(msd, n_folds = 2)
print(result)

## ----dt-example---------------------------------------------------------------
result <- msd_dt(msd, n_folds = 2)
print(result)

## ----dip-example--------------------------------------------------------------
result <- msd_dip(msd)
print(result)

## ----dip-pp-example-----------------------------------------------------------
result <- msd_dip_pp(msd, n_folds = 2)
print(result)

## ----dt-dip-example-----------------------------------------------------------
result <- msd_dt_dip(msd, n_folds = 2)
print(result)

## ----estimate-all-------------------------------------------------------------
all_results <- estimate_all(msd)
print(all_results)

## ----check-correlation--------------------------------------------------------
cor(unobserved_df$S1, unobserved_df$S0)

## ----bootstrap-example--------------------------------------------------------
# Bootstrap variance for D-T DiP
boot_result <- bootstrap_variance(
  msd,
  estimator = "dt_dip",
  n_bootstrap = 500,
  seed = 42
)

cat("Delta-method SE:", round(result_dt_dip$se, 4), "\n")
cat("Bootstrap SE:", round(boot_result$se, 4), "\n")

## ----optimal-design-----------------------------------------------------------
design <- optimal_design(
  pilot_data = msd,          # Pilot study data
  budget = 5000,             # Total budget in dollars
  cost_human = 5,            # $5 per human response
  cost_prediction = 0.01,    # $0.01 per LLM prediction
  treatment_prob = 0.5       # Balanced treatment assignment
)

print(design)

## ----workflow-pilot, eval=FALSE-----------------------------------------------
# # Collect pilot data
# pilot_observed <- collect_human_responses(n = 100)
# pilot_observed$S1 <- get_llm_predictions(pilot_observed, condition = "treatment")
# pilot_observed$S0 <- get_llm_predictions(pilot_observed, condition = "control")
# 
# pilot_unobserved <- generate_synthetic_units(n = 200)
# pilot_unobserved$S1 <- get_llm_predictions(pilot_unobserved, condition = "treatment")
# pilot_unobserved$S0 <- get_llm_predictions(pilot_unobserved, condition = "control")
# 
# pilot_msd <- msd_data(observed = pilot_observed, unobserved = pilot_unobserved)

## ----workflow-quality, eval=FALSE---------------------------------------------
# # Check correlations
# pilot_results <- estimate_all(pilot_msd)
# print(pilot_results)
# 
# # Compare to DiM baseline
# dim_se <- pilot_results$SE[pilot_results$Estimator == "Difference-in-Means (DiM)"]
# best_se <- min(pilot_results$SE)
# variance_reduction <- 1 - (best_se / dim_se)^2
# cat("Potential variance reduction:", round(variance_reduction * 100, 1), "%\n")

## ----workflow-design, eval=FALSE----------------------------------------------
# design <- optimal_design(
#   pilot_data = pilot_msd,
#   budget = 10000,
#   cost_human = 5,
#   cost_prediction = 0.01
# )
# print(design)

## ----workflow-main, eval=FALSE------------------------------------------------
# # Collect human responses
# main_observed <- collect_human_responses(n = design$optimal_n_obs)
# 
# # Generate LLM predictions
# main_observed$S1 <- get_llm_predictions(main_observed, "treatment")
# main_observed$S0 <- get_llm_predictions(main_observed, "control")
# 
# main_unobserved <- generate_synthetic_units(n = design$optimal_n_unobs)
# main_unobserved$S1 <- get_llm_predictions(main_unobserved, "treatment")
# main_unobserved$S0 <- get_llm_predictions(main_unobserved, "control")
# 
# main_msd <- msd_data(observed = main_observed, unobserved = main_unobserved)

## ----workflow-analyze, eval=FALSE---------------------------------------------
# # Run the recommended estimator
# result <- msd_dt_dip(main_msd)  # Or whichever was recommended
# print(result)
# 
# # Optionally verify with bootstrap
# boot_result <- bootstrap_variance(main_msd, "dt_dip", n_bootstrap = 1000)

## ----session-info-------------------------------------------------------------
sessionInfo()