% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/lead_data_loaders.R
\name{load_cohort_data}
\alias{load_cohort_data}
\title{Load DOE LEAD Tool Cohort Data}
\usage{
load_cohort_data(
  dataset = c("ami", "fpl"),
  states = NULL,
  counties = NULL,
  vintage = "2022",
  income_brackets = NULL,
  verbose = TRUE,
  ...
)
}
\arguments{
\item{dataset}{Character, either "ami" (Area Median Income) or "fpl"
(Federal Poverty Level)}

\item{states}{Character vector of state abbreviations to filter by (optional)}

\item{counties}{Character vector of county names or FIPS codes to filter by (optional).
County names are matched case-insensitively. Requires \code{states} to be specified.}

\item{vintage}{Character, data vintage: "2018" or "2022" (default "2022")}

\item{income_brackets}{Character vector of income brackets to filter by (optional)}

\item{verbose}{Logical, print status messages (default TRUE)}

\item{...}{Additional filter expressions passed to dplyr::filter() for dynamic filtering.
Allows filtering by any column in the dataset using tidyverse syntax.
Example: \verb{households > 100, total_income > 50000}}
}
\value{
A tibble with columns:
\itemize{
\item geoid: Census tract identifier
\item income_bracket: Income bracket label
\item households: Number of households
\item total_income: Total household income ($)
\item total_electricity_spend: Total electricity spending ($)
\item total_gas_spend: Total gas spending ($)
\item total_other_spend: Total other fuel spending ($)
\item TEN: Housing tenure category (1=Owned free/clear, 2=Owned with mortgage,
3=Rented, 4=Occupied without rent). Enables analysis of energy burden
differences between renters and owners.
\item TEN-YBL6: Housing tenure crossed with year structure built (6 categories).
Allows analysis of how building age and ownership status interact to affect
energy burden (e.g., older rental units vs newer owner-occupied homes).
\item TEN-BLD: Housing tenure crossed with building type (e.g., single-family,
multi-unit). Enables analysis of energy burden across different housing
structures and ownership patterns.
\item TEN-HFL: Housing tenure crossed with primary heating fuel type (e.g., gas,
electric, oil). Critical for analyzing how heating fuel choice and tenure
status jointly influence energy costs and burden.
}
}
\description{
Load household energy burden cohort data with automatic fallback:
\enumerate{
\item Try local database
\item Fall back to local CSV files
\item Auto-download from OpenEI if neither exists
\item Auto-import downloaded data to database for future use
}
}
\examples{
\donttest{
# Single state (fast, good for learning)
nc_ami <- load_cohort_data(dataset = "ami", states = "NC")

# Load specific vintage
nc_2018 <- load_cohort_data(dataset = "ami", states = "NC", vintage = "2018")
}

\donttest{
if (interactive()) {
  # Multiple states (regional analysis - requires data download)
  southeast <- load_cohort_data(dataset = "fpl", states = c("NC", "SC", "GA", "FL"))

  # Nationwide (all 50 states plus DC - no state filter)
  us_data <- load_cohort_data(dataset = "ami", vintage = "2022")

# Filter to specific income brackets
low_income <- load_cohort_data(
  dataset = "ami",
  states = "NC",
  income_brackets = c("0-30\% AMI", "30-50\% AMI")
)

# Filter to specific counties within a state
triangle <- load_cohort_data(
  dataset = "fpl",
  states = "NC",
  counties = c("Orange", "Durham", "Wake")
)

# Or use county FIPS codes
orange <- load_cohort_data(
  dataset = "fpl",
  states = "NC",
  counties = "37135"
)

# Use dynamic filtering for custom criteria
high_burden <- load_cohort_data(
  dataset = "ami",
  states = "NC",
  households > 100,
  total_electricity_spend / total_income > 0.06
)

# Analyze energy burden by housing characteristics
# Compare renters vs owners by heating fuel type
nc_housing <- load_cohort_data(dataset = "ami", states = "NC")
library(dplyr)

# Group by tenure and heating fuel to analyze energy burden patterns
housing_analysis <- nc_housing \%>\%
  filter(!is.na(TEN), !is.na(`TEN-HFL`)) \%>\%
  group_by(TEN, `TEN-HFL`) \%>\%
  summarise(
    total_households = sum(households),
    avg_energy_burden = weighted.mean(
      (total_electricity_spend + total_gas_spend + total_other_spend) / total_income,
      w = households,
      na.rm = TRUE
    ),
    .groups = "drop"
  )
}
}
}
