## ----include = FALSE---------------------------------------------------------- knitr::opts_chunk$set( collapse = TRUE, comment = "#>" ) ## ----message=FALSE, warning=FALSE--------------------------------------------- library(dplyr) # For data manipulation library(survey) # For complex survey analysis library(srvyr) # For complex survey analysis with dplyr syntax library(svrep) # Load and inspect the data data("lou_vax_survey", package = 'svrep') head(lou_vax_survey) colnames(lou_vax_survey) ## ----------------------------------------------------------------------------- lou_vax_survey |> count(RESPONSE_STATUS) |> mutate(pct = n/sum(n)) ## ----------------------------------------------------------------------------- # Describe the survey design lou_vax_survey <- svydesign(ids = ~ 1, weights = ~ SAMPLING_WEIGHT, data = lou_vax_survey) print(lou_vax_survey) # Create appropriate replicate weights lou_vax_survey <- lou_vax_survey |> as_bootstrap_design(replicates = 100, mse = TRUE, type = "Rao-Wu-Yue-Beaumont") print(lou_vax_survey) ## ----------------------------------------------------------------------------- lou_vax_survey <- lou_vax_survey |> as_survey() print(lou_vax_survey) ## ----------------------------------------------------------------------------- # Weights before adjustment lou_vax_survey |> group_by(RESPONSE_STATUS) |> cascade( `Sum of Weights` = sum(cur_svy_wts()), .fill = "TOTAL" ) ## ----------------------------------------------------------------------------- # Conduct a basic nonresponse adjustment nr_adjusted_survey <- lou_vax_survey |> redistribute_weights( reduce_if = RESPONSE_STATUS == "Nonrespondent", increase_if = RESPONSE_STATUS == "Respondent" ) ## ----------------------------------------------------------------------------- # Check the sum of full-sample weights by response status nr_adjusted_survey |> group_by(RESPONSE_STATUS) |> cascade( `Sum of Weights` = sum(cur_svy_wts()), .fill = "TOTAL" ) ## ----------------------------------------------------------------------------- # Check sums of replicate weights by response status nr_adjusted_survey |> summarize_rep_weights( type = "specific", by = "RESPONSE_STATUS" ) |> arrange(Rep_Column, RESPONSE_STATUS) |> head(10) ## ----------------------------------------------------------------------------- lou_vax_survey |> group_by(RACE_ETHNICITY) |> summarize(Response_Rate = mean(RESPONSE_STATUS == "Respondent"), Sample_Size = n(), n_Respondents = sum(RESPONSE_STATUS == "Respondent")) ## ----------------------------------------------------------------------------- nr_adjusted_survey <- lou_vax_survey |> redistribute_weights( reduce_if = RESPONSE_STATUS == "Nonrespondent", increase_if = RESPONSE_STATUS == "Respondent", by = c("RACE_ETHNICITY") ) ## ----------------------------------------------------------------------------- # Fit a response propensity model response_propensity_model <- lou_vax_survey |> mutate(IS_RESPONDENT = ifelse(RESPONSE_STATUS == "Respondent", 1, 0)) |> svyglm(formula = IS_RESPONDENT ~ RACE_ETHNICITY + EDUC_ATTAINMENT, family = quasibinomial(link = 'logit')) # Predict response propensities for individual cases lou_vax_survey <- lou_vax_survey |> mutate( RESPONSE_PROPENSITY = predict(response_propensity_model, newdata = cur_svy(), type = "response") ) # Divide sample into propensity classes lou_vax_survey <- lou_vax_survey |> mutate(PROPENSITY_CELL = ntile(x = RESPONSE_PROPENSITY, n = 5)) lou_vax_survey |> group_by(PROPENSITY_CELL) |> summarize(n = n(), min = min(RESPONSE_PROPENSITY), mean = mean(RESPONSE_PROPENSITY), max = max(RESPONSE_PROPENSITY)) # Redistribute weights by propensity class nr_adjusted_survey <- lou_vax_survey |> redistribute_weights( reduce_if = RESPONSE_STATUS == "Nonrespondent", increase_if = RESPONSE_STATUS == "Respondent", by = "PROPENSITY_CELL" ) # Inspect weights before adjustment lou_vax_survey |> summarize_rep_weights(type = "specific", by = c("PROPENSITY_CELL")) |> arrange(Rep_Column, PROPENSITY_CELL) |> select(PROPENSITY_CELL, Rep_Column, N_NONZERO, SUM) |> head(10) # Inspect weights after adjustment nr_adjusted_survey |> summarize_rep_weights(type = "specific", by = c("PROPENSITY_CELL", "RESPONSE_STATUS")) |> arrange(Rep_Column, PROPENSITY_CELL, RESPONSE_STATUS) |> select(PROPENSITY_CELL, RESPONSE_STATUS, Rep_Column, N_NONZERO, SUM) |> head(10) ## ----------------------------------------------------------------------------- data_frame_with_nr_adjusted_weights <- nr_adjusted_survey |> as_data_frame_with_weights( full_wgt_name = "NR_ADJ_WGT", rep_wgt_prefix = "NR_ADJ_REP_WGT_" ) # Preview first few column names colnames(data_frame_with_nr_adjusted_weights) |> head(12) ## ----eval=FALSE--------------------------------------------------------------- # # Write the data to a CSV file # write.csv( # x = data_frame_with_nr_adjusted_weights, # file = "survey-data-with-nonresponse-adjusted-weights.csv" # )