## ----eval = FALSE, message=FALSE-------------------------------------------------------------------------------------- # library(crunch) ## ----results='hide', echo=FALSE, message=FALSE------------------------------------------------------------------------ library(crunch) set_crunch_opts("crunch.api" = "https://team.crunch.io/api/") library(httptest) run_cleanup <- !dir.exists("fork-and-merge") ds_to_delete <- c() httpcache::clearCache() start_vignette("fork-and-merge") # The usual redactor does not work, because the end of the UUIDs for variables # are stable between datasets (the first variable ends with "000000") # and so we have a clash when using variables from merged dataests # So this redactor should be the same as the other except for the variable UUID handling # (For context, we have to redact so we don't get paths that are too long to subtmit to CRAN) # # Eventually this should probably be the default, but it only really affects # this vignette and I don't feel like fixing everything (other vignettes, as well as crplyr?) set_redactor(function(response) { ## Remove multipart form fields because POST sources/ sends a tmpfile path ## that's different every time, so the request will never match. response$request$fields <- NULL response %>% redact_auth() %>% gsub_response("([0-9a-f]{6})[0-9a-f]{26}", "\\1") %>% ## Prune UUIDs httptest::gsub_response("([0-9A-Za-z]{4})[0-9A-Za-z]{18}[0-9]{4}([0-9]{2})", "\\1\\2") %>% # UUIDs in variables now too gsub_response( "https.//(app|team).crunch.io/api/progress/[^\"].*?/", "https://\\1.crunch.io/api/progress/" ) %>% ## Progress is meaningless in mocks gsub_response("https.//(app|team).crunch.io", "") %>% ## Shorten URL gsub_response("https%3A%2F%2F(app|team).crunch.io", "") ## Shorten encoded URL }) set_requester(function(request) { request$fields <- NULL request %>% gsub_request("https.//(app|team).crunch.io", "") ## Shorten URL }) ## ----message=FALSE, results='hide'------------------------------------------------------------------------------------ my_project <- newProject("examples/rcrunch vignette data") ds <- newDataset(SO_survey, "stackoverflow_survey", project = my_project) ## ----forcevarcat1, include=FALSE-------------------------------------------------------------------------------------- # Force catalog so that catalog is loaded at a consistent time ds <- forceVariableCatalog(ds) ds_to_delete[length(ds_to_delete) + 1] <- self(ds) ## ----fork dataset----------------------------------------------------------------------------------------------------- forked_ds <- forkDataset(ds, project = "examples/rcrunch vignette data") ## ----forcevarcat2, include=FALSE-------------------------------------------------------------------------------------- # Force catalog so that catalog is loaded at a consistent time forked_ds <- forceVariableCatalog(forked_ds) ds_to_delete[length(ds_to_delete) + 1] <- self(forked_ds) ## ----state change1, include=FALSE------------------------------------------------------------------------------------- change_state() ## ----create Multiple Response Variable-------------------------------------------------------------------------------- forked_ds$ImportantHiringCA <- makeArray(forked_ds[, c("ImportantHiringTechExp", "ImportantHiringPMExp")], name = "importantCatArray") ## ----forcevarcat3, include=FALSE-------------------------------------------------------------------------------------- # Force catalog so that catalog is loaded at a consistent time forked_ds <- forceVariableCatalog(forked_ds) ## ----compare datasets------------------------------------------------------------------------------------------------- all.equal(names(forked_ds), names(ds)) ## ----state change2, include=FALSE------------------------------------------------------------------------------------- change_state() ## ----merging---------------------------------------------------------------------------------------------------------- ds <- mergeFork(ds, forked_ds) ## ----check successful merge------------------------------------------------------------------------------------------- ds$ImportantHiringCA ## ----state change3, include=FALSE------------------------------------------------------------------------------------- change_state() ## ----upload wave 2, message=FALSE, results='hide'--------------------------------------------------------------------- wave2 <- newDataset(SO_survey, "SO_survey_wave2", project = my_project) ## ----state change4, include=FALSE------------------------------------------------------------------------------------- change_state() ds_to_delete[length(ds_to_delete) + 1] <- self(wave2) ## ----fork-and-append, results='hide'---------------------------------------------------------------------------------- ds_fork <- forkDataset(ds, project = "examples/rcrunch vignette data") ds_fork <- appendDataset(ds_fork, wave2) ## --------------------------------------------------------------------------------------------------------------------- nrow(ds) nrow(ds_fork) ## ----state change5, include=FALSE------------------------------------------------------------------------------------- change_state() ds_to_delete[length(ds_to_delete) + 1] <- self(ds_fork) ## --------------------------------------------------------------------------------------------------------------------- ds <- mergeFork(ds, ds_fork) ## --------------------------------------------------------------------------------------------------------------------- nrow(ds) ## ----state change6, include=FALSE------------------------------------------------------------------------------------- change_state() ## ----create recode data, message=FALSE, results='hide'---------------------------------------------------------------- house_table <- data.frame(Respondent = unique(as.vector(ds$Respondent))) house_table$HouseholdSize <- sample( 1:5, nrow(house_table), TRUE ) house_ds <- newDataset(house_table, "House Size", project = my_project) ## ----fork and merge recode, message=FALSE, results='hide'------------------------------------------------------------- ds_fork <- forkDataset(ds, project = "examples/rcrunch vignette data") ds_fork <- merge(ds_fork, house_ds, by = "Respondent") ## ----state change7, include=FALSE------------------------------------------------------------------------------------- change_state() ds_to_delete[length(ds_to_delete) + 1] <- self(house_ds) ds_to_delete[length(ds_to_delete) + 1] <- self(ds_fork) ## ----check new data--------------------------------------------------------------------------------------------------- crtabs(~ TabsSpaces + HouseholdSize, ds_fork) ## ----state change8, include=FALSE------------------------------------------------------------------------------------- change_state() ## ----final mergeFork-------------------------------------------------------------------------------------------------- ds <- mergeFork(ds, ds_fork) ds$HouseholdSize ## ----results='hide', echo=FALSE, message=FALSE------------------------------------------------------------------------ end_vignette() if (run_cleanup) { for (ds_id in ds_to_delete) with_consent(deleteDataset(ds_id)) with_consent(delete(my_project)) }