---
title: "Combining Cohorts"
output: rmarkdown::html_vignette
vignette: >
  %\VignetteIndexEntry{a08_combine_cohorts}
  %\VignetteEngine{knitr::rmarkdown}
  %\VignetteEncoding{UTF-8}
---

```{r, include = FALSE}
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>"
)
```

```{r setup}
library(CohortConstructor)
library(CohortCharacteristics)
library(ggplot2)
```

```{r, include = FALSE}
# Hidden setup: chunk defaults for the rest of the vignette, plus the packages
# and example data the visible chunks rely on.
knitr::opts_chunk$set(
  collapse = TRUE,
  eval = TRUE, message = FALSE, warning = FALSE,
  comment = "#>"
)

library(CDMConnector)
library(dplyr, warn.conflicts = FALSE)

# Fall back to a folder under tempdir() when no data folder is configured.
if (Sys.getenv("EUNOMIA_DATA_FOLDER") == "") {
  Sys.setenv("EUNOMIA_DATA_FOLDER" = file.path(tempdir(), "eunomia"))
}
# Create the folder (including any missing parent directories, so that a
# user-supplied nested path also works) and download the data on first use.
if (!dir.exists(Sys.getenv("EUNOMIA_DATA_FOLDER"))) {
  dir.create(Sys.getenv("EUNOMIA_DATA_FOLDER"), recursive = TRUE)
  downloadEunomiaData()
}
```

For this example we'll use the Eunomia synthetic data from the CDMConnector package.

```{r}
# Connect to the Eunomia DuckDB database and build the cdm reference.
con <- DBI::dbConnect(duckdb::duckdb(), dbdir = eunomiaDir())
cdm <- cdmFromCon(con, cdmSchema = "main",
                  writeSchema = c(prefix = "my_study_", schema = "main"))
```

Let's start by creating two drug cohorts, one for users of diclofenac and another for users of acetaminophen.

```{r}
# One cohort per named entry of the concept set.
cdm$medications <- conceptCohort(cdm = cdm,
                                 conceptSet = list("diclofenac" = 1124300,
                                                   "acetaminophen" = 1127433),
                                 name = "medications")
cohortCount(cdm$medications)
```

To check whether there is an overlap between records in both cohorts, we can use the function `intersectCohorts()`.

```{r}
cdm$medintersect <- CohortConstructor::intersectCohorts(
  cohort = cdm$medications,
  name = "medintersect"
)

cohortCount(cdm$medintersect)
```

There are 6 individuals who had overlapping records in the diclofenac and acetaminophen cohorts.
```{r, include=FALSE, warning=FALSE}
# Hidden reset: close the connection opened earlier (if any) before replacing
# it, so the previous DuckDB handle is not leaked, then rebuild the
# `medications` cohort on the new connection.
if (exists("con", inherits = FALSE) && DBI::dbIsValid(con)) {
  DBI::dbDisconnect(con, shutdown = TRUE)
}
con <- DBI::dbConnect(duckdb::duckdb(), dbdir = eunomiaDir())
# Same cdmFromCon() call form as the other chunks in this vignette.
cdm <- cdmFromCon(con, cdmSchema = "main",
                  writeSchema = c(prefix = "my_study_", schema = "main"))
cdm$medications <- conceptCohort(cdm = cdm,
                                 conceptSet = list("diclofenac" = 1124300,
                                                   "acetaminophen" = 1127433),
                                 name = "medications")
```

We can choose the number of days between cohort entries using the `gap` argument.

```{r}
cdm$medintersect <- CohortConstructor::intersectCohorts(
  cohort = cdm$medications,
  gap = 365,
  name = "medintersect"
)

cohortCount(cdm$medintersect)
```

There are 94 individuals who had overlapping records (within 365 days) in the diclofenac and acetaminophen cohorts.

We can also combine different cohorts using the function `unionCohorts()`.

```{r}
cdm$medunion <- CohortConstructor::unionCohorts(
  cohort = cdm$medications,
  name = "medunion"
)

cohortCount(cdm$medunion)
```

We have now created a new cohort which includes individuals in either the diclofenac cohort or the acetaminophen cohort.

```{r, include=FALSE, warning=FALSE}
# Hidden reset: close the current connection before reconnecting, then rebuild
# the `medications` cohort for the next example.
if (exists("con", inherits = FALSE) && DBI::dbIsValid(con)) {
  DBI::dbDisconnect(con, shutdown = TRUE)
}
con <- DBI::dbConnect(duckdb::duckdb(), dbdir = eunomiaDir())
cdm <- cdmFromCon(con, cdmSchema = "main",
                  writeSchema = c(prefix = "my_study_", schema = "main"))
cdm$medications <- conceptCohort(cdm = cdm,
                                 conceptSet = list("diclofenac" = 1124300,
                                                   "acetaminophen" = 1127433),
                                 name = "medications")
```

You can keep the original cohorts in the new table if you use the argument `keepOriginalCohorts = TRUE`.

```{r}
cdm$medunion <- CohortConstructor::unionCohorts(
  cohort = cdm$medications,
  name = "medunion",
  keepOriginalCohorts = TRUE
)

cohortCount(cdm$medunion)
```

You can also choose the number of days between two subsequent cohort entries to be merged using the `gap` argument.
```{r, include=FALSE, warning=FALSE}
# Hidden reset: close the current connection before reconnecting so the old
# DuckDB handle is not leaked, then rebuild the `medications` cohort.
if (exists("con", inherits = FALSE) && DBI::dbIsValid(con)) {
  DBI::dbDisconnect(con, shutdown = TRUE)
}
con <- DBI::dbConnect(duckdb::duckdb(), dbdir = eunomiaDir())
cdm <- cdmFromCon(con, cdmSchema = "main",
                  writeSchema = c(prefix = "my_study_", schema = "main"))
cdm$medications <- conceptCohort(cdm = cdm,
                                 conceptSet = list("diclofenac" = 1124300,
                                                   "acetaminophen" = 1127433),
                                 name = "medications")
```

```{r}
cdm$medunion <- CohortConstructor::unionCohorts(
  cohort = cdm$medications,
  name = "medunion",
  gap = 365,
  keepOriginalCohorts = TRUE
)

cohortCount(cdm$medunion)
```