When creating multiple cohorts we might be interested in the overlap between them, that is, how many individuals appear in more than one cohort. CohortCharacteristics provides functions to generate such estimates and then summarise them in tables and plots.

To see how this works let’s create a few medication cohorts with the Eunomia synthetic dataset.

library(CDMConnector)
library(CodelistGenerator)
library(CohortCharacteristics)
library(dplyr)
library(ggplot2)

# Open a DuckDB connection to the Eunomia synthetic dataset; the database
# location is whatever CDMConnector::eunomia_dir() returns.
con <- DBI::dbConnect(duckdb::duckdb(),
  dbdir = CDMConnector::eunomia_dir()
)
# Create the cdm reference on top of that connection. Argument names are
# written out in full (`cdm_schema`) so the call does not depend on R's
# partial argument matching, which is fragile if the signature ever changes.
cdm <- CDMConnector::cdm_from_con(con,
  cdm_schema = "main",
  write_schema = "main",
  cdm_name = "Eunomia"
)

# Look up the drug ingredient codes for the three medications of interest;
# the result is passed on as the concept set when the cohorts are generated.
meds_cs <- getDrugIngredientCodes(
  cdm = cdm,
  name = c("acetaminophen", "morphine", "warfarin")
)

# Generate a cohort table called "meds" from the medication concept set and
# keep the updated cdm reference. The `limit` and `end` choices are recorded
# in the cohort settings; `overwrite = TRUE` presumably replaces any existing
# table with the same name (confirm against the CDMConnector documentation).
cdm <- generateConceptCohortSet(
  cdm = cdm,
  conceptSet = meds_cs,
  name = "meds",
  limit = "all",
  end = "event_end_date",
  overwrite = TRUE
)

# Settings recorded for each cohort in the "meds" table.
cdm$meds |>
  settings()
#> # A tibble: 3 × 6
#>   cohort_definition_id cohort_name   limit prior_observation future_observation
#>                  <int> <chr>         <chr>             <dbl>              <dbl>
#> 1                    1 morphine      all                   0                  0
#> 2                    2 acetaminophen all                   0                  0
#> 3                    3 warfarin      all                   0                  0
#> # ℹ 1 more variable: end <chr>
# Number of records and distinct subjects in each cohort.
cdm$meds |>
  cohortCount()
#> # A tibble: 3 × 3
#>   cohort_definition_id number_records number_subjects
#>                  <int>          <int>           <int>
#> 1                    1             35              35
#> 2                    2          13908            2679
#> 3                    3            137             137

Now that we have our cohorts, we can summarise the overlap between them.

# Summarise the overlap between the cohorts in the "meds" table, then take a
# quick look at the structure of the result.
meds_overlap <- summariseCohortOverlap(cdm$meds)
glimpse(meds_overlap)
#> Rows: 36
#> Columns: 13
#> $ result_id        <int> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,…
#> $ cdm_name         <chr> "Eunomia", "Eunomia", "Eunomia", "Eunomia", "Eunomia"…
#> $ group_name       <chr> "cohort_name_reference &&& cohort_name_comparator", "…
#> $ group_level      <chr> "acetaminophen &&& morphine", "acetaminophen &&& morp…
#> $ strata_name      <chr> "overall", "overall", "overall", "overall", "overall"…
#> $ strata_level     <chr> "overall", "overall", "overall", "overall", "overall"…
#> $ variable_name    <chr> "overlap", "reference", "comparator", "overlap", "ref…
#> $ variable_level   <chr> "number_subjects", "number_subjects", "number_subject…
#> $ estimate_name    <chr> "count", "count", "count", "count", "count", "count",…
#> $ estimate_type    <chr> "integer", "integer", "integer", "integer", "integer"…
#> $ estimate_value   <chr> "35", "2644", "0", "136", "1", "2543", "35", "0", "26…
#> $ additional_name  <chr> "overall", "overall", "overall", "overall", "overall"…
#> $ additional_level <chr> "overall", "overall", "overall", "overall", "overall"…

We have table and plotting functions to help view our results.

# Render the overlap estimates as a formatted table.
meds_overlap |>
  tableCohortOverlap()
CDM name Cohort name reference Cohort name comparator Estimate name Only in reference cohort In both cohorts Only in comparator cohort
Eunomia Acetaminophen Morphine N (%) 2,644 (98.69%) 35 (1.31%) 0 (0.00%)
Warfarin N (%) 2,543 (94.89%) 136 (5.07%) 1 (0.04%)
Morphine Warfarin N (%) 29 (17.47%) 6 (3.61%) 131 (78.92%)
# Plot the same overlap estimates.
meds_overlap |>
  plotCohortOverlap()

As well as generating these estimates for cohorts overall, we can also obtain stratified estimates. In this example we’ll add age groups to our cohort table, and then obtain estimates stratified by these groups.

# Add age information with two age groups (0 to 49 and 50 to 150) to the
# cohort table, store the result as a non-temporary table named "meds", and
# wrap it as a cohort table again.
cdm$meds <- cdm$meds |>
  PatientProfiles::addAge(
    ageGroup = list(
      c(0, 49),
      c(50, 150)
    )
  ) |>
  compute(name = "meds", temporary = FALSE) |>
  newCohortTable()
# Recompute the overlap, this time stratified by the age_group column.
meds_overlap <- summariseCohortOverlap(
  cdm$meds,
  strata = list("age_group")
)

As with our overall results, we can quickly create tables and figures to view our stratified results.

# Render the stratified overlap estimates as a formatted table.
meds_overlap |>
  tableCohortOverlap()
Age group
CDM name Cohort name reference Cohort name comparator Estimate name 0 to 49 50 to 150 Overall
Only in reference cohort In both cohorts Only in comparator cohort Only in reference cohort In both cohorts Only in comparator cohort Only in reference cohort In both cohorts Only in comparator cohort
Eunomia Acetaminophen Morphine N (%) 2,636 (99.06%) 25 (0.94%) 0 (0.00%) 1,117 (99.11%) 10 (0.89%) 0 (0.00%) 2,644 (98.69%) 35 (1.31%) 0 (0.00%)
Warfarin N (%) 2,653 (99.66%) 8 (0.30%) 1 (0.04%) 1,025 (88.90%) 102 (8.85%) 26 (2.25%) 2,543 (94.89%) 136 (5.07%) 1 (0.04%)
Morphine Warfarin N (%) 0 (0.00%) 0 (0.00%) 0 (0.00%) 4 (3.03%) 6 (4.55%) 122 (92.42%) 29 (17.47%) 6 (3.61%) 131 (78.92%)
# Plot the stratified estimates, with one facet per strata level.
meds_overlap |>
  plotCohortOverlap(facet = "strata_level")