Summarise cohort entries

1.1 Introduction

In this example we’re going to summarise the characteristics of individuals with an ankle sprain, ankle fracture, forearm fracture, or a hip fracture using the Eunomia synthetic data.

We’ll begin by creating our study cohorts.

library(CDMConnector)
library(CohortCharacteristics)
library(dplyr)
library(ggplot2)

# Open a DuckDB connection to the database at the path returned by
# CDMConnector::eunomia_dir() (the Eunomia synthetic data)
con <- DBI::dbConnect(duckdb::duckdb(),
  dbdir = CDMConnector::eunomia_dir()
)
# Create the cdm reference. The CDM tables are read from, and new tables
# (such as the cohorts below) are written to, the "main" schema.
# The argument is spelled out in full (`cdm_schema`) rather than relying on
# R's partial argument matching.
cdm <- CDMConnector::cdm_from_con(con,
  cdm_schema = "main",
  write_schema = "main",
  cdm_name = "Eunomia"
)

# Create the "injuries" cohort table, with one cohort per named concept set
cdm <- cdm |>
  generateConceptCohortSet(
    name = "injuries",
    conceptSet = list(
      "ankle_sprain" = 81151,
      "ankle_fracture" = 4059173,
      "forearm_fracture" = 4278672,
      "hip_fracture" = 4230399
    ),
    limit = "all",
    end = "event_end_date"
  )

1.2 Summarising cohort counts

We can first quickly summarise and present the overall counts of our cohorts.

# Count the records and subjects in each cohort, then show them as a table
cohort_counts <- cdm[["injuries"]] |>
  summariseCohortCount()
cohort_counts |>
  tableCohortCount()

CDM name	Variable name	Estimate name	Cohort name
CDM name	Variable name	Estimate name	Hip fracture	Forearm fracture	Ankle sprain	Ankle fracture
Eunomia	Number records	N	138	569	1,915	464
	Number subjects	N	132	510	1,357	427

Moreover, we can also easily stratify these counts. For example, here we add age groups and then stratify our counts by them.

# Add age and an age_group column (0-3, 4-17, 18+) to the injuries cohort
# and save the result back to the "injuries" table
cdm[["injuries"]] <- cdm[["injuries"]] |>
  PatientProfiles::addAge(
    ageGroup = list(c(0, 3), c(4, 17), c(18, Inf))
  ) |>
  compute(name = "injuries", temporary = FALSE)

# Cohort counts again, this time stratified by age group
cohort_counts <- cdm[["injuries"]] |>
  summariseCohortCount(strata = "age_group")
cohort_counts |>
  tableCohortCount()

CDM name	Age group	Variable name	Estimate name	Cohort name
CDM name	Age group	Variable name	Estimate name	Ankle fracture	Hip fracture	Ankle sprain	Forearm fracture
Eunomia	Overall	Number records	N	464	138	1,915	569
	0 to 3	Number records	N	49	7	202	51
	18 or above	Number records	N	213	88	1,047	268
	4 to 17	Number records	N	202	43	666	250
	Overall	Number subjects	N	427	132	1,357	510
	0 to 3	Number subjects	N	49	7	196	51
	18 or above	Number subjects	N	204	83	847	249
	4 to 17	Number subjects	N	195	43	597	239

We can also apply minimum cell count suppression to our cohort counts. In this case we will obscure any counts below 10.

# Apply minimum cell count suppression (threshold 10), then re-tabulate
cohort_counts <- cohort_counts |>
  suppress(minCellCount = 10)
cohort_counts |>
  tableCohortCount()

CDM name	Age group	Variable name	Estimate name	Cohort name
CDM name	Age group	Variable name	Estimate name	Ankle fracture	Hip fracture	Ankle sprain	Forearm fracture
Eunomia	Overall	Number records	N	464	138	1,915	569
	0 to 3	Number records	N	49	<10	202	51
	18 or above	Number records	N	213	88	1,047	268
	4 to 17	Number records	N	202	43	666	250
	Overall	Number subjects	N	427	132	1,357	510
	0 to 3	Number subjects	N	49	<10	196	51
	18 or above	Number subjects	N	204	83	847	249
	4 to 17	Number subjects	N	195	43	597	239

1.3 Summarising cohort attrition

Say we specify two inclusion criteria. First, we keep only cohort entries starting in the year 2000 or later. Second, we keep only cohort entries for those aged 18 or older. We can easily create plots summarising our cohort attrition.

# Create a cohort table containing only the ankle sprain cohort
cdm <- cdm |>
  generateConceptCohortSet(
    name = "ankle_sprain",
    conceptSet = list("ankle_sprain" = 81151),
    limit = "all",
    end = "event_end_date"
  )

# First criterion: keep only entries whose start year is 2000 or later,
# record the step in the cohort attrition, and save the table.
# NOTE(review): attrition is recorded before compute() here, but after
# compute() in the age restriction; confirm both orders are intended.
cdm[["ankle_sprain"]] <- cdm[["ankle_sprain"]] |>
  filter(year(cohort_start_date) >= 2000) |>
  recordCohortAttrition("Restrict to cohort_start_date >= 2000") |>
  compute(temporary = FALSE, name = "ankle_sprain")

# Summarise the attrition recorded so far
attrition_summary <- summariseCohortAttrition(cdm[["ankle_sprain"]])

# Plot the attrition summary
plotCohortAttrition(attrition_summary)

# Second criterion: add an age column, keep entries with age >= 18, save
# the table, and then record the step in the cohort attrition
cdm[["ankle_sprain"]] <- cdm[["ankle_sprain"]] |>
  PatientProfiles::addAge() |>
  filter(age >= 18) |>
  compute(temporary = FALSE, name = "ankle_sprain") |>
  recordCohortAttrition("Restrict to age >= 18")

# Re-summarise so the attrition now includes both restrictions
attrition_summary <- summariseCohortAttrition(cdm[["ankle_sprain"]])

# Plot the attrition for cohort 1 (the table holds a single cohort)
plotCohortAttrition(attrition_summary, cohortId = 1)

We could, of course, have applied these requirements the other way around.

# Create the ankle sprain cohort again so the criteria can be applied in
# the opposite order
cdm <- cdm |>
  generateConceptCohortSet(
    name = "ankle_sprain",
    conceptSet = list("ankle_sprain" = 81151),
    limit = "all",
    end = "event_end_date"
  )

# This time apply the age restriction first: add an age column, keep
# entries with age >= 18, save the table, then record the attrition step
cdm[["ankle_sprain"]] <- cdm[["ankle_sprain"]] |>
  PatientProfiles::addAge() |>
  filter(age >= 18) |>
  compute(temporary = FALSE, name = "ankle_sprain") |>
  recordCohortAttrition("Restrict to age >= 18")

# Then the calendar restriction: keep entries whose start year is 2000 or
# later, record the attrition step, and save the table.
# NOTE(review): attrition is recorded before compute() here, but after
# compute() in the age step above; confirm both orders are intended.
cdm[["ankle_sprain"]] <- cdm[["ankle_sprain"]] |>
  filter(year(cohort_start_date) >= 2000) |>
  recordCohortAttrition("Restrict to cohort_start_date >= 2000") |>
  compute(temporary = FALSE, name = "ankle_sprain")


# Summarise the attrition for the reordered criteria
attrition_summary <- summariseCohortAttrition(cdm[["ankle_sprain"]])

# Plot the attrition for cohort 1 (the table holds a single cohort)
plotCohortAttrition(attrition_summary, cohortId = 1)

As well as plotting cohort attrition, we can also create a table of our results.

# Show the same attrition summary as a table instead of a plot
attrition_summary |>
  tableCohortAttrition()

	CDM name
	Eunomia
Reason	Variable
Reason	Number records	Number subjects	Excluded records	Excluded subjects
ankle_sprain
Initial qualifying events	1915	1357	0	0
Restrict to age >= 18	1047	847	868	510
Restrict to cohort_start_date >= 2000	454	420	593	427