---
title: "Worked Example: Complete Analysis"
output: rmarkdown::html_vignette
vignette: >
  %\VignetteIndexEntry{Worked Example: Complete Analysis}
  %\VignetteEngine{knitr::rmarkdown}
  %\VignetteEncoding{UTF-8}
---

```{r, include = FALSE, purl = FALSE}
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>",
  eval = FALSE,
  message = FALSE,
  warning = FALSE
)
```

## Scenario

We have IPD from a trial of **Drug A** (index treatment, N=500) and
published AgD from a trial of **Drug B** (comparator, N=400). We want
to estimate the treatment effect of Drug A vs Drug B on a binary endpoint.

Two binary covariates (age group and sex) are available in both trials
but their distributions differ:

- Drug A trial: 40% elderly, 55% female
- Drug B trial: 35% elderly, 50% female

## Step 1: Simulate example data

```{r, eval = TRUE, purl = FALSE}
library(mlumr)
set.seed(2026)

# --- IPD for Drug A ---
n_A <- 500
age_A <- rbinom(n_A, 1, 0.40)
sex_A <- rbinom(n_A, 1, 0.55)

# True model: logit(p) = -0.5 + 0.8*age - 0.3*sex
logit_p_A <- -0.5 + 0.8 * age_A - 0.3 * sex_A
y_A <- rbinom(n_A, 1, plogis(logit_p_A))

ipd_df <- data.frame(
  trt = "Drug_A",
  outcome = y_A,
  age_group = age_A,
  sex = sex_A
)

# --- AgD for Drug B ---
# True comparator model: logit(p) = -0.8 + 0.8*age - 0.3*sex
# (same covariate effects, different intercept)
n_B <- 400
age_B <- rbinom(n_B, 1, 0.35)
sex_B <- rbinom(n_B, 1, 0.50)
logit_p_B <- -0.8 + 0.8 * age_B - 0.3 * sex_B
y_B <- rbinom(n_B, 1, plogis(logit_p_B))

agd_df <- data.frame(
  trt = "Drug_B",
  n_total = n_B,
  n_events = sum(y_B),
  age_group_mean = mean(age_B),
  sex_mean = mean(sex_B)
)
```

## Step 2: Prepare data objects

```{r, eval = TRUE, purl = FALSE}
ipd <- set_ipd(ipd_df, treatment = "trt", outcome = "outcome",
               covariates = c("age_group", "sex"))

agd <- set_agd(agd_df, treatment = "trt",
               outcome_n = "n_total", outcome_r = "n_events",
               cov_means = c("age_group_mean", "sex_mean"),
               cov_types = c("binary", "binary"))

dat <- combine_data(ipd, agd)
print(dat)
```

## Step 3: Add integration points

```{r, eval = TRUE, purl = FALSE}
dat <- add_integration(
  dat,
  n_int = 64,
  age_group = distr(qbern, prob = age_group_mean),
  sex = distr(qbern, prob = sex_mean)
)

check_integration(
  dat,
  age_group = distr(qbern, prob = age_group_mean),
  sex = distr(qbern, prob = sex_mean)
)
```

## Step 4: Naive estimate (benchmark)

```{r, eval = TRUE, purl = FALSE}
naive_result <- naive(dat)
print(naive_result)
```

## Step 5: STC estimate

```{r, eval = TRUE, purl = FALSE}
stc_result <- stc(dat)
print(stc_result)
```

## Step 6: ML-UMR SPFA model

```{r, eval = TRUE, purl = FALSE}
fit_spfa <- mlumr(
  dat,
  model = "spfa",
  prior_intercept = prior_normal(0, 10),
  prior_beta = prior_normal(0, 2.5),
  chains = 2,
  iter = 500,
  warmup = 250,
  seed = 42,
  refresh = 0,
  verbose = FALSE
)

summary(fit_spfa)
prior_summary(fit_spfa)
```

## Step 7: ML-UMR Relaxed model

```{r, eval = TRUE, purl = FALSE}
fit_relaxed <- mlumr(
  dat,
  model = "relaxed",
  prior_intercept = prior_normal(0, 10),
  prior_beta = prior_normal(0, 2.5),
  chains = 2,
  iter = 500,
  warmup = 250,
  seed = 43,
  refresh = 0,
  verbose = FALSE
)

summary(fit_relaxed)
prior_summary(fit_relaxed)
```

## Step 8: Model comparison

```{r, eval = TRUE, purl = FALSE}
compare_models(fit_spfa, fit_relaxed)
```

## Step 9: Extract and compare results

```{r, eval = TRUE, purl = FALSE}
# ML-UMR marginal effects
me_spfa <- marginal_effects(fit_spfa, effect = "lor")
me_relaxed <- marginal_effects(fit_relaxed, effect = "lor")

# Build comparison table
results <- data.frame(
  Method = c("Naive", "STC",
             "ML-UMR SPFA (Index)", "ML-UMR SPFA (Comparator)",
             "ML-UMR Relaxed (Index)", "ML-UMR Relaxed (Comparator)"),
  LOR = c(
    naive_result$link_effect,
    stc_result$link_effect,
    me_spfa$mean[me_spfa$population == "Index"],
    me_spfa$mean[me_spfa$population == "Comparator"],
    me_relaxed$mean[me_relaxed$population == "Index"],
    me_relaxed$mean[me_relaxed$population == "Comparator"]
  ),
  SE = c(
    naive_result$se,
    stc_result$se,
    me_spfa$sd[me_spfa$population == "Index"],
    me_spfa$sd[me_spfa$population == "Comparator"],
    me_relaxed$sd[me_relaxed$population == "Index"],
    me_relaxed$sd[me_relaxed$population == "Comparator"]
  )
)
results$CI_lower <- c(
  naive_result$ci_lower,
  stc_result$ci_lower,
  me_spfa$q2.5[me_spfa$population == "Index"],
  me_spfa$q2.5[me_spfa$population == "Comparator"],
  me_relaxed$q2.5[me_relaxed$population == "Index"],
  me_relaxed$q2.5[me_relaxed$population == "Comparator"]
)
results$CI_upper <- c(
  naive_result$ci_upper,
  stc_result$ci_upper,
  me_spfa$q97.5[me_spfa$population == "Index"],
  me_spfa$q97.5[me_spfa$population == "Comparator"],
  me_relaxed$q97.5[me_relaxed$population == "Index"],
  me_relaxed$q97.5[me_relaxed$population == "Comparator"]
)

print(results, digits = 3)
```

## Step 10: Predicted probabilities

```{r, eval = TRUE, purl = FALSE}
# Predicted event probabilities for each treatment in each population
preds <- predict(fit_spfa, population = "both")
print(preds)
```

## Step 11: Conditional effects at covariate profiles

Population-level effects average over the covariate distribution. Conditional
effects evaluate the treatment effect at specific covariate values.

```{r, eval = TRUE, purl = FALSE}
profiles <- data.frame(
  age_group = c(0, 1),
  sex = c(0, 1)
)

conditional_predict(fit_spfa, newdata = profiles)
conditional_effects(fit_spfa, newdata = profiles)
```

## Interpretation

In this simulated example:

- The **true LOR** is approximately `logit(p_A) - logit(p_B)` where
  treatment effects are captured by the intercept difference (0.3 on logit
  scale).
- The **naive estimate** is biased because of covariate imbalance between
  populations.
- **STC** partially corrects for this by adjusting via outcome regression.
- **ML-UMR SPFA** provides population-specific treatment effects in both
  the index and comparator populations, fully accounting for covariate
  differences through QMC integration.
- **ML-UMR Relaxed** additionally allows for effect modification, though
  in this simulated case (same covariate effects), SPFA and Relaxed should
  give similar results.
