## ----include = FALSE----------------------------------------------------------
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>"
)

## ----parity-helpers, include = FALSE------------------------------------------
# Prefer local package sources when rendering inside the repository so
# parity checks target the code under development.
find_local_pkg_dir <- function() {
  roots <- c(".", "..", "../..")

  for (root in roots) {
    pkg_desc <- file.path(root, "pkg", "DESCRIPTION")
    root_desc <- file.path(root, "DESCRIPTION")

    if (file.exists(pkg_desc)) {
      return(file.path(root, "pkg"))
    }

    if (file.exists(root_desc)) {
      desc <- tryCatch(read.dcf(root_desc), error = function(e) NULL)
      if (!is.null(desc) && "Package" %in% colnames(desc) && identical(desc[1, "Package"], "Rfuzzydid")) {
        return(root)
      }
    }
  }

  NULL
}

local_pkg_dir <- find_local_pkg_dir()

if (!is.null(local_pkg_dir) && requireNamespace("pkgload", quietly = TRUE)) {
  pkgload::load_all(local_pkg_dir, helpers = FALSE, export_all = FALSE, quiet = TRUE)
} else {
  library(Rfuzzydid)
}

stata_partial <- c(TC_inf = 0.01220596000000, TC_sup = 3.08177130000000)

stata_eqtest <- data.frame(
  contrast = c("W_DID_W_TC", "W_DID_W_CIC", "W_TC_W_CIC"),
  estimate = c(-0.0112300674614629, -0.145514320262384, -0.134284252800921),
  stringsAsFactors = FALSE
)

stata_lqte <- data.frame(
  quantile = seq(0.05, 0.95, by = 0.05),
  estimate = c(
    1.67274034023285, 1.79422724246979, 1.81602716445923,
    1.94611406326294, 1.95544064044952, 1.94727826118469,
    1.97404694557190, 2.09413313865662, 2.13104295730591,
    2.03220224380493, 2.10763168334961, -0.360978126525879,
    -0.325689792633057, -0.228986263275146, -0.172835350036621,
    -0.146114349365234, -0.098971366882324, -0.079638004302979,
    -0.034532070159912
  )
)

native_cluster <- c(n_reps = 20L, n_misreps = 3L, share_failures = 0.15)

## ----sim-data-----------------------------------------------------------------
set.seed(50321)

n_cell <- 120  # observations per cell

# Helper function to create each (G, T) cell
make_cell <- function(g, t, prob_d) {
  d <- rbinom(n_cell, size = 1, prob = prob_d)
  # Outcome: base + group effect + time effect + treatment effect + noise
  y <- 1 + 0.5 * g + 0.4 * t + 2.0 * d + sin(seq_len(n_cell) / 7)
  data.frame(y = y, g = g, t = t, d = d)
}

# Build the four cells
df <- rbind(
  make_cell(g = 0, t = 0, prob_d = 0.20),  # Control group, baseline
  make_cell(g = 0, t = 1, prob_d = 0.40),  # Control group, follow-up  
  make_cell(g = 1, t = 0, prob_d = 0.30),  # Treatment group, baseline
  make_cell(g = 1, t = 1, prob_d = 0.80)   # Treatment group, follow-up
)

# Verify cell sizes and treatment rates
aggregate(cbind(n = d) ~ g + t, data = df, FUN = function(x) c(n = length(x), mean = mean(x)))

# Save fixture used by most examples for direct Stata comparison
if (requireNamespace("haven", quietly = TRUE)) {
  haven::write_dta(df, "stata-parity-sim-fixture.dta")
}

## ----basic-est----------------------------------------------------------------
fit <- fuzzydid(
  data = df,
  formula = y ~ d,
  group = "g",
  time = "t",
  did = TRUE,
  tc = TRUE,
  cic = TRUE,
  breps = 50
)

summary(fit)

stata_basic <- c(
  W_DID = 2.00000000000000,
  W_TC = 1.97320415000280,
  W_CIC = 2.22209836585117
)
r_basic <- setNames(fit$late$estimate, fit$late$estimator)[names(stata_basic)]
basic_cmp <- data.frame(
  estimator = names(stata_basic),
  R = unname(r_basic),
  Stata = unname(stata_basic),
  abs_diff = abs(unname(r_basic) - unname(stata_basic))
)
knitr::kable(basic_cmp, digits = 8)

## ----tidy-output, eval = requireNamespace("modelsummary", quietly = TRUE)-----
library(modelsummary)

modelsummary::modelsummary(fit)

## ----parity-check-------------------------------------------------------------
n_cell <- 120L

make_parity_cell <- function(g, t, ones) {
  data.frame(
    g = rep.int(g, n_cell),
    t = rep.int(t, n_cell),
    d = c(rep.int(1L, ones), rep.int(0L, n_cell - ones))
  )
}

out <- rbind(
  make_parity_cell(0L, 0L, 24L),
  make_parity_cell(0L, 1L, 48L),
  make_parity_cell(1L, 0L, 36L),
  make_parity_cell(1L, 1L, 96L)
)
out$id <- seq_len(nrow(out))
out$y <- 1 + 0.5 * out$g + 0.4 * out$t + 2 * out$d + sin(out$id / 7)
parity_fixture <- out[, c("y", "g", "t", "d")]

fit_parity <- fuzzydid(
  data = parity_fixture,
  formula = y ~ d,
  group = "g",
  time = "t",
  did = TRUE,
  tc = TRUE,
  cic = TRUE,
  breps = 5,
  seed = 1
)

stata_core <- c(
  W_DID = 2.17430877504668,
  W_TC = 2.18553887285118,
  W_CIC = 2.31982319056988
)

r_core <- setNames(fit_parity$late$estimate, fit_parity$late$estimator)[c("W_DID", "W_TC", "W_CIC")]

core_cmp <- data.frame(
  estimator = names(stata_core),
  R = unname(r_core),
  Stata = unname(stata_core),
  abs_diff = abs(unname(r_core) - unname(stata_core))
)

knitr::kable(core_cmp, digits = 8)

# Save exact parity fixture for Stata users
if (requireNamespace("haven", quietly = TRUE)) {
  haven::write_dta(parity_fixture, "stata-parity-core-fixture.dta")
}

## ----seed-parity--------------------------------------------------------------
# These produce identical SE/CI because the same bootstrap seed is supplied
fit1 <- fuzzydid(df, y ~ d, group = "g", time = "t", did = TRUE, breps = 50, seed = 1)
fit2 <- fuzzydid(df, y ~ d, group = "g", time = "t", did = TRUE, breps = 50, seed = 1)

all.equal(fit1$late$std.error, fit2$late$std.error)

stata_seed_se <- 0.66310988000000
seed_cmp <- data.frame(
  estimator = "W_DID",
  R_seed_1_run_1 = fit1$late$std.error[fit1$late$estimator == "W_DID"],
  R_seed_1_run_2 = fit2$late$std.error[fit2$late$estimator == "W_DID"],
  Stata = stata_seed_se,
  stringsAsFactors = FALSE
)
seed_cmp$abs_diff_run_1_vs_Stata <- abs(seed_cmp$R_seed_1_run_1 - seed_cmp$Stata)
seed_cmp$abs_diff_run_2_vs_Stata <- abs(seed_cmp$R_seed_1_run_2 - seed_cmp$Stata)
knitr::kable(seed_cmp, digits = 8)

## ----restrictions, error = TRUE-----------------------------------------------
try({
df_restrict <- transform(df, x = rnorm(nrow(df)))

# CIC requires no covariates
try({
  fuzzydid(df_restrict, y ~ d + x, group = "g", time = "t", cic = TRUE)
})

# LQTE requires binary G, T, and D, plus no covariates
try({
  fuzzydid(df_restrict, y ~ d + x, group = "g", time = "t", lqte = TRUE)
})
})

## ----partial------------------------------------------------------------------
fit_partial <- fuzzydid(
  data = df,
  formula = y ~ d,
  group = "g",
  time = "t",
  tc = TRUE,
  partial = TRUE,
  breps = 50
)

knitr::kable(fit_partial$late)

r_partial <- setNames(fit_partial$late$estimate, fit_partial$late$estimator)[names(stata_partial)]
partial_cmp <- data.frame(
  estimator = names(stata_partial),
  R = unname(r_partial),
  Stata = unname(stata_partial),
  abs_diff = abs(unname(r_partial) - unname(stata_partial))
)
knitr::kable(partial_cmp, digits = 8)

## ----eqtest-------------------------------------------------------------------
fit_eq <- fuzzydid(
  data = parity_fixture,
  formula = y ~ d,
  group = "g",
  time = "t",
  did = TRUE,
  tc = TRUE,
  cic = TRUE,
  eqtest = TRUE,
  breps = 5,
  seed = 1
)

eq_cmp <- data.frame(
  contrast = stata_eqtest$contrast,
  R = fit_eq$eqtest$estimate,
  Stata = stata_eqtest$estimate,
  abs_diff = abs(fit_eq$eqtest$estimate - stata_eqtest$estimate)
)
knitr::kable(eq_cmp, digits = 8)

## ----covariates---------------------------------------------------------------
# Add covariates
set.seed(123)
df$x1 <- rnorm(nrow(df))
df$x2 <- factor(ifelse(runif(nrow(df)) > 0.5, "a", "b"))

fit_modelx <- fuzzydid(
  data = df,
  formula = y ~ d + x1 + x2,
  group = "g",
  time = "t",
  did = TRUE,
  tc = TRUE,
  modelx = c("ols", "ols"),
  nose = TRUE
)

knitr::kable(fit_modelx$late)

stata_modelx <- c(
  W_DID = 2.03222780000000,
  W_TC = 1.93062450000000
)
r_modelx <- setNames(fit_modelx$late$estimate, fit_modelx$late$estimator)[names(stata_modelx)]
modelx_cmp <- data.frame(
  estimator = names(stata_modelx),
  R = unname(r_modelx),
  Stata = unname(stata_modelx),
  abs_diff = abs(unname(r_modelx) - unname(stata_modelx))
)
knitr::kable(modelx_cmp, digits = 8)

if (requireNamespace("haven", quietly = TRUE)) {
  haven::write_dta(df[, c("y", "g", "t", "d", "x1", "x2")], "stata-parity-covariates-fixture.dta")
}

## ----sieves-------------------------------------------------------------------
fit_sieves <- fuzzydid(
  data = df,
  formula = y ~ d + x1,
  group = "g",
  time = "t",
  did = TRUE,
  tc = TRUE,
  sieves = TRUE,
  nose = TRUE
)

knitr::kable(fit_sieves$late)

stata_sieves <- c(
  W_DID = 2.00081280000000,
  W_TC = 1.93678270000000
)
r_sieves <- setNames(fit_sieves$late$estimate, fit_sieves$late$estimator)[names(stata_sieves)]
sieves_cmp <- data.frame(
  estimator = names(stata_sieves),
  R = unname(r_sieves),
  Stata = unname(stata_sieves),
  abs_diff = abs(unname(r_sieves) - unname(stata_sieves))
)
knitr::kable(sieves_cmp, digits = 8)

## ----lqte---------------------------------------------------------------------
fit_lqte <- fuzzydid(
  data = parity_fixture,
  formula = y ~ d,
  group = "g",
  time = "t",
  lqte = TRUE,
  nose = TRUE,
  seed = 1
)

lqte_cmp <- data.frame(
  quantile = stata_lqte$quantile,
  R = fit_lqte$lqte$estimate,
  Stata = stata_lqte$estimate,
  abs_diff = abs(fit_lqte$lqte$estimate - stata_lqte$estimate)
)
knitr::kable(head(lqte_cmp, 10), digits = 8)

## ----multiperiod--------------------------------------------------------------
set.seed(7)
n_mp <- 70

make_mp_cell <- function(gb, gf, t, prob, shift = 0) {
  d <- rbinom(n_mp, 1, prob)
  y <- 1 + 0.35 * t + 1.6 * d + shift + rnorm(n_mp, sd = 0.25)
  data.frame(y = y, d = d, gb = gb, gf = gf, t = t)
}

df_mp <- rbind(
  make_mp_cell(gb = 0, gf = 0, t = 0, prob = 0.20),
  make_mp_cell(gb = 0, gf = 0, t = 1, prob = 0.30),
  make_mp_cell(gb = 0, gf = 0, t = 2, prob = 0.35),
  make_mp_cell(gb = 0, gf = 1, t = 0, prob = 0.30, shift = 0.2),
  make_mp_cell(gb = 1, gf = 1, t = 1, prob = 0.60, shift = 0.8),
  make_mp_cell(gb = 1, gf = 0, t = 2, prob = 0.75, shift = 1.1)
)

fit_mp <- fuzzydid(
  data = df_mp,
  formula = y ~ d,
  group = "gb",
  group_forward = "gf",
  time = "t",
  did = TRUE,
  tc = TRUE,
  nose = TRUE
)

knitr::kable(fit_mp$late)

if (requireNamespace("haven", quietly = TRUE)) {
  haven::write_dta(df_mp, "stata-parity-multiperiod-fixture.dta")
}

## ----bootstrap-diag-----------------------------------------------------------
df_diag <- parity_fixture
df_diag$cl <- rep(seq_len(24L), each = 20L)

fit_diag <- fuzzydid(
  data = df_diag,
  formula = y ~ d,
  group = "g",
  time = "t",
  did = TRUE,
  tc = TRUE,
  cic = TRUE,
  cluster = "cl",
  breps = 20,
  seed = 1
)

diag_cmp <- data.frame(
  metric = c("N.reps", "N.misreps", "Share.failures"),
  R = c(fit_diag$n_reps, fit_diag$n_misreps, fit_diag$share_failures),
  Reference = c(
    native_cluster[["n_reps"]],
    native_cluster[["n_misreps"]],
    native_cluster[["share_failures"]]
  ),
  stringsAsFactors = FALSE
)

share_from_counts <- if (diag_cmp$R[1] > 0) diag_cmp$R[2] / diag_cmp$R[1] else NA_real_

# These diagnostics are deterministic in R when the same bootstrap seed is used.
diag_cmp$Delta <- diag_cmp$R - diag_cmp$Reference
knitr::kable(diag_cmp, digits = 8)

if (requireNamespace("haven", quietly = TRUE)) {
  haven::write_dta(df_diag, "stata-parity-cluster-fixture.dta")
}