## ----setup, include=FALSE-----------------------------------------------------
knitr::opts_chunk$set(
  collapse  = TRUE,
  comment   = "#>",
  fig.width = 7,
  fig.height = 4.5,
  out.width = "100%"
)

## ----load-package-------------------------------------------------------------
library(dcce)
library(generics)

## ----pwt8-load----------------------------------------------------------------
data(pwt8, package = "dcce")
str(pwt8)

## ----cd-ols-------------------------------------------------------------------
# Pooled OLS on complete cases
pwt_cc <- na.omit(pwt8[, c("country", "year", "d_log_rgdpo",
                             "log_hc", "log_ck", "log_ngd")])
fit_ols <- lm(d_log_rgdpo ~ log_hc + log_ck + log_ngd, data = pwt_cc)

# CD test on OLS residuals
cd_ols <- pcd_test(
  residuals(fit_ols), data = pwt_cc,
  unit_index = "country", time_index = "year",
  test = "pesaran"
)
print(cd_ols)

## ----mg-pwt8------------------------------------------------------------------
fit_mg <- dcce(
  data            = pwt8,
  unit_index      = "country",
  time_index      = "year",
  formula         = d_log_rgdpo ~ L(log_rgdpo, 1) + log_hc + log_ck + log_ngd,
  model           = "mg",
  cross_section_vars = NULL
)
print(fit_mg)

## ----cce-pwt8-----------------------------------------------------------------
fit_cce <- dcce(
  data               = pwt8,
  unit_index         = "country",
  time_index         = "year",
  formula            = d_log_rgdpo ~ L(log_rgdpo, 1) + log_hc + log_ck + log_ngd,
  model              = "cce",
  cross_section_vars = ~ log_rgdpo + log_hc + log_ck + log_ngd,
  cross_section_lags = 0
)
print(fit_cce)

## ----dcce-pwt8----------------------------------------------------------------
fit_dcce <- dcce(
  data               = pwt8,
  unit_index         = "country",
  time_index         = "year",
  formula            = d_log_rgdpo ~ L(log_rgdpo, 1) + log_hc + log_ck + log_ngd,
  model              = "dcce",
  cross_section_vars = ~ log_rgdpo + log_hc + log_ck + log_ngd,
  cross_section_lags = 3
)
print(fit_dcce)

## ----dcce-cd-test-------------------------------------------------------------
cd_dcce <- pcd_test(fit_dcce, test = "pesaran")
print(cd_dcce)

## ----rcce-pwt8----------------------------------------------------------------
fit_rcce <- dcce(
  data               = pwt8,
  unit_index         = "country",
  time_index         = "year",
  formula            = d_log_rgdpo ~ L(log_rgdpo, 1) + log_hc + log_ck + log_ngd,
  model              = "rcce",
  cross_section_vars = ~ log_rgdpo + log_hc + log_ck + log_ngd,
  cross_section_lags = 0
)
print(fit_rcce)

## ----csdl-pwt8----------------------------------------------------------------
fit_csdl <- dcce(
  data               = pwt8,
  unit_index         = "country",
  time_index         = "year",
  formula            = log_rgdpo ~ log_ck + log_hc + log_ngd,
  model              = "csdl",
  cross_section_vars = ~ log_rgdpo + log_hc + log_ck + log_ngd,
  cross_section_lags = 3,
  csdl_xlags         = 3
)
print(fit_csdl)

## ----csardl-pwt8--------------------------------------------------------------
fit_csardl <- dcce(
  data               = pwt8,
  unit_index         = "country",
  time_index         = "year",
  formula            = log_rgdpo ~ L(log_rgdpo, 1)
                                   + log_hc + L(log_hc, 1)
                                   + log_ck + L(log_ck, 1)
                                   + log_ngd + L(log_ngd, 1),
  model              = "csardl",
  cross_section_vars = ~ log_rgdpo + log_hc + log_ck + log_ngd,
  cross_section_lags = 3
)
print(fit_csardl)

## ----pmg-pwt8-----------------------------------------------------------------
fit_pmg <- dcce(
  data               = pwt8,
  unit_index         = "country",
  time_index         = "year",
  formula            = log_rgdpo ~ L(log_rgdpo, 1)
                                   + log_hc + L(log_hc, 1)
                                   + log_ck + L(log_ck, 1)
                                   + log_ngd + L(log_ngd, 1),
  model              = "pmg",
  cross_section_vars = ~ log_rgdpo + log_hc + log_ck + log_ngd,
  cross_section_lags = 3
)
print(fit_pmg)

## ----compare-estimators-------------------------------------------------------
models <- list(
  MG      = fit_mg,
  CCE     = fit_cce,
  DCCE    = fit_dcce,
  CS_DL   = fit_csdl,
  CS_ARDL = fit_csardl
)

# Extract coefficient on physical capital (log_ck) across models
key_var <- "log_ck"
compare <- data.frame(
  estimator = names(models),
  estimate  = vapply(models, function(m) {
    b <- coef(m)
    if (key_var %in% names(b)) unname(b[key_var]) else NA_real_
  }, numeric(1)),
  std_error = vapply(models, function(m) {
    se <- m$se
    if (key_var %in% names(se)) unname(se[key_var]) else NA_real_
  }, numeric(1))
)
compare$t_stat <- compare$estimate / compare$std_error
print(compare, digits = 4)

## ----cd-all-tests-------------------------------------------------------------
# Run all five CD tests on DCCE residuals
set.seed(42)
cd_all <- pcd_test(fit_dcce,
                   test   = c("pesaran", "cdw", "cdwplus", "pea", "cdstar"),
                   n_reps = 199)
print(cd_all)

## ----bootstrap, eval=FALSE----------------------------------------------------
# # Cross-section bootstrap (not evaluated in vignette to limit build time)
# set.seed(42)
# boot_res <- bootstrap(fit_dcce, type = "crosssection", reps = 499)
# print(boot_res)

## ----bootstrap-sim------------------------------------------------------------
data(dcce_sim, package = "dcce")

fit_sim <- dcce(
  data               = dcce_sim,
  unit_index         = "unit",
  time_index         = "time",
  formula            = y ~ L(y, 1) + x,
  model              = "dcce",
  cross_section_vars = ~ y + x,
  cross_section_lags = 3
)

set.seed(42)
boot_sim <- bootstrap(fit_sim, type = "crosssection", reps = 199)
print(boot_sim)

## ----ex1-ols-cd---------------------------------------------------------------
dat <- na.omit(pwt8[, c("country", "year", "d_log_rgdpo",
                          "log_rgdpo", "log_hc", "log_ck", "log_ngd")])
fit_ols2 <- lm(d_log_rgdpo ~ log_hc + log_ck + log_ngd + log_rgdpo, data = dat)
cd_naive <- pcd_test(residuals(fit_ols2), data = dat,
                     unit_index = "country", time_index = "year",
                     test = "pesaran")
cat(sprintf("CD statistic (OLS): %.2f  p-value: %.4f\n",
            cd_naive$statistics$statistic[1],
            cd_naive$statistics$p_value[1]))

## ----ex2-mg-------------------------------------------------------------------
fit_mg2 <- dcce(
  data               = pwt8,
  unit_index         = "country",
  time_index         = "year",
  formula            = d_log_rgdpo ~ L(log_rgdpo, 1) + log_hc + log_ck + log_ngd,
  model              = "mg",
  cross_section_vars = NULL
)

cd_mg <- pcd_test(fit_mg2, test = "pesaran")
cat(sprintf("MG: phi = %.3f, log_ck = %.3f\n",
            coef(fit_mg2)["L(log_rgdpo,1)"], coef(fit_mg2)["log_ck"]))
cat(sprintf("CD after MG: %.2f  p-value: %.4f\n",
            cd_mg$statistics$statistic[1], cd_mg$statistics$p_value[1]))

## ----ex3-dcce-----------------------------------------------------------------
fit_dcce2 <- dcce(
  data               = pwt8,
  unit_index         = "country",
  time_index         = "year",
  formula            = d_log_rgdpo ~ L(log_rgdpo, 1) + log_hc + log_ck + log_ngd,
  model              = "dcce",
  cross_section_vars = ~ log_rgdpo + log_hc + log_ck + log_ngd,
  cross_section_lags = 3
)

cd_dcce2 <- pcd_test(fit_dcce2, test = "pesaran")
cat(sprintf("DCCE: phi = %.3f, log_ck = %.3f\n",
            coef(fit_dcce2)["L(log_rgdpo,1)"], coef(fit_dcce2)["log_ck"]))
cat(sprintf("CD after DCCE(3): %.2f  p-value: %.4f\n",
            cd_dcce2$statistics$statistic[1], cd_dcce2$statistics$p_value[1]))

## ----ex4-summary-table--------------------------------------------------------
# Coefficient table comparing MG and DCCE
vars <- c("L(log_rgdpo,1)", "log_hc", "log_ck", "log_ngd")
tab <- data.frame(
  Variable = vars,
  MG_coef  = round(coef(fit_mg2)[vars], 3),
  MG_se    = round(fit_mg2$se[vars], 3),
  DCCE_coef = round(coef(fit_dcce2)[vars], 3),
  DCCE_se   = round(fit_dcce2$se[vars], 3),
  row.names = NULL
)
tab$MG_sig   <- ifelse(abs(coef(fit_mg2)[vars] / fit_mg2$se[vars]) > 1.96, "*", "")
tab$DCCE_sig <- ifelse(abs(coef(fit_dcce2)[vars] / fit_dcce2$se[vars]) > 1.96, "*", "")
print(tab)

## ----ex5-unit-coefs, fig.alt="Histogram of unit-level speed of adjustment coefficients"----
b_unit <- coef(fit_dcce2, type = "unit")
ar_coefs <- b_unit$estimate[b_unit$term == "L(log_rgdpo,1)"]
rho_implied <- ar_coefs + 1  # rho = (phi + 1)

cat(sprintf("Implied rho: mean = %.3f, median = %.3f, [min, max] = [%.3f, %.3f]\n",
            mean(rho_implied), median(rho_implied),
            min(rho_implied), max(rho_implied)))
cat(sprintf("Fraction with 0 < rho < 1 (stationarity): %.1f%%\n",
            100 * mean(rho_implied > 0 & rho_implied < 1)))

hist(
  rho_implied,
  breaks = 20,
  main   = "Unit-level AR(1) coefficient (rho = phi + 1)",
  xlab   = expression(hat(rho)[i]),
  col    = "steelblue",
  border = "white"
)
abline(v = median(rho_implied), col = "firebrick", lwd = 2, lty = 2)
legend("topright", legend = "Median", col = "firebrick", lty = 2, lwd = 2, bty = "n")

## ----csd-exp------------------------------------------------------------------
# CSD exponent of log real GDP (levels) across countries
data(pwt8)
X_lev <- do.call(rbind, by(pwt8, pwt8$country, function(d) d$log_rgdpo))
# Keep only time periods with no missing values
X_lev <- X_lev[, colSums(is.na(X_lev)) == 0, drop = FALSE]
alpha_res <- csd_exp(X_lev, use_residuals = FALSE)
print(alpha_res)

## ----ic-selection-------------------------------------------------------------
# Compare static CCE at lag 0 across different CSA variable sets
# (IC criteria apply to the static CCE case)
lags_to_try <- 0:3
models_ic <- lapply(lags_to_try, function(p) {
  dcce(
    data               = pwt8,
    unit_index         = "country",
    time_index         = "year",
    formula            = d_log_rgdpo ~ L(log_rgdpo, 1) + log_hc + log_ck + log_ngd,
    model              = "cce",
    cross_section_vars = ~ log_rgdpo + log_hc + log_ck + log_ngd,
    cross_section_lags = p
  )
})

ic_values <- sapply(models_ic, function(m) {
  res <- ic(m, models = models_ic)
  c(IC1 = res$IC1, IC2 = res$IC2)
})
ic_table <- data.frame(lags = lags_to_try, t(ic_values))
print(ic_table, digits = 5)
cat(sprintf("IC1 selects %d CSA lag(s)\n", lags_to_try[which.min(ic_table$IC1)]))
cat(sprintf("IC2 selects %d CSA lag(s)\n", lags_to_try[which.min(ic_table$IC2)]))

## ----rank-condition-----------------------------------------------------------
rc <- rank_condition(fit_cce)
cat(sprintf("Estimated factors (m): %d\n", rc$m))
cat(sprintf("Rank of avg loadings (g): %d\n", rc$g))
cat(sprintf("Rank condition (RC = 1 means satisfied): %d\n", rc$RC))

## ----cips-test----------------------------------------------------------------
X_lev <- do.call(rbind, by(pwt8, pwt8$country, function(d) d$log_rgdpo))
X_lev <- X_lev[, colSums(is.na(X_lev)) == 0, drop = FALSE]
cips_res <- cips_test(X_lev, lags = 1, trend = FALSE)
print(cips_res)

## ----swamy-test---------------------------------------------------------------
swamy_test(fit_cce)

## ----hausman-test-------------------------------------------------------------
hausman_test(fit_cce)

## ----broom-methods------------------------------------------------------------
tidy(fit_dcce2)
glance(fit_dcce2)

## ----broom-csardl-------------------------------------------------------------
# For a CS-ARDL fit, tidy() returns short-run, adjustment, and long-run rows
tidy(fit_csardl)

## ----confint-demo-------------------------------------------------------------
confint(fit_csardl, type = "lr", level = 0.90)
confint(fit_csardl, type = "adjustment")

## ----plot-coef, fig.alt="Histograms of unit-level coefficients for the DCCE growth regression"----
plot(fit_dcce2, which = "coef")

## ----amg----------------------------------------------------------------------
fit_amg <- dcce(
  data = pwt8, unit_index = "country", time_index = "year",
  formula = d_log_rgdpo ~ log_hc + log_ck + log_ngd,
  model = "amg", cross_section_vars = NULL
)
coef(fit_amg)

## ----ife----------------------------------------------------------------------
fit_ife <- dcce(
  data = pwt8, unit_index = "country", time_index = "year",
  formula = log_rgdpo ~ log_hc + log_ck + log_ngd,
  model = "ife", n_factors = 2L
)
print(fit_ife)

## ----ccep---------------------------------------------------------------------
fit_ccep <- dcce(
  data = pwt8, unit_index = "country", time_index = "year",
  formula = d_log_rgdpo ~ log_hc + log_ck + log_ngd,
  model = "ccep", cross_section_vars = ~ .
)
coef(fit_ccep)

## ----granger------------------------------------------------------------------
gc <- granger_test(
  data = pwt8, unit_index = "country", time_index = "year",
  y = "d_log_rgdpo", x = "log_ck", lags = 1L
)
print(gc)

## ----ips-llc------------------------------------------------------------------
X_lev <- do.call(rbind, by(pwt8, pwt8$country, function(d) d$log_rgdpo))
X_lev <- X_lev[, colSums(is.na(X_lev)) == 0, drop = FALSE]
panel_ur_test(X_lev, test = "ips", lags = 1)

## ----pedroni------------------------------------------------------------------
panel_coint_test(
  data = pwt8, unit_index = "country", time_index = "year",
  formula = log_rgdpo ~ log_hc + log_ck,
  test = "pedroni", lags = 1
)

## ----structural-break---------------------------------------------------------
brk <- structural_break_test(
  data = pwt8, unit_index = "country", time_index = "year",
  formula = d_log_rgdpo ~ log_hc + log_ck + log_ngd,
  model = "mg", cross_section_vars = NULL,
  type = "unknown", trim = 0.20
)
print(brk)

## ----irf-example--------------------------------------------------------------
data(dcce_sim)
fit_dyn <- dcce(
  data = dcce_sim, unit_index = "unit", time_index = "time",
  formula = y ~ L(y, 1) + x,
  model = "dcce", cross_section_vars = ~ ., cross_section_lags = 3
)
ir <- irf(fit_dyn, impulse = "x", horizon = 10, boot_reps = 99, seed = 42)
print(ir)

## ----workflow-demo, eval=FALSE------------------------------------------------
# dcce_workflow(
#   data = pwt8, unit_index = "country", time_index = "year",
#   formula = log_rgdpo ~ log_hc + log_ck + log_ngd
# )

## ----quickstart, eval=FALSE---------------------------------------------------
# library(dcce)
# data(pwt8)
# 
# # 1. Detect CSD
# fit_ols_qs <- lm(d_log_rgdpo ~ log_hc + log_ck + log_ngd, data = na.omit(pwt8))
# pcd_test(residuals(fit_ols_qs), data = na.omit(pwt8),
#          unit_index = "country", time_index = "year",
#          test = "pesaran")
# 
# # 2. Estimate with DCCE
# fit_qs <- dcce(
#   data               = pwt8,
#   unit_index         = "country",
#   time_index         = "year",
#   formula            = d_log_rgdpo ~ L(log_rgdpo, 1) + log_hc + log_ck + log_ngd,
#   model              = "dcce",
#   cross_section_vars = ~ log_rgdpo + log_hc + log_ck + log_ngd,
#   cross_section_lags = 3
# )
# 
# # 3. Check residuals
# pcd_test(fit_qs, test = "pesaran")
# 
# # 4. Tidy output
# tidy(fit_qs)
# glance(fit_qs)
# 
# # 5. Bootstrap
# dcce_bootstrap(fit_qs, reps = 499)