## ----setup, include = FALSE---------------------------------------------------
knitr::opts_chunk$set(
  collapse   = TRUE,
  comment    = "#>",
  fig.width  = 7,
  fig.height = 5,
  warning    = FALSE,
  message    = FALSE,
  eval       = FALSE
)

## ----simulate-synthetic-------------------------------------------------------
# library(dicepro)
# set.seed(2101L)
# 
# sim_data <- simulation(
#   loi        = "gauss",
#   scenario   = "hierarchical",
#   nSample    = 30,
#   nGenes     = 200,
#   nCellsType = 10,
#   sigma_bio  = 0.07,
#   sigma_tech = 0.07,
#   seed       = 2101L
# )
# 
# cat("Reference  :", dim(sim_data$W), "\n")
# cat("Proportions:", dim(sim_data$p), "\n")
# cat("Bulk       :", dim(sim_data$B), "\n")
# cat("Row sums   :", round(range(rowSums(sim_data$p)), 4), "\n")

## ----sanity-check-synthetic---------------------------------------------------
# bulk_clean <- as.matrix(sim_data$W) %*% t(as.matrix(sim_data$p))
# 
# plot(
#   bulk_clean[seq_len(500)],
#   as.matrix(sim_data$B)[seq_len(500)],
#   xlab = "Clean bulk (first 500 entries)",
#   ylab = "Noisy bulk",
#   pch  = 19, cex = 0.4, col = "#2c7bb6",
#   main = "Noise model: clean vs noisy bulk"
# )
# abline(0, 1, col = "firebrick", lwd = 1.5)

## ----run-dicepro-synthetic----------------------------------------------------
# out <- dicepro(
#   reference             = as.matrix(sim_data$W)[, -c(1,5,10)],
#   bulk                  = as.matrix(sim_data$B),
#   methodDeconv          = "FARDEEP",
#   W_prime               = 0,
#   bulkName              = "SimBulk",
#   refName               = "SimRef",
#   hp_max_evals          = 500,
#   algo_select           = "random",
#   output_path           = tempdir(),
#   hspaceTechniqueChoose = "gamma_dominant",
#   normalize             = FALSE
# )

## ----best-hp-synthetic--------------------------------------------------------
# cat("Best lambda :", out$hyperparameters$lambda, "\n")
# cat("Best gamma  :", out$hyperparameters$gamma,  "\n")
# cat("Loss        :", out$metrics$loss,           "\n")
# cat("Constraint  :", out$metrics$constraint,     "\n")

## ----plot-hyperopt-synthetic, fig.height=9------------------------------------
# out$plot_hyperopt

## ----plot-pareto-synthetic----------------------------------------------------
# out$plot

## ----compare-props-synthetic--------------------------------------------------
# true_prop <- as.matrix(sim_data$p)
# pred_prop <- as.matrix(out$H)
# 
# common_ct   <- intersect(colnames(pred_prop), colnames(true_prop))
# true_common <- true_prop[, common_ct, drop = FALSE]
# pred_common <- pred_prop[, common_ct, drop = FALSE]
# 
# r_overall <- cor(as.vector(true_common), as.vector(pred_common))
# cat(sprintf("Overall Pearson r: %.3f\n", r_overall))
# 
# plot(
#   as.vector(true_common),
#   as.vector(pred_common),
#   xlab = "True proportions",
#   ylab = "Predicted proportions",
#   pch  = 19, cex = 0.5, col = "#2c7bb6aa",
#   main = sprintf("True vs Predicted  (r = %.3f)", r_overall)
# )
# abline(0, 1, col = "firebrick", lwd = 1.5)

## ----per-ct-cor-synthetic, fig.height=4---------------------------------------
# ct_cors <- vapply(common_ct, function(ct) {
#   cor(true_common[, ct], pred_common[, ct])
# }, numeric(1L))
# 
# par(mar = c(5, 10, 3, 1))
# barplot(
#   sort(ct_cors),
#   horiz = TRUE, las = 1,
#   col   = ifelse(sort(ct_cors) > 0.7, "#2c7bb6", "#d7191c"),
#   xlab  = "Pearson r",
#   main  = "Per-cell-type correlation (synthetic)",
#   xlim  = c(-0.2, 1)
# )
# abline(v = 0.7, lty = 2, col = "gray40")

## ----perf-metrics-synthetic---------------------------------------------------
# perf <- makeTable1Tool(pred_mat = pred_common, obs_mat = true_common)
# knitr::kable(perf$Perf, digits = 3,
#              caption = "Performance metrics -- fully synthetic data")

## ----simulate-bluecode--------------------------------------------------------
# library(dicepro)
# 
# sim_bc <- simulation_bluecode(
#   nSample    = 30,
#   sigma_bio  = 0.15,
#   sigma_tech = 0.02,
#   seed       = 2101L
# )
# 
# cat("Reference  :", dim(sim_bc$W), "\n")  # nGenes x 34
# cat("Proportions:", dim(sim_bc$p), "\n")  # 30 x 34
# cat("Bulk       :", dim(sim_bc$B), "\n")  # nGenes x 30
# cat("Row sums   :", round(range(rowSums(sim_bc$p)), 4), "\n")
# 
# # Real cell-type names from BlueCode
# head(colnames(sim_bc$p))

## ----compartment-overview-----------------------------------------------------
# # Cell-type to compartment mapping (mirrors .bluecode_cell_groups)
# compartment_map <- c(
#   rep("Immune",      9),
#   rep("Stromal",     8),
#   rep("Endothelial", 3),
#   rep("Epithelial",  5),
#   rep("Muscle",      9)
# )
# names(compartment_map) <- colnames(sim_bc$p)
# 
# # Aggregate proportions by compartment for each sample
# comp_props <- t(apply(sim_bc$p, 1, function(row) {
#   tapply(row, compartment_map[names(row)], sum)
# }))
# 
# boxplot(
#   comp_props,
#   col  = c("#4393c3", "#74c476", "#fd8d3c", "#9ecae1", "#fb6a4a"),
#   ylab = "Compartment proportion",
#   main = "Simulated compartment proportions (BlueCode)",
#   las  = 2
# )

## ----sanity-check-bluecode----------------------------------------------------
# bulk_clean_bc <- as.matrix(sim_bc$W) %*% t(as.matrix(sim_bc$p))
# 
# plot(
#   bulk_clean_bc[seq_len(500)],
#   as.matrix(sim_bc$B)[seq_len(500)],
#   xlab = "Clean bulk (first 500 entries)",
#   ylab = "Noisy bulk",
#   pch  = 19, cex = 0.4, col = "#74c476",
#   main = "Noise model: clean vs noisy bulk (BlueCode)"
# )
# abline(0, 1, col = "firebrick", lwd = 1.5)

## ----run-dicepro-bluecode-----------------------------------------------------
# out_bc <- dicepro(
#   reference             = as.matrix(sim_bc$W),
#   bulk                  = as.matrix(sim_bc$B),
#   methodDeconv          = "FARDEEP",
#   W_prime               = 0,
#   bulkName              = "BlueCodeBulk",
#   refName               = "BlueCode",
#   hp_max_evals          = 100,
#   algo_select           = "random",
#   output_path           = tempdir(),
#   hspaceTechniqueChoose = "gamma_dominant",
#   normalize             = FALSE
# )

## ----best-hp-bluecode---------------------------------------------------------
# cat("Best lambda :", out_bc$hyperparameters$lambda, "\n")
# cat("Best gamma  :", out_bc$hyperparameters$gamma,  "\n")
# cat("Loss        :", out_bc$metrics$loss,            "\n")
# cat("Constraint  :", out_bc$metrics$constraint,      "\n")

## ----plot-hyperopt-bluecode, fig.height=9-------------------------------------
# out_bc$plot_hyperopt

## ----plot-pareto-bluecode-----------------------------------------------------
# out_bc$plot

## ----compare-props-bluecode---------------------------------------------------
# true_prop_bc <- as.matrix(sim_bc$p)
# pred_prop_bc <- as.matrix(out_bc$H)
# 
# common_ct_bc   <- intersect(colnames(pred_prop_bc), colnames(true_prop_bc))
# true_common_bc <- true_prop_bc[, common_ct_bc, drop = FALSE]
# pred_common_bc <- pred_prop_bc[, common_ct_bc, drop = FALSE]
# 
# r_overall_bc <- cor(as.vector(true_common_bc), as.vector(pred_common_bc))
# cat(sprintf("Overall Pearson r: %.3f\n", r_overall_bc))
# 
# plot(
#   as.vector(true_common_bc),
#   as.vector(pred_common_bc),
#   xlab = "True proportions",
#   ylab = "Predicted proportions",
#   pch  = 19, cex = 0.5, col = "#74c47699",
#   main = sprintf("True vs Predicted -- BlueCode  (r = %.3f)", r_overall_bc)
# )
# abline(0, 1, col = "firebrick", lwd = 1.5)

## ----per-ct-cor-bluecode, fig.height=6----------------------------------------
# ct_cors_bc <- vapply(common_ct_bc, function(ct) {
#   cor(true_common_bc[, ct], pred_common_bc[, ct])
# }, numeric(1L))
# 
# par(mar = c(5, 14, 3, 1))
# barplot(
#   sort(ct_cors_bc),
#   horiz = TRUE, las = 1,
#   col   = ifelse(sort(ct_cors_bc) > 0.7, "#2c7bb6", "#d7191c"),
#   xlab  = "Pearson r",
#   main  = "Per-cell-type correlation (BlueCode)",
#   xlim  = c(-0.2, 1)
# )
# abline(v = 0.7, lty = 2, col = "gray40")

## ----perf-metrics-bluecode----------------------------------------------------
# perf_bc <- makeTable1Tool(pred_mat = pred_common_bc, obs_mat = true_common_bc)
# knitr::kable(perf_bc$Perf, digits = 3,
#              caption = "Performance metrics -- BlueCode simulation")

## ----compare-strategies-------------------------------------------------------
# cat(sprintf(
#   "Overall Pearson r\n  Synthetic  : %.3f\n  BlueCode   : %.3f\n",
#   r_overall, r_overall_bc
# ))

## ----session-info-------------------------------------------------------------
# sessionInfo()