Advanced: Mixed models and multi-environment trials

Multi-environment mixed model

In multi-environment trials (MET) yield is affected by genotype (G), environment (E), and their interaction (GEI). A mixed model is the standard approach:

set.seed(2024)
# Simulate a 3-genotype × 4-environment trial with 3 reps
met <- expand.grid(
  genotype    = paste0("G", 1:3),
  environment = paste0("E", 1:4),
  rep         = 1:3,
  stringsAsFactors = FALSE
)
# True effects
g_eff <- c(G1 = 0, G2 = 0.3, G3 = -0.2)
e_eff <- c(E1 = 0, E2 = 0.5, E3 = -0.3, E4 = 0.1)
set.seed(42)
met$yield <- 4 +
  g_eff[met$genotype] +
  e_eff[met$environment] +
  rnorm(nrow(met), 0, 0.15)   # residual

head(met)
#>   genotype environment rep    yield
#> 1       G1          E1   1 4.205644
#> 2       G2          E1   1 4.215295
#> 3       G3          E1   1 3.854469
#> 4       G1          E2   1 4.594929
#> 5       G2          E2   1 4.860640
#> 6       G3          E2   1 4.284081

Fixed genotype, random environment

# Environment as random, genotype as fixed
m_met <- fit_linear(met,
                    formula = "yield ~ genotype",
                    random  = "(1|environment) + (1|environment:rep)")
summary(m_met)
#> ==============================
#>  agriReg: Linear Model Summary
#> ==============================
#> Engine : lmer 
#> 
#> Formula: yield ~ genotype 
#> Random : (1|environment) + (1|environment:rep) 
#> 
#> Fixed effects:
#> (Intercept)  genotypeG2  genotypeG3 
#>   4.0557062   0.3552232  -0.1669417 
#> 
#> Random effects (variance components):
#>              grp        var1     vcov  sdcor
#>  environment:rep (Intercept) 0.001448 0.0381
#>      environment (Intercept) 0.074141 0.2723
#>         Residual        <NA> 0.034104 0.1847
#> 
#> Number of obs: 36 | Groups: environment:rep, environment, Residual 
#> 
#> --- Goodness-of-fit ---
#> R2       : 0.7957
#> Adj-R2   : 0.7833
#> RMSE     : 0.1664
#> MAE      : 0.1332
#> AIC      : 11.67
#> BIC      : 21.17

Post-hoc comparisons with emmeans

library(emmeans)
emm <- emmeans(m_met$fit, ~ genotype)
contrast(emm, method = "pairwise", adjust = "tukey")
#>  contrast estimate     SE df t.ratio p.value
#>  G1 - G2    -0.355 0.0754 22  -4.712  0.0003
#>  G1 - G3     0.167 0.0754 22   2.214  0.0909
#>  G2 - G3     0.522 0.0754 22   6.926 <0.0001
#> 
#> Degrees-of-freedom method: kenward-roger 
#> P value adjustment: tukey method for comparing a family of 3 estimates

Fitting multiple growth curves by group

Use lapply to fit the logistic model separately to each treatment:

maize <- load_example_data("maize_growth")

fits_by_trt <- lapply(
  split(maize, maize$treatment),
  function(sub) {
    fit_nonlinear(sub, x_col = "days", y_col = "biomass_g",
                  model = "logistic")
  }
)

# Compare parameter estimates between treatments
lapply(fits_by_trt, function(m) round(coef(m), 2))
#> $DS
#>   Asym   xmid   scal 
#> 346.61  39.60   9.04 
#> 
#> $WW
#>   Asym   xmid   scal 
#> 497.86  47.07   8.55

Overlay curves on a single plot

library(ggplot2)

# Build prediction ribbons for each treatment
preds <- do.call(rbind, lapply(names(fits_by_trt), function(trt) {
  m     <- fits_by_trt[[trt]]
  xseq  <- seq(1, 100, length.out = 200)
  nd    <- data.frame(days = xseq)
  data.frame(
    days      = xseq,
    biomass_g = predict(m$fit, newdata = nd),
    treatment = trt
  )
}))

raw <- maize[, c("days", "biomass_g", "treatment")]

ggplot(preds, aes(x = days, y = biomass_g, colour = treatment)) +
  geom_line(linewidth = 1.1) +
  geom_point(data = raw, alpha = 0.3, size = 1.2) +
  scale_color_manual(values = c(WW = "#1D9E75", DS = "#D85A30")) +
  labs(title = "Logistic growth by water treatment",
       x = "Days after emergence", y = "Biomass (g/plant)") +
  theme_minimal()

Comparing dose-response curves across species

herb <- load_example_data("herbicide_trial")

# Fit one model per species
drc_fits <- lapply(
  split(herb, herb$species),
  function(sub) {
    fit_dose_response(sub,
                      dose_col = "dose_g_ha",
                      resp_col = "weed_control_pct",
                      fct      = "LL.4")
  }
)

# ED50 per species
lapply(names(drc_fits), function(sp) {
  cat(sp, ":\n"); ed_estimates(drc_fits[[sp]], respLev = 50)
})
#> Amaranth :
#> 
#> Estimated effective doses
#> 
#>        Estimate Std. Error   Lower   Upper
#> e:1:50  82.2591     2.3345 77.5331 86.9851
#>        Estimate Std. Error    Lower    Upper
#> e:1:50 82.25913   2.334523 77.53314 86.98513
#> Ryegrass :
#> 
#> Estimated effective doses
#> 
#>        Estimate Std. Error    Lower    Upper
#> e:1:50 207.4966     6.0475 195.2541 219.7392
#>        Estimate Std. Error    Lower    Upper
#> e:1:50 207.4966   6.047501 195.2541 219.7392
#> [[1]]
#>        Estimate Std. Error    Lower    Upper
#> e:1:50 82.25913   2.334523 77.53314 86.98513
#> 
#> [[2]]
#>        Estimate Std. Error    Lower    Upper
#> e:1:50 207.4966   6.047501 195.2541 219.7392

Exporting a comparison table to Word / Excel

# Requires the openxlsx package
library(openxlsx)

m1 <- fit_linear(load_example_data("wheat_trial"), "yield ~ nitrogen")
m2 <- fit_nonlinear(load_example_data("wheat_trial"), "nitrogen","yield","quadratic")
cmp <- compare_models(linear = m1, quadratic = m2)

write.xlsx(cmp, "model_comparison.xlsx", rowNames = FALSE)