This vignette mirrors the main package example (see
README) and compares a PLS baseline to
soilVAE on the included dataset
datsoilspc.
CRAN/CI note: this vignette is written to build on systems without Python/TensorFlow.
The VAE training chunks are automatically skipped unless TensorFlow/Keras are available.
# IMPORTANT: do NOT use devtools in vignettes (it is not available on CRAN/CI runners).
# Only load packages that are in Imports/Suggests.
for (p in c("prospectr", "pls", "reticulate", "soilVAE")) {
if (!requireNamespace(p, quietly = TRUE)) {
stop("Package '", p, "' is required to build this vignette. ",
"Please install it (it should be installed automatically from Suggests/Imports during checks).")
}
}
library(prospectr)
library(pls)
library(reticulate)
library(soilVAE)data("datsoilspc", package = "soilVAE")
str(datsoilspc, max.level = 1)
#> 'data.frame': 391 obs. of 5 variables:
#> $ clay : num 49 7 56 14 53 24 9 18 33 27 ...
#> $ silt : num 10 24 17 19 7 21 9 20 13 19 ...
#> $ sand : num 42 69 27 67 40 55 83 61 54 55 ...
#> $ TotalCarbon: num 0.15 0.12 0.17 1.06 0.69 2.76 0.66 1.36 0.19 0.16 ...
#> $ spc : num [1:391, 1:2151] 0.0898 0.1677 0.0778 0.0958 0.0359 ...
#> ..- attr(*, "dimnames")=List of 2
#> - attr(*, "na.action")= 'omit' Named int 392
#> ..- attr(*, "names")= chr "63"eval_quant <- function(y, yhat) {
y <- as.numeric(y); yhat <- as.numeric(yhat)
ok <- is.finite(y) & is.finite(yhat)
y <- y[ok]; yhat <- yhat[ok]
if (length(y) < 3) {
return(list(n = length(y), ME = NA_real_, MAE = NA_real_, RMSE = NA_real_,
R2 = NA_real_, RPIQ = NA_real_, RPD = NA_real_))
}
err <- yhat - y
me <- mean(err)
mae <- mean(abs(err))
rmse <- sqrt(mean(err^2))
ss_res <- sum((y - yhat)^2)
ss_tot <- sum((y - mean(y))^2)
r2 <- if (ss_tot == 0) NA_real_ else 1 - ss_res / ss_tot
rpiq <- stats::IQR(y) / rmse
rpd <- stats::sd(y) / rmse
list(n = length(y), ME = me, MAE = mae, RMSE = rmse, R2 = r2, RPIQ = rpiq, RPD = rpd)
}
as_df_metrics <- function(x) {
data.frame(
n = x$n,
ME = round(x$ME, 2),
MAE = round(x$MAE, 2),
RMSE = round(x$RMSE, 2),
R2 = round(x$R2, 2),
RPIQ = round(x$RPIQ, 2),
RPD = round(x$RPD, 2),
stringsAsFactors = FALSE
)
}# Reflectance → absorbance
spcA <- log(1 / as.matrix(datsoilspc$spc))
# Resample to 5 nm
oldWavs <- as.numeric(colnames(spcA))
newWavs <- seq(min(oldWavs), max(oldWavs), by = 5)
spcARs <- prospectr::resample(
X = spcA,
wav = oldWavs,
new.wav = newWavs,
interpol = "linear"
)
# SNV + moving average
spcASnv <- prospectr::standardNormalVariate(spcARs)
spcAMovav <- prospectr::movav(spcASnv, w = 11)
# Store in object to match book-style workflows
datsoilspc$spcAMovav <- spcAMovavmatplot(
x = as.numeric(colnames(datsoilspc$spcAMovav)),
y = t(datsoilspc$spcAMovav),
xlab = "Wavelength / nm",
ylab = "Absorbance (SNV + movav)",
type = "l", lty = 1,
col = rgb(0.5, 0.5, 0.5, alpha = 0.25)
)Preprocessed spectra (SNV + movav)
maxc <- 30
pls_fit <- pls::plsr(
TotalCarbon ~ spcAMovav,
data = datC,
method = "oscorespls",
ncomp = maxc,
validation = "CV"
)
plot(pls_fit, "val", main = "PLS CV performance", xlab = "Number of components")PLS CV performance
nc <- 14
pls_pred_C <- as.numeric(predict(pls_fit, ncomp = nc, newdata = datC$spcAMovav))
pls_pred_V <- as.numeric(predict(pls_fit, ncomp = nc, newdata = datV$spcAMovav))
pls_cal <- eval_quant(datC$TotalCarbon, pls_pred_C)
pls_tst <- eval_quant(datV$TotalCarbon, pls_pred_V)
rbind(
cbind(Model = "PLS", Split = "Calibration (datC)", as_df_metrics(pls_cal)),
cbind(Model = "PLS", Split = "TEST (datV)", as_df_metrics(pls_tst))
)
#> Model Split n ME MAE RMSE R2 RPIQ RPD
#> 1 PLS Calibration (datC) 293 0.00 0.37 0.56 0.86 2.04 2.63
#> 2 PLS TEST (datV) 98 0.02 0.36 0.52 0.69 2.34 1.81tab <- rbind(
cbind(Model = "PLS", Split = "Calibration (datC)", as_df_metrics(pls_cal)),
cbind(Model = "PLS", Split = "TEST (datV)", as_df_metrics(pls_tst)),
cbind(Model = "soilVAE",Split = "Train (internal)", as_df_metrics(vae_tr)),
cbind(Model = "soilVAE",Split = "Val (internal)", as_df_metrics(vae_va)),
cbind(Model = "soilVAE",Split = "TEST (datV)", as_df_metrics(vae_te))
)
row.names(tab) <- NULL
tab