README

This package provides functions for performing split robust least angle regression.

Installation

# Install srlars with install.packages().
# (This snippet was marked eval = FALSE, i.e. it is shown but not run when
# the README is rendered.)
install.packages("srlars", dependencies = TRUE)

# Alternatively, install from the GitHub repository using devtools.
library(devtools)
devtools::install_github("AnthonyChristidis/srlars")

Usage

library(srlars)
library(mvnfast)

# --- 1. Simulation Parameters ---

# Problem dimensions
n <- 50                    # training observations (rows of x)
p <- 100                   # candidate predictors (columns of x)

# Signal structure
p.active <- 20             # leading predictors with nonzero coefficients
group.size <- 5            # predictors per correlated block
rho.within <- 0.8          # correlation inside a block
rho.between <- 0.2         # correlation across blocks (active set)

# Noise and contamination
snr <- 3                   # signal-to-noise ratio used to set the noise sd
contamination.prop <- 0.1  # share of training cells replaced by outliers

# Fix the RNG state so the example is reproducible
set.seed(0)

# --- 2. Data Generation ---

# Predictor covariance: unit variances, rho.between among the active
# predictors, rho.within inside each consecutive block of group.size active
# predictors; every other entry stays at zero.
sigma.mat <- matrix(0, p, p)
sigma.mat[1:p.active, 1:p.active] <- rho.between
for (group in 0:(p.active / group.size - 1)) {
  block <- (group * group.size + 1):(group * group.size + group.size)
  sigma.mat[block, block] <- rho.within
}
diag(sigma.mat) <- 1

# True coefficients: Uniform(0, 5) magnitudes with a random sign (negative
# with probability 0.7) for the first p.active predictors, zero elsewhere.
magnitudes <- runif(p.active, 0, 5)
signs <- (-1)^rbinom(p.active, 1, 0.7)
true.beta <- c(magnitudes * signs, numeric(p - p.active))

# Noise sd: sqrt of the signal variance beta' Sigma beta, divided by sqrt(snr)
signal.var <- t(true.beta) %*% sigma.mat %*% true.beta
sigma <- as.numeric(sqrt(signal.var) / sqrt(snr))

# Column labels shared by the training and test predictor matrices
predictor.names <- paste0("V", 1:p)

# Uncontaminated training sample (predictors drawn first, then the noise)
x <- mvnfast::rmvn(n, mu = numeric(p), sigma = sigma.mat)
colnames(x) <- predictor.names
train.noise <- rnorm(n, 0, sigma)
y <- as.numeric(x %*% true.beta + train.noise)

# Test sample of size m drawn from the same model
m <- 2e3
x_test <- mvnfast::rmvn(m, mu = numeric(p), sigma = sigma.mat)
colnames(x_test) <- predictor.names
test.noise <- rnorm(m, 0, sigma)
y_test <- as.numeric(x_test %*% true.beta + test.noise)

# --- 3. Introduce Contamination ---

# Cellwise contamination: pick a random contamination.prop share of the
# n * p training predictor cells and overwrite them with Uniform(-10, 10)
# draws. The response is copied unchanged.
n.cells <- n * p
contamination_indices <- sample.int(n.cells,
                                    round(n.cells * contamination.prop))
x_train <- replace(x, contamination_indices,
                   runif(length(contamination_indices), -10, 10))
y_train <- y

# --- 4. Fit srlars Model ---

# Fit the FSCRE ensemble on the contaminated training predictors (x_train)
# and the training response (y_train).
# We use 5 sub-models (n_models = 5) and the new robust configurations.
# NOTE(review): the remaining arguments are option values handed straight to
# srlars(); their exact meaning is defined by the package and is not visible
# in this file -- check ?srlars before changing them.
# compute_coef = TRUE presumably makes the coefficients available to the
# coef(fit) call later in this example -- TODO confirm.
fit <- srlars(x_train, y_train,
              n_models = 5,
              tolerance = 1e-4,
              x_preprocess = "ddc",
              y_preprocess = "wrap",
              cor_estimator = "wrap",
              cv_preprocess = "global",
              cv_fit = "ls",
              cv_loss = "huber",
              compute_coef = TRUE)

# --- 5. Prediction and Evaluation ---

# Out-of-sample predictions for the test predictors.
# As noted by the package author, predict() automatically applies the trained
# DDC transform to the new predictors.
preds <- predict(fit, newx = x_test)

# Mean squared prediction error, scaled by the noise variance sigma^2
squared.errors <- (y_test - preds)^2
mspe <- mean(squared.errors) / sigma^2
print(paste("MSPE:", round(mspe, 3)))

# Coefficients averaged over the ensemble
coefs <- coef(fit)

# Variable selection: the first coefficient entry is skipped (presumably the
# intercept -- confirm against the coef() method) and the remaining nonzero
# positions are compared with the truly active predictors.
selected_indices <- which(coefs[-1] != 0)
true_indices <- which(true.beta != 0)
n.correct <- length(intersect(selected_indices, true_indices))

# Recall: share of truly active predictors that were selected
recall <- n.correct / length(true_indices)

# Precision: share of selected predictors that are truly active
# (defined as 0 when nothing was selected)
precision <- if (length(selected_indices) > 0) {
  n.correct / length(selected_indices)
} else {
  0
}

print(paste("Recall:", round(recall, 3)))
print(paste("Precision:", round(precision, 3)))

srlars

Installation

Usage

License