Analyzing Microbial Social Behavior with bsocialv2

Curated Workflow

Loading example data

The package ships with example data in inst/extdata/. Two files are needed for the curated workflow:

consortia.csv – a presence/absence matrix indicating which strains are in each consortium.
curated_MTBE.csv – pre-processed growth parameters (Consortia, LogPhase, NGen, GR).

library(bsocialv2)

# Read the consortia presence/absence matrix
consortia <- read.csv(
  system.file("extdata", "consortia.csv", package = "bsocialv2")
)
if (!"Consortia" %in% colnames(consortia)) colnames(consortia)[1] <- "Consortia"

# Read the curated (pre-processed) growth parameters
curated <- read.csv(
  system.file("extdata", "curated_MTBE.csv", package = "bsocialv2")
)
if (!"Consortia" %in% colnames(curated)) colnames(curated)[1] <- "Consortia"

head(consortia)
#>   Consortia A5 A6 DD1 DD8 EE6 MS2 SH7
#> 1        X1  1 NA  NA  NA  NA  NA  NA
#> 2        X2 NA  2  NA  NA  NA  NA  NA
#> 3        X3 NA NA   3  NA  NA  NA  NA
#> 4        X4 NA NA  NA   4  NA  NA  NA
#> 5        X5 NA NA  NA  NA   5  NA  NA
#> 6        X6 NA NA  NA  NA  NA   6  NA
head(curated)
#>   Consortia  LogPhase     NGen         GR
#> 1        X1 21.333333 3.147634 0.61154986
#> 2        X2 51.333333 3.487967 0.06699273
#> 3        X3 28.666667 3.601326 0.12580285
#> 4        X4  7.333333 1.616069 0.30840651
#> 5        X5 19.000000 3.197332 0.15723236
#> 6        X6 29.000000 4.820328 0.16668731

Creating the bsocial object

Create a new bsocial object, set the project identifier, define the strain names, and populate the raw-data slots:

obj <- new("bsocial")
obj@id_proyecto <- "MTBE_example"

# Strain names are all columns in consortia except "Consortia"
obj@cepas_seleccionadas <- setdiff(colnames(consortia), "Consortia")

# Populate the raw-data list
obj@datos_crudos <- list(
  consortia = consortia,
  curated   = curated
)

Step 1: Transform curated data

transform_curated_data() joins the curated growth parameters with the consortia presence/absence matrix and stores the result in datos_procesados:

obj <- transform_curated_data(obj)
#> [2026-04-28 19:27:46][INFO] transform_curated_data()
head(obj@datos_procesados)
#>   Consortia group_id A5 A6 DD1 DD8 EE6 MS2 SH7  LogPhase     NGen         GR
#> 1        X1       NA  1 NA  NA  NA  NA  NA  NA 21.333333 3.147634 0.61154986
#> 2        X2       NA NA  2  NA  NA  NA  NA  NA 51.333333 3.487967 0.06699273
#> 3        X3       NA NA NA   3  NA  NA  NA  NA 28.666667 3.601326 0.12580285
#> 4        X4       NA NA NA  NA   4  NA  NA  NA  7.333333 1.616069 0.30840651
#> 5        X5       NA NA NA  NA  NA   5  NA  NA 19.000000 3.197332 0.15723236
#> 6        X6       NA NA NA  NA  NA  NA   6  NA 29.000000 4.820328 0.16668731

Step 2: Analyze growth

analyze_growth() creates a scatter plot of LogPhase vs NGen colored by consortium diversity (number of strains), and generates top-10 tables ranked by NGen and GR:

obj <- analyze_growth(obj)
#> [2026-04-28 19:27:46][INFO] analyze_growth()

# View the scatter plot
obj@graficos$growth_scatter


# Top 10 consortia by number of generations
obj@resultados_analisis$best_10_ngen
#>     Consortia group_id LogPhase     NGen        GR
#> 7          X7       NA 33.33333 5.858597 0.1724535
#> 6          X6       NA 29.00000 4.820328 0.1666873
#> 8          X8       NA 21.66667 4.764694 0.2382520
#> 36        X36       NA 24.00000 4.641515 0.1935738
#> 15        X15       NA 23.33333 4.552552 0.2032683
#> 9          X9       NA 42.66667 4.227527 0.1000704
#> 29        X29       NA 22.66667 4.113447 0.1813120
#> 57        X57       NA 22.66667 4.081036 0.1792061
#> 106       X79       NA 26.66667 4.072223 0.1529075
#> 113       X86       NA 20.66667 4.032337 0.1950273

Step 3: Analyze social behavior

analyze_social_behavior() compares each strain’s fitness in consortia versus its monoculture baseline. It produces boxplots showing relative fitness split by whether each strain is present or absent:

obj <- analyze_social_behavior(obj)
#> [2026-04-28 19:27:46][INFO] analyze_social_behavior(): iniciando

# Fitness boxplots (NGen and GR)
obj@resultados_analisis$social_behavior$social_generations_plot

obj@resultados_analisis$social_behavior$social_gr_plot

Step 4: Classify strains

summarize_social_behavior() uses pairwise t-tests and median comparisons to classify each strain as a cooperator (positive), cheater (negative), or neutral:

obj <- summarize_social_behavior(obj)
#> [2026-04-28 19:27:48][INFO] summarize_social_behavior(): iniciando

# Classification based on number of generations
obj@resultados_analisis$summary_gen
#> $positives
#> character(0)
#> 
#> $negatives
#> [1] "A5"  "DD1" "DD8"
#> 
#> $neutrals
#> [1] "A6"  "EE6" "MS2" "SH7"

# Classification based on growth rate
obj@resultados_analisis$summary_gr
#> $positives
#> [1] "MS2"
#> 
#> $negatives
#> [1] "A6"  "DD1"
#> 
#> $neutrals
#> [1] "A5"  "DD8" "EE6" "SH7"

Step 5: Analyze diversity effects

analyze_diversity() examines the relationship between consortium diversity (number of strains) and fitness. It produces boxplots of relative fitness across diversity levels and a “top-k” analysis:

obj <- analyze_diversity(obj)
#> [2026-04-28 19:27:48][INFO] analyze_diversity()

# Diversity effect on fitness (NGen)
obj@graficos$diversity_gen_plot


# Diversity effect on fitness (GR)
obj@graficos$diversity_gr_plot

Step 6: Analyze stability

analyze_stability() calculates how consistent growth metrics are across diversity levels. For curated data it groups consortia by diversity and produces violin plots:

obj <- analyze_stability(obj)
#> [2026-04-28 19:27:48][INFO] analyze_stability(): iniciando
#> [2026-04-28 19:27:48][INFO] analyze_stability(): calculando CV agrupando por diversidad (datos curated)
#> Warning in cor.test.default(plot_data[[x_col]], plot_data[[y_col]], method =
#> "spearman"): Cannot compute exact p-value with ties

#> Warning in cor.test.default(plot_data[[x_col]], plot_data[[y_col]], method =
#> "spearman"): Cannot compute exact p-value with ties
#> [2026-04-28 19:27:48][INFO] analyze_stability(): completado

# Stability violin plots
obj@graficos$stability_ngen_plot
#> Warning: Groups with fewer than two datapoints have been dropped.
#> i Set `drop = FALSE` to consider such groups for position adjustment purposes.
#> `geom_smooth()` using formula = 'y ~ x'
#> Warning: `position_dodge()` requires non-overlapping x intervals.

obj@graficos$stability_gr_plot
#> Warning: Groups with fewer than two datapoints have been dropped.
#> i Set `drop = FALSE` to consider such groups for position adjustment purposes.
#> `geom_smooth()` using formula = 'y ~ x'
#> Warning: `position_dodge()` requires non-overlapping x intervals.

Step 7: Find biofilm assembly sequences

analyze_biofilm_sequence() builds a directed graph of consortium assembly paths. It finds shortest paths from monocultures to the full consortium, based on increasing diversity and fitness:

obj <- analyze_biofilm_sequence(obj)

# The paths are stored as named lists
str(obj@resultados_analisis$biofilm_gen_paths, max.level = 2)
#> List of 7
#>  $ X1: list()
#>  $ X2: list()
#>  $ X3: list()
#>  $ X4:List of 8
#>   ..$ : chr [1:7] "X4" "X11" "X47" "X112" ...
#>   ..$ : chr [1:7] "X4" "X11" "X49" "X112" ...
#>   ..$ : chr [1:7] "X4" "X11" "X39" "X99" ...
#>   ..$ : chr [1:7] "X4" "X11" "X49" "X99" ...
#>   ..$ : chr [1:7] "X4" "X11" "X39" "X99" ...
#>   ..$ : chr [1:7] "X4" "X11" "X49" "X99" ...
#>   ..$ : chr [1:7] "X4" "X11" "X47" "X112" ...
#>   ..$ : chr [1:7] "X4" "X11" "X49" "X112" ...
#>  $ X5: list()
#>  $ X6: list()
#>  $ X7: list()

# Plot the assembly graph (base R plot)
obj@graficos$biofilm_gen_plot_func()

Raw Workflow (Overview)

When working with raw plate reader data the workflow starts with CSV files exported from the plate reader. Each plate CSV contains OD readings over time. The six example plates are included in the package (plate1.csv through plate6.csv).

The following code is not executed here because it requires raw plate files with specific formatting. It illustrates the additional preprocessing steps.

library(bsocialv2)
library(readr)

obj <- new("bsocial")
obj@id_proyecto <- "raw_example"

# Read consortia matrix
consortia <- read.csv(
  system.file("extdata", "consortia.csv", package = "bsocialv2")
)
if (!"Consortia" %in% colnames(consortia)) colnames(consortia)[1] <- "Consortia"

obj@cepas_seleccionadas <- setdiff(colnames(consortia), "Consortia")

# Read plate CSVs
plate_files <- paste0("plate", 1:6, ".csv")
plates <- lapply(plate_files, function(f) {
  read_csv(system.file("extdata", f, package = "bsocialv2"), show_col_types = FALSE)
})

obj@datos_crudos <- list(
  consortia = consortia,
  plates    = plates,
  type      = "raw"
)

# Step 1: Preprocess raw data (background correction + replicate aggregation)
#   groups    - integer vector assigning plates to replicate groups
#   bg_type   - "blank" (subtract blank well) or "threshold" (subtract OD value)
#   bg_param  - blank well ID (for "blank") or numeric threshold (for "threshold")
obj <- transform_raw_data(obj,
  groups   = c(1, 1, 2, 2, 3, 3),
  bg_type  = "blank",
  bg_param = "BLANK"
)

# Step 2: Calculate growth parameters
obj <- calculate_growth_params(obj, method = "growthcurver")

# Optional: visualize processed curves
obj <- plot_processed_curves(obj)
obj@graficos$processed_curves_plot

# Step 3 onwards: same pipeline as curated workflow
obj <- analyze_growth(obj)
obj <- analyze_social_behavior(obj)
obj <- summarize_social_behavior(obj)
obj <- analyze_diversity(obj)
obj <- analyze_stability(obj)
obj <- analyze_biofilm_sequence(obj)

Accessing Results

After running the full pipeline, results are stored in the bsocial object’s slots. Here is a summary of key outputs:

Slot	Key	Description
`datos_procesados`	(data frame)	Merged consortia + growth parameters
`resultados_analisis`	`best_10_ngen`	Top 10 consortia by NGen
`resultados_analisis`	`best_10_gr`	Top 10 consortia by GR
`resultados_analisis`	`social_behavior`	Fitness comparison data and plots
`resultados_analisis`	`summary_gen`	Strain classification (NGen)
`resultados_analisis`	`summary_gr`	Strain classification (GR)
`resultados_analisis`	`diversity_gen_table`	Diversity effect matrix (NGen)
`resultados_analisis`	`diversity_gr_table`	Diversity effect matrix (GR)
`resultados_analisis`	`stability_cv_data`	Stability CV data
`resultados_analisis`	`biofilm_gen_paths`	Assembly paths (NGen)
`resultados_analisis`	`biofilm_gr_paths`	Assembly paths (GR)
`graficos`	`growth_scatter`	LogPhase vs NGen scatter (ggplot)
`graficos`	`diversity_gen_plot`	Diversity boxplot, NGen (ggplot)
`graficos`	`diversity_gr_plot`	Diversity boxplot, GR (ggplot)
`graficos`	`stability_ngen_plot`	Stability violin, NGen (ggplot)
`graficos`	`stability_gr_plot`	Stability violin, GR (ggplot)
`graficos`	`biofilm_gen_plot_func`	Assembly graph plot function (NGen)
`graficos`	`biofilm_gr_plot_func`	Assembly graph plot function (GR)