The bayesiansurpriser package provides seamless ggplot2
integration through custom scales and computed surprise values that can
be mapped to aesthetics.
The recommended workflow is to compute surprise first, then use ggplot2:
# Compute surprise
result <- surprise(nc, observed = SID74, expected = BIR74)
# Plot with ggplot2 using geom_sf
ggplot(result) +
geom_sf(aes(fill = surprise)) +
scale_fill_surprise() +
labs(title = "Bayesian Surprise Map")

For absolute surprise values (always positive):
ggplot(result) +
geom_sf(aes(fill = surprise)) +
scale_fill_surprise(option = "inferno") +
labs(title = "Inferno Palette")

Available viridis options: “viridis”, “magma”, “plasma”, “inferno”, “cividis”, “rocket”, “mako”, “turbo”.
# Build the same map twice, changing only the palette `option` and the title,
# so the two palettes can be compared on identical data.
p1 <- ggplot(result) +
geom_sf(aes(fill = surprise)) +
scale_fill_surprise(option = "viridis") +
labs(title = "Viridis")
# Second plot: identical layers, "plasma" palette instead of "viridis".
p2 <- ggplot(result) +
geom_sf(aes(fill = surprise)) +
scale_fill_surprise(option = "plasma") +
labs(title = "Plasma")
p1
p2

For signed surprise (positive = over-representation, negative = under-representation):
ggplot(result) +
geom_sf(aes(fill = signed_surprise)) +
scale_fill_surprise_diverging() +
labs(title = "Diverging Scale for Signed Surprise")

Custom colors:
ggplot(result) +
geom_sf(aes(fill = signed_surprise)) +
scale_fill_surprise_diverging(
low = "#2166AC", # Blue
mid = "#F7F7F7", # Light gray
high = "#B2182B" # Red
) +
labs(title = "Custom Diverging Colors")

# Top 5 most surprising counties
# Keep the five rows with the largest surprise. head() returns at most five
# rows, so an input with fewer than five rows is not padded with all-NA rows
# the way `[1:5, ]` indexing would pad it.
top5 <- head(result[order(-result$surprise), ], 5)

ggplot(result) +
  geom_sf(aes(fill = surprise)) +
  # Text labels are drawn only for the rows selected into `top5`.
  geom_sf_text(data = top5, aes(label = NAME), size = 3) +
  scale_fill_surprise() +
  labs(title = "Top 5 Most Surprising Counties Labeled")
#> Warning in st_point_on_surface.sfc(sf::st_zm(x)): st_point_on_surface may not
#> give correct results for longitude/latitude data

# Compare two time periods
# Compute surprise separately for each pair of observed/expected columns.
result74 <- surprise(nc, observed = SID74, expected = BIR74)
result79 <- surprise(nc, observed = SID79, expected = BIR79)
# Tag every row with its period so the two results stay distinguishable
# once they are stacked; `period` is the faceting variable in the plot below.
result74$period <- "1974-78"
result79$period <- "1979-84"
# Stack both periods row-wise into a single object for plotting.
combined <- rbind(result74, result79)
ggplot(combined) +
geom_sf(aes(fill = surprise)) +
scale_fill_surprise() +
facet_wrap(~period) +
labs(title = "Surprise by Time Period")

ggplot(result) +
geom_sf(aes(fill = surprise)) +
scale_fill_surprise(name = "Surprise\n(bits)") +
labs(
title = "Bayesian Surprise: NC SIDS Data",
subtitle = "Identifying unexpectedly high/low SIDS rates",
caption = "Data: NC SIDS 1974-78"
) +
theme_minimal() +
theme(
legend.position = "bottom",
legend.key.width = unit(2, "cm")
)

For non-spatial data, use standard ggplot2 geoms after computing surprise:
# Example data: ten lettered regions that all share the same expected count
# (1000) but differ in their observed counts.
df <- data.frame(
  region = LETTERS[1:10],
  observed = c(50, 120, 80, 200, 45, 150, 90, 180, 60, 110),
  expected = rep(1000, 10)
)

result_df <- surprise(df, observed = observed, expected = expected)

# Bar chart with regions ordered from highest to lowest surprise; the bar
# fill repeats the surprise value through the package's fill scale.
ggplot(result_df, aes(x = reorder(region, -surprise), y = surprise)) +
  geom_col(aes(fill = surprise)) +
  scale_fill_surprise() +
  labs(
    x = "Region",
    y = "Surprise (bits)",
    title = "Surprise by Region"
  ) +
  theme_minimal()