The bayesiansurpriser package is designed for seamless
integration with the sf package for spatial data. This
vignette demonstrates workflows for analyzing spatial data.
# Load the NC SIDS dataset
nc <- st_read(system.file("shape/nc.shp", package = "sf"), quiet = TRUE)
# Examine the data
head(nc[, c("NAME", "SID74", "BIR74", "SID79", "BIR79")])
#> Simple feature collection with 6 features and 5 fields
#> Geometry type: MULTIPOLYGON
#> Dimension: XY
#> Bounding box: xmin: -81.74107 ymin: 36.07282 xmax: -75.77316 ymax: 36.58965
#> Geodetic CRS: NAD27
#> NAME SID74 BIR74 SID79 BIR79 geometry
#> 1 Ashe 1 1091 0 1364 MULTIPOLYGON (((-81.47276 3...
#> 2 Alleghany 0 487 3 542 MULTIPOLYGON (((-81.23989 3...
#> 3 Surry 5 3188 6 3616 MULTIPOLYGON (((-80.45634 3...
#> 4 Currituck 1 508 2 830 MULTIPOLYGON (((-76.00897 3...
#> 5 Northampton 9 1421 3 1606 MULTIPOLYGON (((-77.21767 3...
#> 6 Hertford 7 1452 5 1838 MULTIPOLYGON (((-76.74506 3...The surprise() function automatically detects sf objects
and uses the appropriate method:
# Get surprise values
surprise_vals <- get_surprise(result, "surprise")
head(surprise_vals)
#> [1] 0.1952858 0.1107616 0.2909684 0.1171487 0.5777373 0.5143742
# Get signed surprise values
signed_vals <- get_surprise(result, "signed")
head(signed_vals)
#> [1] -0.1952858 -0.1107616 -0.2909684 -0.1171487 0.5777373 0.5143742The result is a full sf object, so all sf operations work:
# Filter high-surprise regions
high_surprise <- result[result$surprise > median(result$surprise), ]
cat("Regions with above-median surprise:", nrow(high_surprise), "\n")
#> Regions with above-median surprise: 50
# Compute centroids
centroids <- st_centroid(result)
#> Warning: st_centroid assumes attributes are constant over geometriesggplot(result) +
geom_sf(aes(fill = surprise)) +
scale_fill_surprise() +
labs(title = "Bayesian Surprise: NC SIDS 1974")ggplot(result) +
geom_sf(aes(fill = signed_surprise)) +
scale_fill_surprise_diverging() +
labs(title = "Signed Surprise",
subtitle = "Red = over-representation, Blue = under-representation")# Compute surprise for two time periods
result_74 <- surprise(nc, observed = SID74, expected = BIR74)
result_79 <- surprise(nc, observed = SID79, expected = BIR79)
# Compare
nc$surprise_74 <- result_74$surprise
nc$surprise_79 <- result_79$surprise
nc$surprise_change <- nc$surprise_79 - nc$surprise_74
# Plot change
ggplot(nc) +
geom_sf(aes(fill = surprise_change)) +
scale_fill_gradient2(low = "blue", mid = "white", high = "red",
name = "Change in\nSurprise") +
labs(title = "Change in Surprise: 1974 to 1979")# Create sophisticated model space
custom_space <- model_space(
bs_model_uniform(),
bs_model_baserate(nc$BIR74),
bs_model_gaussian(),
bs_model_funnel(nc$BIR74),
prior = c(0.1, 0.3, 0.2, 0.4)
)
result_custom <- surprise(nc, observed = SID74, expected = BIR74,
models = custom_space)
# Compute a global posterior model update explicitly if needed
updated_space <- bayesian_update(custom_space, nc$SID74)
cat("Prior:", custom_space$prior, "\n")
#> Prior: 0.1 0.3 0.2 0.4
cat("Posterior:", updated_space$posterior, "\n")
#> Posterior: 0.1986683 0.7875552 2.028479e-151 0.01377646sample_frac can be
faster