endogenr

The goal of endogenr is to make it easy to simulate dynamic systems from regression models, mathematical equations, and exogenous inputs (either based on a stochastic distribution, or given by some data). We assume a panel-data structure, with two variables denoting the time and the space dimensions.

The simulator works by identifying the dependency graph of the models added to the system, and deriving the order of calculation based on this graph.

It works with parallel simulation using the future::multisession approach.

Installation

You can install the development version of endogenr from GitHub with:

# install.packages("pak")
pak::pak("prio-data/endogenr")
# alternatively with
renv::install("prio-data/endogenr")

You can also clone (download) the repository, open it as a project in RStudio, find the “Build” tab, and press “Install”.

Example

library(endogenr)
library(fable)
#> Loading required package: fabletools
library(dplyr)
#> 
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#> 
#>     filter, lag
#> The following objects are masked from 'package:base':
#> 
#>     intersect, setdiff, setequal, union
df <- endogenr::example_data
df <- tsibble::as_tsibble(df, key = "gwcode", index = "year")

e1 <- gdppc_grwt ~ lag(yjbest) + lag(gdppc_grwt) + lag(log(gdppc)) + lag(psecprop) + lag(zoo::rollmean(gdppc_grwt, k = 3, fill = NA, align = "right"))
c1 <- yjbest ~ lag(yjbest) + lag(log(gdppc)) + lag(log(population)) + lag(psecprop) + lag(dem) + lag(gdppc_grwt) + lag(zoo::rollmean(yjbest, k = 5, fill = NA, align = "right"))
d1 <- dem ~ lag(dem) + lag(gdppc_grwt) + lag(log(gdppc)) + lag(yjbest) + lag(psecprop) + lag(zoo::rollmean(dem, k = 3, fill = NA, align = "right"))

model_system <- list(
  build_model("deterministic",formula = gdppc ~ I(abs(lag(gdppc)*(1+gdppc_grwt)))),
  build_model("deterministic", formula = gdp ~ I(abs(gdppc*population))),
  build_model("parametric_distribution", formula = ~gdppc_grwt, distribution = "norm"),
  build_model("linear", formula = c1, boot = "resid"),
  build_model("univariate_fable", formula = dem ~ error("A") + trend("N") + season("N"), method = "ets"),
  build_model("exogen", formula = ~psecprop),
  build_model("exogen", formula = ~population)
)

simulator_setup <- setup_simulator(models = model_system,
                                   data = df,
                                   train_start = 1970,
                                   test_start = 2010,
                                   horizon = 12,
                                   groupvar = "gwcode",
                                   timevar = "year",
                                   inner_sims = 2,
                                   min_window = 10)

set.seed(42)
res <- simulate_endogenr(nsim = 2, simulator_setup = simulator_setup, parallel = F)

# Example that you can back-transform variables you have simulated as transformed variables
scaled_logit <- function(x, lower=0, upper=1){
  log((x-lower)/(upper-x))
}
inv_scaled_logit <- function(x, lower=0, upper=1){
  (upper-lower)*exp(x)/(1+exp(x)) + lower
}
my_scaled_logit <- fabletools::new_transformation(scaled_logit, inv_scaled_logit)

yj <- scales::transform_yj(p = 0.4) # This was transformed using lambda 0.4 when creating the data.
# Back-transform
res <- res |> dplyr::mutate(v2x_libdem = inv_scaled_logit(dem),
                            best = yj$inverse(yjbest))

res <- tsibble::tsibble(res, key = c(simulator_setup$groupvar, ".sim"), index = simulator_setup$timevar) |>
  dplyr::filter(year >= simulator_setup$test_start)

acc <- get_accuracy(res, "gdppc_grwt", df)
#> Warning: The dimnames of the fable's distribution are missing and have been set
#> to match the response variables.
acc |> summarize(across(crps:winkler, ~ mean(.x))) |> arrange(crps) |> knitr::kable()

crps	mae	winkler
0.034463	0.0430068	0.149843

plotsim(res, "gdppc", c(2, 20, 530), df)

plotsim(res, "gdppc_grwt", c(2, 20, 530), df)

plotsim(res, "v2x_libdem", c(2, 20, 530), df)

plotsim(res, "best", c(2, 20, 530), df)

Spatial Lag

endogenr supports spatial lag variables in the simulation. The setup involves two steps: (1) computing the spatial lag for the historical data, and (2) registering a spatial_lag model in the model system so the lag is recomputed at each simulated time step.

Step 1: Prepare the spatial weights and compute the historical lag

The convenience function st_weights_from_sf creates a neighborhood list and weights from an sf object. It defaults to queen contiguity. For more advanced weight schemes, use the sfdep package directly and supply the neighborhood list (nb), weights (wt), and the unit IDs in the order they appear in the spatial object.

Note that computing the spatial lag for the historical data can be fiddly when the panel is unbalanced or has missing observations. The example below filters the data to units present in the neighborhood structure before computing the lag.

library(endogenr)
library(dplyr)

df <- endogenr::example_data
df <- tsibble::as_tsibble(df, key = "gwcode", index = "year")

# Load a map and filter to units present in the data
map <- poldat::cshp_gw_modifications(france_overseas = FALSE) |>
  dplyr::filter(end == as.Date("2019-12-31"))
map <- map |> dplyr::filter(gwcode %in% unique(df$gwcode))

# Build spatial weights (queen contiguity by default)
sf::sf_use_s2(FALSE)
neigh <- st_weights_from_sf(map, "gwcode", weights_args = list(allow_zero = TRUE))

# Compute the spatial lag of yjbest for each year in the historical data
df <- df |>
  dplyr::filter(gwcode %in% neigh$unit_ids) |>
  as_tibble() |>
  group_by(year) |>
  dplyr::mutate(
    sl_yjbest = sfdep::st_lag(yjbest, neigh$nb, neigh$wt, allow_zero = TRUE)
  )

df <- tsibble::tsibble(df, key = "gwcode", index = "year")

Step 2: Add a `spatial_lag` model to the system

A spatial_lag model works like a deterministic model — it is a transformation applied at each simulated time step t. This ensures the spatial lag variable is updated as the underlying variable evolves during simulation.

One important consideration is how the spatial lag variable enters other model formulas. Using yjbest ~ sl_yjbest will not work because sl_yjbest is computed in the same period as yjbest, creating a circular dependency. Instead, use lag(sl_yjbest) so the conflict model uses the spatial lag from the previous period.

e1 <- gdppc_grwt ~ lag(yjbest) + lag(gdppc_grwt) + lag(log(gdppc)) + lag(psecprop) +
  lag(zoo::rollmean(gdppc_grwt, k = 3, fill = NA, align = "right"))
c1 <- yjbest ~ lag(yjbest) + lag(sl_yjbest) + lag(log(gdppc)) + lag(log(population)) +
  lag(psecprop) + lag(dem) + lag(gdppc_grwt) +
  lag(zoo::rollmean(yjbest, k = 5, fill = NA, align = "right"))
d1 <- dem ~ lag(dem) + lag(gdppc_grwt) + lag(log(gdppc)) + lag(yjbest) + lag(psecprop) +
  lag(zoo::rollmean(dem, k = 3, fill = NA, align = "right"))

model_system <- list(
  # spatial_lag recomputes sl_yjbest from yjbest at each simulated t
  build_model("spatial_lag", formula = sl_yjbest ~ yjbest,
              nb = neigh$nb, wt = neigh$wt, unit_ids = neigh$unit_ids,
              island_default = 0),
  build_model("deterministic", formula = gdppc ~ I(abs(lag(gdppc) * (1 + gdppc_grwt)))),
  build_model("deterministic", formula = gdp ~ I(abs(gdppc * population))),
  build_model("parametric_distribution", formula = ~gdppc_grwt, distribution = "norm"),
  build_model("linear", formula = c1, boot = "resid"),
  build_model("univariate_fable", formula = dem ~ error("A") + trend("N") + season("N"),
              method = "ets"),
  build_model("exogen", formula = ~psecprop),
  build_model("exogen", formula = ~population)
)

simulator_setup <- setup_simulator(
  models = model_system,
  data = df,
  train_start = 1970,
  test_start = 2010,
  horizon = 12,
  groupvar = "gwcode",
  timevar = "year",
  inner_sims = 2,
  min_window = 10
)

set.seed(42)
res <- simulate_endogenr(nsim = 2, simulator_setup = simulator_setup, parallel = FALSE)

Name		Name	Last commit message	Last commit date
Latest commit History 47 Commits
R		R
data		data
man		man
renv		renv
.Rbuildignore		.Rbuildignore
.Rprofile		.Rprofile
.gitignore		.gitignore
DESCRIPTION		DESCRIPTION
LICENSE.md		LICENSE.md
NAMESPACE		NAMESPACE
README.Rmd		README.Rmd
README.md		README.md
endogenr.Rproj		endogenr.Rproj
lm_prediction_interval_test.R		lm_prediction_interval_test.R
renv.lock		renv.lock

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Repository files navigation

endogenr

Installation

Example

Spatial Lag

Step 1: Prepare the spatial weights and compute the historical lag

Step 2: Add a `spatial_lag` model to the system

About

Uh oh!

Releases

Packages

Uh oh!

Contributors

Uh oh!

Languages

Folders and files

Latest commit

History

Repository files navigation

endogenr

Installation

Example

Spatial Lag

Step 1: Prepare the spatial weights and compute the historical lag

Step 2: Add a spatial_lag model to the system

About

Resources

License

Uh oh!

Stars

Watchers

Forks

Releases

Packages 0

Uh oh!

Contributors

Uh oh!

Languages

Step 2: Add a `spatial_lag` model to the system

Packages