diff --git a/examples/3_rowcrop/01_ERA5_nc_to_clim.R b/examples/3_rowcrop/01_ERA5_nc_to_clim.R
index 0fc5e51..42aa45e 100755
--- a/examples/3_rowcrop/01_ERA5_nc_to_clim.R
+++ b/examples/3_rowcrop/01_ERA5_nc_to_clim.R
@@ -84,8 +84,8 @@ furrr::future_pwalk(
args$site_era5_path,
paste("ERA5", site_id, ens_id, sep = "_")
),
- start_date = args$start_date,
- end_date = args$end_date,
+ start_date = start_date,
+ end_date = end_date,
in.prefix = paste0("ERA5.", ens_id),
outfolder = file.path(args$site_sipnet_met_path, site_id)
)
diff --git a/examples/3_rowcrop/02_ic_build.R b/examples/3_rowcrop/02_ic_build.R
index 9c07737..f333760 100755
--- a/examples/3_rowcrop/02_ic_build.R
+++ b/examples/3_rowcrop/02_ic_build.R
@@ -10,7 +10,7 @@ options <- list(
help = "CSV giving ids, locations, and PFTs for sites of interest"
),
optparse::make_option("--field_shape_path",
- default = "data_raw/dwr_map/i15_Crop_Mapping_2018.gdb",
+ default = "data_raw/management/crops/v4.1/parcels-consolidated.gpkg",
help = "file containing site geometries, used for extraction from rasters"
),
optparse::make_option("--ic_ensemble_size",
@@ -49,7 +49,7 @@ options <- list(
)
),
optparse::make_option("--params_read_from_pft",
- default = "SLA,leafC", # SLA units are m2/kg, leafC units are %
+ default = "SLA,leafC,leafGrowth", # Units: SLA m2/kg, leafC percent, leafGrowth g C/m2
help = "Parameters to read from the PFT file, comma separated"
),
optparse::make_option("--landtrendr_raw_files",
@@ -124,7 +124,7 @@ additional_params <- args$additional_params |>
site_info <- read.csv(
args$site_info_path,
- colClasses = c(field_id = "character")
+ colClasses = c(id = "character", field_id = "character")
)
site_info$start_date <- args$run_start_date
site_info$LAI_date <- args$run_LAI_date
@@ -136,7 +136,11 @@ PEcAn.logger::logger.info("Getting estimated soil carbon from SoilGrids 250m")
soilc_csv_path <- file.path(args$data_dir, "soilgrids_soilC_data.csv")
if (file.exists(soilc_csv_path)) {
PEcAn.logger::logger.info("using existing soil C file", soilc_csv_path)
- soil_carbon_est <- read.csv(soilc_csv_path, check.names = FALSE)
+ soil_carbon_est <- read.csv(
+ soilc_csv_path,
+ check.names = FALSE,
+ colClasses = c(Site_ID = "character", Site_Name = "character")
+ )
sites_needing_soilc <- site_info |>
filter(!id %in% soil_carbon_est$Site_ID)
} else {
@@ -166,7 +170,9 @@ sm_outdir <- file.path(args$data_dir, "soil_moisture") |>
sm_csv_path <- file.path(args$data_dir, "sm.csv") # name is hardcorded by fn
if (file.exists(sm_csv_path)) {
PEcAn.logger::logger.info("using existing soil moisture file", sm_csv_path)
- soil_moisture_est <- read.csv(sm_csv_path)
+ soil_moisture_est <- read.csv(
+ sm_csv_path,
+ colClasses = c(site.id = "character"))
sites_needing_soilmoist <- site_info |>
filter(!id %in% soil_moisture_est$site.id)
} else {
@@ -199,7 +205,11 @@ PEcAn.logger::logger.info("LAI")
lai_csv_path <- file.path(args$data_dir, "LAI_bysite.csv")
if (file.exists(lai_csv_path)) {
PEcAn.logger::logger.info("using existing LAI file", lai_csv_path)
- lai_est <- read.csv(lai_csv_path, check.names = FALSE) # TODO edit MODIS_LAI_prep to use valid colnames?
+ lai_est <- read.csv(
+ lai_csv_path,
+ check.names = FALSE, # TODO edit MODIS_LAI_prep to use valid colnames?
+ colClasses = c(site_id = "character")
+ )
sites_needing_lai <- site_info |>
filter(!id %in% lai_est$site_id)
} else {
@@ -235,7 +245,10 @@ if (file.exists(landtrendr_csv_path)) {
"using existing LandTrendr AGB file",
landtrendr_csv_path
)
- agb_est <- read.csv(landtrendr_csv_path)
+ agb_est <- read.csv(
+ landtrendr_csv_path,
+ colClasses = c(site_id = "character")
+ )
sites_needing_agb <- site_info |>
filter(!id %in% agb_est$site_id)
} else {
@@ -256,18 +269,19 @@ if (nagb > 0) {
lt_sd <- terra::rast(lt_sd_path)
field_shp <- terra::vect(args$field_shape_path)
- site_bnds <- field_shp[field_shp$UniqueID %in% sites_needing_agb$field_id, ] |>
+ site_bnds <- field_shp[field_shp$parcel_id %in% sites_needing_agb$field_id, ] |>
terra::project(lt_med)
# Check for unmatched sites
# TODO is stopping here too strict? Could reduce to warning if needed
- stopifnot(all(sites_needing_agb$field_id %in% site_bnds$UniqueID))
+ stopifnot(all(sites_needing_agb$field_id %in% site_bnds$parcel_id))
new_agb <- lt_med |>
terra::extract(x = _, y = site_bnds, fun = mean, bind = TRUE) |>
terra::extract(x = lt_sd, y = _, fun = mean, bind = TRUE) |>
as.data.frame() |>
- left_join(sites_needing_agb, by = c("UniqueID" = "field_id")) |>
+ mutate(parcel_id = as.character(parcel_id)) |>
+ left_join(sites_needing_agb, by = c("parcel_id" = "field_id")) |>
dplyr::select(
site_id = id,
AGB_median_Mg_ha = ends_with("median"),
@@ -414,6 +428,9 @@ ic_samples <- initial_condition_estimated |>
tidyr::pivot_wider(names_from = variable, values_from = sample) |>
dplyr::left_join(pft_var_samples, by = c("site_id", "replicate")) |>
dplyr::mutate(
+ # Experimental hack: Assume leafGrowth==0 means this is an annual crop, force starting biomass to zero even if LandTrendr says otherwise.
+ # This might be a terrible idea.
+ AbvGrndBiomass = if_else(leafGrowth == 0.0, 0, AbvGrndBiomass),
AbvGrndWood = AbvGrndBiomass * wood_carbon_fraction,
leaf_carbon_content = tidyr::replace_na(LAI, 0) / SLA * (leafC / 100),
wood_carbon_content = pmax(AbvGrndWood - leaf_carbon_content, 0)
diff --git a/examples/3_rowcrop/02a_build_events.R b/examples/3_rowcrop/02a_build_events.R
new file mode 100755
index 0000000..3d30262
--- /dev/null
+++ b/examples/3_rowcrop/02a_build_events.R
@@ -0,0 +1,160 @@
+#!/usr/bin/env Rscript
+
+# Creates a single PEcAn-formatted `events.JSON` containing all management
+# events from all years of all the locations in `site_info.csv`,
+# then divides it into a Sipnet-formatted `events.in` for each site.
+
+## ---------------------- parse command-line options --------------------------
+options <- list(
+ optparse::make_option("--site_info_path",
+ default = "site_info.csv",
+ help = "CSV giving ids, locations, and PFTs for sites of interest"
+ ),
+ optparse::make_option("--mgmt_file_dir",
+ default = "data_raw/management",
+ help = paste(
+ "Directory containing management inputs in Parquet format.",
+ "Note: Code currently looks for subpaths that include hard-coded",
+ "version numbers for each management type."
+ )
+ ),
+ optparse::make_option("--event_outdir",
+ default = "data/events",
+ help = "directory to write events-*.in, events.json, and phenology.csv"
+ )
+) |>
+ # Show default values in help message
+ purrr::modify(\(x) {
+ x@help <- paste(x@help, "[default: %default]")
+ x
+ })
+
+args <- optparse::OptionParser(option_list = options) |>
+ optparse::parse_args()
+
+## -------------------------- end option parsing ------------------------------
+
+library(tidyverse)
+
+# TODO these probably deserve to be runtime args,
+# but first better fix other version-specific assumptions below
+mgmt_subdirs <- list(
+ pheno = "phenology/v1.0",
+ plant = "planting/v1.0",
+ harv = "harvest/v1.0",
+ till = "tillage/v1.0",
+ irri = "irrigation/v1.0",
+ fert = NULL, # TODO
+ occ = NULL # TODO
+)
+
+if (!dir.exists(args$event_outdir)) {
+ dir.create(args$event_outdir, recursive = TRUE)
+}
+
+ids <- read.csv(
+ args$site_info_path,
+ colClasses = c(field_id = "character")
+)$field_id
+
+# read management files from inside mgmt directory
+# TODO will break if not all mgmt types live at same path
+# (e.g. user wants to pull in their own tillage but use monitored plant/harv)
+read_parquet_years <- function(dir, parcel_ids, id_var = "parcel_id") {
+ file.path(args$mgmt_file_dir, dir) |>
+ list.files("*.parq(uet)?$", full.names = TRUE) |>
+ arrow::open_dataset() |>
+ # as.character is a hack bc planting dataset has id as string some years and int others.
+ # TODO remove when fixed upstream.
+ filter(as.character(.data[[id_var]]) %in% parcel_ids) |>
+ collect()
+}
+
+# phenology doesn't have to be json -- we'll just write one CSV of all sites and years
+pheno <- read_parquet_years(mgmt_subdirs$pheno, ids, "site_id") |>
+ # TODO some seasons wrap past year end (e.g. 2025-09-08 to 2026-03-29) --
+ # current code drops year so leafonday > leafoffday, which write.config skips
+ # as invalid.
+ mutate(
+ leafonday = lubridate::yday(leafonday),
+ leafoffday = lubridate::yday(leafoffday)
+ )
+# If csv exists already, append instead of clobbering
+# TODO keep old version of the dups instead of updating?
+# Not sure which will be less surprising
+pheno_out_path <- file.path(args$event_outdir, "phenology.csv")
+if (file.exists(pheno_out_path)) {
+ existing_pheno <- read.csv(
+ pheno_out_path,
+ colClasses = c(site_id = "character"))
+ site_dups <- intersect(pheno$site_id, existing_pheno$site_id)
+ if (length(site_dups) > 0) {
+ warning(
+ "Overwriting existing phenology records for ",
+ length(site_dups), " sites in ", pheno_out_path
+ )
+ }
+ pheno <- existing_pheno |>
+ filter(!(site_id %in% site_dups)) |>
+ bind_rows(pheno)
+}
+write.csv(pheno, file = pheno_out_path, row.names = FALSE)
+
+plant <- read_parquet_years(mgmt_subdirs$plant, ids, "site_id") |>
+ # TODO fix upstream
+ dplyr::rename(
+ leaf_c_kg_m2 = C_LEAF,
+ wood_c_kg_m2 = C_STEM,
+ fine_root_c_kg_m2 = C_FINEROOT,
+ coarse_root_c_kg_m2 = C_COARSEROOT,
+ ) |>
+ # Some planting events happen in the year previous to bulk of growth.
+ # This is only a problem at the very start of the run, where the reported
+ # planting date lands before start of simulation and causes a Sipnet error.
+ # Temporary workaround: Adjust dates forward.
+ # TODO: Should also (/instead?) adjust initial pool sizes in these cases.
+ dplyr::mutate(date = pmax(date, "2016-01-01"))
+
+harv <- read_parquet_years(mgmt_subdirs$harv, ids, "site_id")
+
+till <- read_parquet_years(mgmt_subdirs$till, ids, "site_id") |>
+ mutate(
+ date = OGMn_date,
+ # Placeholder -- better-informed conversion under development.
+ # Note that NaNs get forced to 0. This was an arbitrary choice.
+ tillage_eff_0to1 = pmax(0, pmin(1, ndti_pct_change / 100), na.rm = TRUE)
+ )
+
+irrig <- read_parquet_years(mgmt_subdirs$irri, ids, "parcel_id") |>
+ filter(
+ # Ignore ensemble dimension. TODO support event ensembles across all event types
+ ens_id == "irr_ens_001",
+ # ignore all irrigation events before start of simulation
+ date >= "2016-01-01"
+ ) |>
+ mutate(
+ event_type = "irrigation",
+ site_id = as.character(parcel_id),
+ date = as.character(date)
+ ) |>
+ select(site_id, event_type, date, amount_mm, method)
+
+# TODO add fertilization / NCC here when available
+
+all_events <- dplyr::bind_rows(plant, harv, till, irrig) |>
+ dplyr::arrange(site_id, date, event_type) |>
+ dplyr::nest_by(site_id, .key="events") |>
+ dplyr::mutate(pecan_events_version = "0.1.0")
+
+evt_json_path <- file.path(args$event_outdir, "combined_events.json")
+if (file.exists(evt_json_path)) {
+ # TODO append to existing file like for phenology?
+ # need more care to handle nesting right.
+ warning("Overwriting existing events file ", evt_json_path)
+}
+jsonlite::write_json(all_events, evt_json_path)
+
+# Now divide the json into Sipnet events files
+PEcAn.SIPNET::write.events.SIPNET(evt_json_path, args$event_outdir)
+
+
diff --git a/examples/3_rowcrop/03_xml_build.R b/examples/3_rowcrop/03_xml_build.R
index b89e906..68ceb70 100755
--- a/examples/3_rowcrop/03_xml_build.R
+++ b/examples/3_rowcrop/03_xml_build.R
@@ -88,6 +88,8 @@ args <- optparse::OptionParser(option_list = options) |>
## Whew, that was a lot of lines to define a few defaults!
+# papply emits a lot of uninformative debug messages; let's ignore those
+PEcAn.logger::logger.setLevel("INFO")
site_info <- read.csv(args$site_file)
stopifnot(
@@ -157,6 +159,14 @@ settings <- settings |>
path = args$event_dir,
path_template = "{path}/events-{id}.in"
) |>
+ # For now, hard-coding one phenology path for all sites, placed inside event dir
+ # TODO make settable?
+ setEnsemblePaths(
+ n_reps = args$n_ens,
+ input_type = "leaf_phenology",
+ path = args$event_dir,
+ path_template = "{path}/phenology.csv"
+ ) |>
papply(add_soil_pft)
# Update just the first component of the output directory,
diff --git a/examples/3_rowcrop/05_run_model.R b/examples/3_rowcrop/05_run_model.R
index 6667cc4..3988c0e 100755
--- a/examples/3_rowcrop/05_run_model.R
+++ b/examples/3_rowcrop/05_run_model.R
@@ -43,12 +43,13 @@ options(error = quote({
library("PEcAn.all")
-# Report package versions for provenance
-PEcAn.all::pecan_version()
-
# Open and read in settings file for PEcAn run.
settings <- PEcAn.settings::read.settings(args$settings)
+# Report package and model versions for provenance
+PEcAn.all::pecan_version()
+PEcAn.logger::logger.info(system2(settings$model$binary, "-v", stdout = TRUE))
+
# Start ecosystem model runs
if (PEcAn.utils::status.check("MODEL") == 0) {
PEcAn.utils::status.start("MODEL")
diff --git a/examples/3_rowcrop/README.md b/examples/3_rowcrop/README.md
index 6962262..19ca40e 100644
--- a/examples/3_rowcrop/README.md
+++ b/examples/3_rowcrop/README.md
@@ -27,18 +27,14 @@ simulation period.
## Caveats
-TODO UPDATE when no longer true
-
-This simulation is under active development and all the notes below are subject
-to change as we update the code.
-For now, instructions assume a local run on MacOS and will be updated for a
+Instructions assume a local run on MacOS and will be updated for a
Linux + Slurm + Apptainer HPC environment as we finish testing and deployment.
Aspirationally, any command prefixed with `[host_args]` is one that ought to
work on HPC by "just" adding a system-specific prefix, e.g.
-`./01_ERA4_nc_to_clim.R --start_date=2016-01-01` on my machine becomes
+`./01_ERA5_nc_to_clim.R --start_date=2016-01-01` on my machine becomes
`sbatch -n16 --mem=12G --mail-type=ALL --uid=jdoe \
- ./01_ERA4_nc_to_clim.R --start_date=2016-01-01` on yours.
+ ./01_ERA5_nc_to_clim.R --start_date=2016-01-01` on yours.
## Running the workflow
@@ -72,17 +68,14 @@ Contact chelsea.carey@arb.ca.gov for more information about the dataset.
Once obtained, place them in `data_raw/private/HSP` and run
```{sh}
-../tools/build_validation_siteinfo.R
+../../tools/build_validation_siteinfo.R
```
to create `validation_site_info.csv`.
### 1. Convert climate driver files
-TODO 1: current development version of PEcAn.sipnet still writes 13-col
- clim files with constants for grid index and soil water. Document which version writes correctly.
-
-TODO 2: show how to pass n_cores from host_args
+TODO: show how to pass n_cores from host_args
(NSLOTS? SLURM_CPUS_PER_TASK?)
```{sh}
@@ -91,7 +84,7 @@ TODO 2: show how to pass n_cores from host_args
--site_sipnet_met_path=data/ERA5_CA_SIPNET \
--site_info_file=data_raw/ca_half_degree_grid.csv \
--start_date=2016-01-01 \
- --end_date=2025-12-31 \
+ --end_date=2023-12-31 \
--n_cores=7
```
@@ -110,6 +103,9 @@ so I symlinked `data/IC_prep_val/soil_moisture/` to `data/IC_prep/soil_moisture/
--pft_dir=data_raw/pfts \
--data_dir=data/IC_prep_val \
--ic_outdir=data/IC_files
+
+../../tools/build_site_info.R --location_file=../../data/design_points.csv
+
[host_args] ./02_ic_build.R \
--site_info_path=site_info.csv \
--pft_dir=data_raw/pfts \
@@ -117,14 +113,65 @@ so I symlinked `data/IC_prep_val/soil_moisture/` to `data/IC_prep/soil_moisture/
--ic_outdir=data/IC_files
```
-### 3. generate settings file
+### 2a. Generate event files
+
+Management events are read from files produced by the monitoring pipeline or equivalent sources.
+The current version assumes all event types are provided as Parquet files, and that they live in subdirectories of `mgmt_file_dir` that include specific, currently hardcoded, version numbers. See script for the details.
+Future versions of the event_build script may support passing separate paths per data product and potentially also add support for alternate storage formats (e.g. allowing csv as well as parquet).
+
+```{sh}
+[host_args] ./02a_build_events.R \
+ --site_info_path=validation_site_info.csv \
+ --mgmt_file_dir=data_raw/management \
+ --event_outdir=data/val_events
+[host_args] ./02a_build_events.R \
+ --site_info_path=site_info.csv \
+ --mgmt_file_dir=data_raw/management \
+ --event_outdir=data/events
+```
+
+For validation we need an additional hack:
+`02a_build_events.R` created event files named by their *parcel* id,
+but the validation dataset uses a separate set of *site* ids derived by hashing locations, experiment names, and treatment codes.
+This is done to (1) keep locations opaque since the validation data are nonpublic, and (2) allow simulation of separate plots (treatments) whose locations all fall in a single parcel of the statewide map.
+A future solution would be to teach `03_xml_build.R` how to find event files that are named by parcel id, so that sites which share a parcel do not need to duplicate the file.
+For now though, let's duplicate the files of interest so they're named after the site IDs PEcAn will use. I'm not cleaning up the originals afterwards -- they're small and might be used by other runs that reuse this data directory.
+
+```{r}
+vsi <- read.csv("validation_site_info.csv") |>
+ dplyr::distinct(id, field_id) |>
+ dplyr::rename(site_id = id)
+vsi |> purrr::pwalk(
+ \(site_id, field_id) file.copy(
+ paste0("data/val_events/events-", field_id, ".in"),
+ paste0("data/val_events/events-", site_id, ".in")
+ )
+)
+
+# And rename inside the phenology file, too
+read.csv("data/val_events/phenology.csv") |>
+ dplyr::rename(field_id = site_id) |>
+ dplyr::left_join(vsi) |>
+ write.csv("data/val_events/phenology.csv", row.names = FALSE)
+```
+
+
+### 3. Generate settings file
```{sh}
[host_args] ./03_xml_build.R \
+ --end_date=2023-12-31 \
--ic_dir=data/IC_files \
--site_file=validation_site_info.csv \
+ --event_dir=data/val_events \
--output_file=validation_settings.xml \
--output_dir_name=val_out
+[host_args] ./03_xml_build.R \
+ --ic_dir=data/IC_files \
+ --end_date=2023-12-31 \
+ --site_file=site_info.csv \
+ --output_file=settings.xml \
+ --output_dir_name=output
```
### 4. Set up model run directories
@@ -134,14 +181,16 @@ stage instead of in xml_build.
```{sh}
[host_args] ./04_set_up_runs.R --settings=validation_settings.xml
+[host_args] ./04_set_up_runs.R --settings=settings.xml
```
### 5. Run model
```{sh}
export NCPUS=8
-ln -s [your/path/to]/sipnet/sipnet sipnet.git
+ln -s [your/path/to]/sipnet/sipnet sipnet
[host_args] ./05_run_model.R --settings=val_out/pecan.CONFIGS.xml
+[host_args] ./05_run_model.R --settings=output/pecan.CONFIGS.xml
```
### 6. Validate
diff --git a/examples/3_rowcrop/template.xml b/examples/3_rowcrop/template.xml
index 574014c..6386690 100644
--- a/examples/3_rowcrop/template.xml
+++ b/examples/3_rowcrop/template.xml
@@ -9,6 +9,11 @@
output/out
output/run
+
+ annual_crop
+ data_raw/pfts/annual_crop/post.distns.Rdata
+ data_raw/pfts/annual_crop/
+
temperate.deciduous
data_raw/pfts/temperate.deciduous/post.distns.Rdata
@@ -44,6 +49,9 @@
sampling
+
+ sampling
+
@@ -54,14 +62,27 @@
SIPNET
2.0.0
TRUE
- sipnet.git
+ ./sipnet
0
- 1
- 1
+ 0
+ 0
1
+
+ localhost
+ output/out
+ output/run
+
+
+ squeue -j @JOBID@ || echo DONE
+
+ parallel -j ${NCPUS:-1} --skip-first-line '{}/job.sh' ::::
+
+ 1000
+
+
@@ -79,21 +100,11 @@
+
+
+
-
- localhost
- output/out
- output/run
-
-
- squeue -j @JOBID@ || echo DONE
-
- parallel -j ${NCPUS:-1} --skip-first-line '{}/job.sh' ::::
-
- 1000
-
-
diff --git a/examples/3_rowcrop/validate.R b/examples/3_rowcrop/validate.R
index 9c54442..7a3e985 100755
--- a/examples/3_rowcrop/validate.R
+++ b/examples/3_rowcrop/validate.R
@@ -29,6 +29,35 @@ args <- optparse::OptionParser(option_list = options) |>
library(tidyverse)
+## Function definitions
+
+# Calculate change per year from successive timepoints.
+# @param df: dataframe containing at least column `year`
+# plus any others to be converted to differences between years
+# @return dataframe one row shorter than input, with year column removed
+# and all others converted to difference on a yearly basis
+# @examples
+# data.frame(
+# year = c(2020, 2021, 2025),
+# x = c(1, 2, 4),
+# y = c(10, 5, 1)
+# ) |> diff_years()
+# # x y
+# # 2 1.0 -5
+# # 3 0.5 -1
+diff_years <- function(df) {
+ df |>
+ arrange(year) |>
+ # convert all vars, including year, to deltas for each timestep
+ mutate_all(\(x) (x - lag(x))) |>
+ # then scale non-time deltas to timestep length
+ mutate(across(-year, \(x) x / year)) |>
+ select(-year) |>
+ _[-1,]
+}
+
+
+
## Read validation data, identify target years from each site
soc_obs <- read.csv(args$val_data_path) |>
@@ -91,8 +120,8 @@ soc_sim <- sim_files_wanted |>
)
) |>
unnest(contents) |>
- group_by(ens_num, site, year) |>
- slice_max(posix)
+ ungroup() |>
+ slice_max(posix, by = c(ens_num, site, year))
## Combine and align obs + sim
@@ -100,14 +129,40 @@ soc_compare <- soc_sim |>
left_join(soc_obs) |>
# TODO these filters need refinement --
# eg Are NAs actually expected or should they trigger complaints?
- drop_na(obs_SOC) |>
+ drop_na(obs_SOC) #|>
# TODO excluded as surprisingly high
# May want to re-include after inspecting data for individual sites
- filter(obs_SOC < 20)
+ # filter(obs_SOC < 20)
# TODO will eventually want to have PFTs labeled here
if (!dir.exists(args$output_dir)) dir.create(args$output_dir, recursive = TRUE)
+# Debug use only:
+# Contains private treatment names, so do not leave this CSV sitting in your output directory
+# write.csv(
+# soc_compare |> select(-path),
+# file.path(args$output_dir, "soc_compare_tmp.csv"),
+# row.names = FALSE
+# )
+
+
+
+# SOC change between sequential measurements, in g/m2/yr
+soc_timedelta <- soc_compare |>
+ # some years have multiple samples, others don't; can't track individual samples across years.
+ # Instead collapse these to treatment means
+ # TODO propagate variance?
+ summarize(
+ across(c(TotSoilCarb, obs_SOC), mean),
+ .by = c(ens_num, site, year)
+ ) |>
+ nest_by(ens_num, site) |>
+ filter(nrow(data) > 1) |>
+ mutate(yrly_diff = map(list(data), diff_years)) |>
+ unnest(yrly_diff)
+
+
+
## lm fit + CIs
soc_fits <- soc_compare |>
@@ -147,6 +202,34 @@ soc_ci <- soc_fits |>
pred_mean = mean(pred),
)
+soc_timedelta_fits <- soc_timedelta |>
+ ungroup() |>
+ nest_by(ens_num) |>
+ mutate(
+ fit = list(lm(TotSoilCarb ~ obs_SOC, data = data)),
+ r2 = summary(fit)$adj.r.squared,
+ nse = 1 - (
+ sum((data$obs_SOC - data$TotSoilCarb)^2) /
+ sum((data$obs_SOC - mean(data$obs_SOC))^2)
+ ),
+ rmse = sqrt(mean((data$obs_SOC - data$TotSoilCarb)^2)),
+ bias = mean(data$TotSoilCarb - data$obs_SOC)
+ )
+soc_timedelta_ci <- soc_timedelta_fits |>
+ mutate(
+ predx = list(seq(min(data$obs_SOC), max(data$obs_SOC), by = 0.1)),
+ pred = list(predict(fit, data.frame(obs_SOC = predx)))
+ ) |>
+ unnest(c(predx, pred)) |>
+ ungroup() |>
+ group_by(predx) |>
+ summarize(
+ pred_q5 = quantile(pred, 0.05),
+ pred_q95 = quantile(pred, 0.95),
+ pred_mean = mean(pred),
+ )
+
+
## Scatterplot
soc_lm_plot <- ggplot(soc_compare) +
@@ -186,6 +269,35 @@ ggsave(
)
+soc_timedelta_plot <- ggplot(soc_timedelta) +
+ aes(obs_SOC, TotSoilCarb) +
+ geom_point() +
+ geom_abline(lty = "dotted") +
+ geom_ribbon(
+ data = soc_timedelta_ci,
+ mapping = aes(
+ x = predx,
+ ymin = pred_q5,
+ ymax = pred_q95,
+ y = NULL
+ ),
+ alpha = 0.4
+ ) +
+ geom_line(
+ data = soc_timedelta_ci,
+ mapping = aes(predx, pred_mean),
+ col = "blue"
+ ) +
+ xlab("Change in measured 0-30 cm soil C stock (kg C / m2 / yr)") +
+ ylab("Change in simulated 0-30 cm soil C stock (kg C / m2 / yr)") +
+ theme_bw()
+ggsave(
+ file.path(args$output_dir, "SOC_yrly_scatter.png"),
+ plot = soc_timedelta_plot,
+ height = 8,
+ width = 8
+)
+
soc_fits |>
ungroup() |>
summarize(across(r2:bias, c(mean = mean, sd = sd))) |>
diff --git a/tools/build_site_info.R b/tools/build_site_info.R
new file mode 100755
index 0000000..6a95c56
--- /dev/null
+++ b/tools/build_site_info.R
@@ -0,0 +1,155 @@
+#!/usr/bin/env Rscript
+
+# (re)building site info from an existing set of design points,
+# retaining location but updating to use harmonized parcel IDs
+# and PFT assignments for 2016 (previous version used 2018)
+
+# This may need further adjustment to account for PFT timeseries once restarts
+# are enabled.
+
+## ---------------------- parse command-line options --------------------------
+options <- list(
+ optparse::make_option("--location_file",
+ default = "../data/design_points.csv",
+ help = paste(
+ "CSV giving at least lat and lon for sites of interest.",
+ "Any other columns will be passed unchanged to the output."
+ )
+ ),
+ optparse::make_option("--out_file",
+ default = "site_info.csv",
+ help = "Path to write CSV with parcel ids and PFTs added"
+ ),
+ optparse::make_option("--parcel_file",
+ default = "data_raw/management/crops/v4.1/parcels-consolidated.gpkg",
+ help = "Geopackage to be used for spatial lookup of parcel IDs"
+ ),
+ optparse::make_option("--crop_file",
+ default = "data_raw/management/crops/v4.1/crops_all_years.parq",
+ help = "Parquet file containing harmonized DWR crop history"
+ )
+) |>
+ # Show default values in help message
+ purrr::modify(\(x) {
+ x@help <- paste(x@help, "[default: %default]")
+ x
+ })
+
+args <- optparse::OptionParser(option_list = options) |>
+ optparse::parse_args()
+
+## -------------------------- end option parsing ------------------------------
+
+
+library(tidyverse)
+
+
+
+#' Assign DWR/LandIQ California crop codes to Sipnet PFT names
+#'
+#' Built for the MAGiC project, may or may not be applicable elsewhere
+#'
+#' @param CLASS vector of crop class codes (1-2 capital letters each)
+#' @param SUBCLASS vector of crop identifiers (1-2 numeric digits each)
+#' @return PFT assignments as character, NA if unclassified
+dwr_crop_to_pft <- function(CLASS, SUBCLASS) {
+ dplyr::case_when(
+
+ ## N fixers, treated as generic annual until N fixer PFT is created
+ CLASS == "F" & SUBCLASS %in% c(10) ~ "annual_crop", # dry beans
+ CLASS == "P" & SUBCLASS %in% c(1, 2) ~ "annual_crop", # alfalfa, clover
+ CLASS == "T" & SUBCLASS %in% c(3, 11) ~ "annual_crop", # Green beans, peas
+
+ ## subclasses with physiology differing from the rest of their class
+ # woody berries
+ CLASS == "T" & SUBCLASS %in% c(19, 28) ~ "temperate.deciduous",
+ # "Flowers, nursery & Christmas tree farms"
+ # (A weird grouping, but assuming these are most likely to be tree-like)
+ CLASS == "T" & SUBCLASS %in% c(16) ~ "temperate.deciduous",
+
+ ## Whole-class assignments
+ CLASS %in% c("F", "G", "T") ~ "annual_crop", # field crops, grains/hay, truck crops
+ CLASS %in% c("P") ~ "grass", # perennial pasture grass; annual grasses in G
+ CLASS %in% c("D", "C", "V", "YP") ~ "temperate.deciduous", # deciduous, citrus, vineyard, young perennial
+ CLASS %in% c("R") ~ "grass", # Rice; TODO update when rice PFT is created
+ CLASS %in% c("X", "I") ~ "annual_crop", # fallow, not cropped, or unclassified
+ # TODO maybe this should just get soil PFT or be skipped during site selection?
+ # Logic for defaulting to annual crop here:
+ # Temporarily idle/fallow likely to grow small amount annual weeds;
+ # If no plant/harv events, annual will grow very little.
+
+ # Urban, industrial, native vegetation, semi-agricultural, vacant, etc
+ TRUE ~ NA_character_
+ )
+}
+
+#' Look up parcel IDs from harmonized DWR California crop map
+#'
+#' @param df dataframe with at least columns `lat` and `lon`
+#' Any other columns will be passed through unchanged
+#'
+#' @return dataframe with `parcel_id` column added
+#'
+#' @examples
+#' point_to_dwr_parcelid(
+#'   data.frame(
+#'     lat = c(32.18, 32.22), lon = c(-122.22, -123.18)),
+#'   geo_file = "data_raw/management/crops/v4.1/parcels-consolidated.gpkg"
+#' )
+#'
+point_to_dwr_parcelid <- function(df, geo_file = args$parcel_file) {
+ stopifnot(is.numeric(df$lat), is.numeric(df$lon))
+ parcel_geo <- terra::vect(geo_file)
+ nearest_parcels <- df |>
+ terra::vect(crs="epsg:4326") |>
+ terra::project(parcel_geo) |>
+ terra::nearest(parcel_geo)
+
+ df |>
+ dplyr::mutate(parcel_id = parcel_geo$parcel_id[nearest_parcels$to_id])
+}
+
+#' @param ids vector of parcel ids
+#' @param years,seasons numeric vectors to subset by.
+#' If not specified, returns all years and seasons.
+#' @param crop_file path to a Parquet file containing harmonized DWR crop history
+#' @return dataframe of crop info
+dwr_parcelid_to_crop <- function(
+ ids,
+ years = NULL,
+ seasons = NULL,
+ crop_file = args$crop_file) {
+  cropdat <- arrow::open_dataset(crop_file) |>
+ dplyr::filter(.data$parcel_id %in% ids) |>
+ select(parcel_id, year, season, CLASS, SUBCLASS)
+ if (!is.null(years)) {
+ cropdat <- cropdat |> dplyr::filter(.data$year %in% years)
+ }
+ if (!is.null(seasons)) {
+ cropdat <- cropdat |> dplyr::filter(.data$season %in% seasons)
+ }
+
+ dplyr::collect(cropdat)
+}
+
+
+design_pts <- read.csv(args$location_file)
+pts_matched <- point_to_dwr_parcelid(design_pts)
+crop_2016 <- dwr_parcelid_to_crop(pts_matched$parcel_id, years = 2016, seasons = 2) |>
+ mutate(site.pft = dwr_crop_to_pft(CLASS, SUBCLASS)) |>
+ dplyr::select("parcel_id", "site.pft")
+
+if (!is.null(design_pts$id) && anyDuplicated(design_pts$id)) {
+ PEcAn.logger::logger.severe("column `id` of design points is not unique")
+}
+
+site_info <- pts_matched |>
+ left_join(crop_2016, by = "parcel_id") |>
+ rename(field_id = parcel_id) # TODO propagate `parcel_id` convention further downstream?
+ # OR rethink naming: id vs site_id vs something else?
+
+if (is.null(site_info$id)) {
+ site_info$id <- site_info$field_id
+}
+
+write.csv(site_info, args$out_file, row.names = FALSE)
diff --git a/tools/build_validation_siteinfo.R b/tools/build_validation_siteinfo.R
index 53d95d5..e8872d8 100755
--- a/tools/build_validation_siteinfo.R
+++ b/tools/build_validation_siteinfo.R
@@ -14,13 +14,12 @@ data_dir <- "data_raw/private/HSP/"
soc_file <- "Harmonized_Data_Croplands.csv"
mgmt_file <- "Harmonized_SiteMngmt_Croplands.csv"
+loc_tmp_file <- "validation_site_locs.csv"
+output_file <- "validation_site_info.csv"
+on.exit(unlink(loc_tmp_file), add = TRUE) # FIXME(review): top-level on.exit fires after this statement, not at script end -- confirm, and move unlink to end of script if so
# can't use datapoints from before our simulations start
min_yr <- 2016
-# parcel ID lookup
-field_map <- "data_raw/management/crops/v4.1/parcels-consolidated.gpkg"
-field_pft_info <- "data_raw/management/crops/v4.1/crops_all_years.parq"
-
# For first pass, selecting only the control/no-treatment plots.
# TODO: revisit this as we build more management into the workflow.
site_ids <- read.csv(file.path(data_dir, mgmt_file)) |>
@@ -32,47 +31,28 @@ site_locs <- read.csv(file.path(data_dir, soc_file)) |>
# some IDs in site_locs contain spaces stripped in site_id; let's match
mutate(BaseID = gsub("\\s+", "", BaseID)) |>
distinct(ProjectName, BaseID, Latitude, Longitude) |>
- rename(lat = Latitude, lon = Longitude) |>
- inner_join(site_ids)
-
-dwr_fields <- terra::vect(field_map)
-dwr_field_pfts <- arrow::read_parquet(field_pft_info) |>
- dplyr::filter(year == min_yr, season == 2) |>
- select(parcel_id, crop_class = CLASS)
-site_dwr_ids <- site_locs |>
- terra::vect(crs = "epsg:4326") |>
- terra::project(dwr_fields) |>
- terra::nearest(dwr_fields) |>
- as.data.frame() |>
- mutate(
- ProjectName = site_locs$ProjectName[from_id],
- BaseID = site_locs$BaseID[from_id],
- parcel_id = dwr_fields$parcel_id[to_id],
- ) |>
- left_join(dwr_field_pfts)
-
-stopifnot(nrow(site_dwr_ids) == nrow(site_locs))
-
-site_locs |>
- left_join(site_dwr_ids, by = c("ProjectName", "BaseID")) |>
mutate(
- id = paste(ProjectName, BaseID, lat, lon) |>
- purrr::map_chr(rlang::hash),
- pft = dplyr::case_when(
- crop_class %in% c("G", "F", "P", "T") ~ "grass",
- crop_class %in% c("D", "C", "V", "YP") ~ "temperate.deciduous",
- # TODO later: R = rice, T19 & T28 = woody berries
- TRUE ~ NA_character_
- )
+ val_id = paste(ProjectName, BaseID, Latitude, Longitude) |>
+ purrr::map_chr(rlang::hash)
) |>
- # TODO is this desirable?
- # In production, may be better to complain if no PFT match
- drop_na(pft) |>
+ inner_join(site_ids) |>
+ distinct(id = val_id, lat = Latitude, lon = Longitude)
+
+# Match against field IDs and crop codes using same script used on non-val files
+# TODO avoid round trip to CSV here...
+# it's only here because I replaced a chunk of code that was redundant with
+# `build_site_info.R` and this was faster than thinking how to refactor further
+write.csv(site_locs, loc_tmp_file, row.names = FALSE)
+callr::rscript(
+ "../../tools/build_site_info.R",
+ cmdargs = c(paste0("--location_file=", loc_tmp_file),
+ paste0("--out_file=", output_file))
+)
+read.csv(output_file) |>
# Temporary hack:
# Where multiple treatments share a location,
# use only one of them
- group_by(lat, lon, pft) |>
+ group_by(lat, lon, site.pft) |>
slice_sample(n = 1) |>
ungroup() |>
- select(id, field_id = parcel_id, lat, lon, site.pft = pft) |>
- write.csv("validation_site_info.csv", row.names = FALSE)
+ write.csv(output_file, row.names = FALSE)
diff --git a/tools/run_CA_grid_ERA5_nc_extraction.R b/tools/run_CA_grid_ERA5_nc_extraction.R
index c0bf935..68d6fdb 100644
--- a/tools/run_CA_grid_ERA5_nc_extraction.R
+++ b/tools/run_CA_grid_ERA5_nc_extraction.R
@@ -17,7 +17,7 @@ ca_bboxgrid <- expand.grid(
lon = seq(from = -124.5, to = -114, by = 0.5),
lat = seq(from = 32.5, to = 42, by = 0.5)
) |>
- mutate(id = paste0(lat, "N_", abs(lon), "W"))
+ dplyr::mutate(id = paste0(lat, "N_", abs(lon), "W"))
ca_gridcell_ids <- ca_bboxgrid |>
vect(crs = "epsg:4326") |>
project(ca_shp) |>
@@ -25,7 +25,7 @@ ca_gridcell_ids <- ca_bboxgrid |>
intersect(ca_shp) |>
_$id
ca_grid <- ca_bboxgrid |>
- filter(id %in% ca_gridcell_ids)
+ dplyr::filter(id %in% ca_gridcell_ids)
PEcAn.data.atmosphere::extract.nc.ERA5(
slat = ca_grid$lat,