bedapub · julianesiebourg · Jul 16, 2025 · Oct 20, 2025 · Oct 23, 2025 · Oct 23, 2025
diff --git a/.Rbuildignore b/.Rbuildignore
@@ -23,3 +23,4 @@
 ^vignettes/cached/.*\.Rmd$
 # large vignette excluded on CRAN
 ^vignettes/plate_scoring_examples.Rmd$
+^AGENTS\.md$
diff --git a/AGENTS.md b/AGENTS.md
@@ -0,0 +1,86 @@
+# AGENTS.md
+
+## Code Style
+
+### General R Style
+
+- Use `snake_case` for function and variable names
+- Use roxygen2 for documentation with markdown enabled (`Roxygen: list(markdown = TRUE)`)
+- Prefer tidyverse-style code (pipes, dplyr verbs)
+- Use `assertthat::assert_that()` for input validation with informative error messages
+- Use `stringr::str_c()` and `stringr::str_glue()` for string operations
+
+### Package Dependencies
+
+- Core dependencies: `dplyr`, `purrr`, `ggplot2`, `tibble`, `tidyr`, `R6`, `data.table`
+- Use `rlang` for non-standard evaluation and function programming utilities
+- Use `data.table` for performance-critical operations
+
+### R6 Classes
+
+The main class is `BatchContainer` (R6). When working with R6:
+- Document methods using roxygen2 `@description` tags
+- Use `private` for internal state
+- Use `active` bindings for computed properties
+- Validate inputs in setters
+
+### Function Documentation
+
+```r
+#' Brief description of the function.
+#'
+#' Longer description if needed.
+#'
+#' @param param_name Description of parameter.
+#' @return Description of return value.
+#' @export
+#'
+#' @examples
+#' # Example code here
+```
+
+### Testing
+
+- Tests are in `tests/testthat/`
+- Use `testthat` for unit tests
+- Test file naming: `test-<feature>.R`
+- Use `expect_true()`, `expect_equal()`, `expect_error()`, etc.
+
+### Vignettes
+
+- Vignettes are in `vignettes/` as `.Rmd` files
+- Use knitr for rendering
+- Some vignettes have cached versions in `vignettes/cached/`
+
+## Key Concepts
+
+- **BatchContainer**: Holds dimensions for sample allocation and assignment
+- **Scoring functions**: Evaluate sample assignments (lower is better)
+- **Shuffle functions**: Propose sample permutations during optimization
+- **OSAT score**: Chi-square-based score for even sample distribution
+
+## Common Patterns
+
+### Creating a scoring function generator
+
+```r
+my_score_generator <- function(param1, param2) {
+  force(param1)
+  force(param2)
+
+  cached_value <- NULL
+
+  function(bc) {
+    # Compute and return score
+  }
+}
+```
+
+### Input validation
+
+```r
+assertthat::assert_that(
+  is.data.frame(df),
+  msg = "df should be a data.frame"
+)
+```
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,6 +1,6 @@
 Package: designit
 Title: Blocking and Randomization for Experimental Design
-Version: 0.5.0.9000
+Version: 0.5.1
 Authors@R: c(
     person(given = c(first = "Iakov", middle = "I."),
            family = "Davydov",
@@ -81,6 +81,6 @@ Suggests:
 Encoding: UTF-8
 LazyData: true
 Roxygen: list(markdown = TRUE)
-RoxygenNote: 7.3.1
+RoxygenNote: 7.3.2
 VignetteBuilder: knitr
 biocViews:
diff --git a/NAMESPACE b/NAMESPACE
@@ -17,6 +17,7 @@ export(form_homogeneous_subgroups)
 export(generate_terms)
 export(get_order)
 export(mk_exponentially_weighted_acceptance_func)
+export(mk_min_distance_score)
 export(mk_plate_scoring_functions)
 export(mk_simanneal_acceptance_func)
 export(mk_simanneal_temp_func)

diff --git a/NEWS.md b/NEWS.md
@@ -1,4 +1,6 @@
-# designit (development version)
+# designit 0.5.1
+
+* add `mk_min_distance_score()` and an order randomization example vignette
 
 # designit 0.5.0
 

diff --git a/R/score_min_distance.R b/R/score_min_distance.R
@@ -0,0 +1,128 @@
+#' Compute distance to next occurrence of the same value in a vector
+#'
+#' For each element in the input vector, computes the distance (in positions)
+#' to the next occurrence of the same value. If there is no next occurrence,
+#' returns `Inf`.
+#'
+#' @param x A vector.
+#'
+#' @return A numeric vector of the same length as `x`, where each element
+#'   is the distance to the next occurrence of the same value, or `Inf` if
+#'   there is no next occurrence.
+#'
+#' @keywords internal
+distance_to_next <- function(x) {
+  out <- rep(Inf, length(x))
+  pos_list <- split(seq_along(x), x)
+
+  for (p in pos_list) {
+    if (length(p) > 1) {
+      out[p[-length(p)]] <- diff(p)
+    }
+  }
+
+  out
+}
+
+
+#' Create a scoring function that penalizes small distances between
+#' samples of the same category
+#'
+#' This scoring function is useful for ensuring even spacing of sample
+#' categories along a one-dimensional layout, such as sample processing order.
+#' It penalizes cases where samples of the same category are closer together
+#' than the penalty threshold.
+#'
+#' The penalty for each pair of adjacent same-category samples is
+#' `(penalty_threshold - actual_distance)^2` when
+#' `actual_distance < penalty_threshold`, and 0 otherwise. The total score is
+#' the sum of all penalties.
+#'
+#' @param feature_var Name of the column in the batch container samples
+#'   that contains the feature/category to space out.
+#' @param batch_var Name of the column in the batch container that defines
+#'   the position/order of samples (e.g., processing order, time point).
+#' @param penalty_threshold Distances smaller than this value will be
+#'   penalized. If `NULL` (default), it is set to
+#'   `floor(n_locations / n_levels)`, which is the ideal spacing for even
+#'   distribution.
+#'
+#' @return A scoring function that takes a [BatchContainer] and returns
+#'   a numeric score (lower is better).
+#'
+#' @export
+#'
+#' @examples
+#' set.seed(42)
+#' samples <- data.frame(
+#'   sample_id = 1:40,
+#'   treatment = rep(paste0("Trt", 1:4), each = 10),
+#'   sex = rep(c("F", "M"), 20)
+#' )
+#'
+#' bc <- BatchContainer$new(
+#'   dimensions = list(position = 40)
+#' )
+#' bc <- assign_random(bc, samples)
+#'
+#' # Create scoring functions
+#' scoring <- list(
+#'   treatment = mk_min_distance_score("treatment", "position"),
+#'   sex = mk_min_distance_score("sex", "position")
+#' )
+#'
+#' # Optimize
+#' bc <- optimize_design(
+#'   bc,
+#'   scoring = scoring,
+#'   aggregate_scores_func = sum_scores,
+#'   max_iter = 500,
+#'   quiet = TRUE
+#' )
+mk_min_distance_score <- function(
+  feature_var,
+  batch_var,
+  penalty_threshold = NULL
+) {
+  force(feature_var)
+  force(batch_var)
+  force(penalty_threshold)
+
+  function(bc) {
+    samples <- bc$get_samples()
+
+    assertthat::assert_that(
+      feature_var %in% colnames(samples),
+      msg = stringr::str_glue(
+        "Column '{feature_var}' not found in batch container samples"
+      )
+    )
+    assertthat::assert_that(
+      batch_var %in% colnames(samples),
+      msg = stringr::str_glue(
+        "Column '{batch_var}' not found in batch container samples"
+      )
+    )
+
+    # Order samples by batch_var
+    samples <- samples[order(samples[[batch_var]]), ]
+    feature_values <- samples[[feature_var]]
+
+    # Compute penalty threshold if not provided
+    thresh <- penalty_threshold
+    if (is.null(thresh)) {
+      n_locations <- length(feature_values)
+      n_levels <- length(unique(feature_values))
+      thresh <- floor(n_locations / n_levels)
+    }
+
+    # Compute distances to next occurrence
+    d2n <- distance_to_next(feature_values)
+
+    # Only penalize distances below threshold
+    d2n <- d2n[d2n < thresh]
+
+    # Penalty is defined as (threshold - distance)^2
+    sum((thresh - d2n)^2)
+  }
+}
diff --git a/man/distance_to_next.Rd b/man/distance_to_next.Rd
diff --git a/man/mk_min_distance_score.Rd b/man/mk_min_distance_score.Rd