USCCANA · aoliveram · Mar 3, 2026 · Mar 3, 2026 · Mar 3, 2026 · Mar 3, 2026
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,14 +1,14 @@
 Package: netdiffuseR
 Title: Analysis of Diffusion and Contagion Processes on Networks
-Version: 1.24.0
+Version: 1.25.0
 Authors@R: c(
   person("George", "Vega Yon", email="g.vegayon@gmail.com", role=c("aut", "cre"),
     comment=c(ORCID = "0000-0002-3171-0844", what="Rewrite functions with Rcpp, plus new features")
     ),
   person("Thomas", "Valente", email="tvalente@usc.edu", role=c("aut", "cph"),
     comment=c(ORCID="0000-0002-8824-5816", what="R original code")),
   person("Anibal", "Olivera Morales", role = c("aut", "ctb"), 
-    comment=c(ORCID="0009-0000-3736-7939", what="Multi-diffusion version")),
+    comment=c(ORCID="0009-0000-3736-7939", what="Developer from V1.23.0")),
   person("Stephanie", "Dyal", email="stepharp@usc.edu", role=c("ctb"), comment="Package's first version"),
   person("Timothy", "Hayes", email="timothybhayes@gmail.com", role=c("ctb"), comment="Package's first version")
   )
@@ -21,7 +21,7 @@ Description: Empirical statistical analysis, visualization and simulation of
     9781881303213>, Myers (2000) <DOI:10.1086/303110>, Iyengar and others (2011)
     <DOI:10.1287/mksc.1100.0566>, Burt (1987) <DOI:10.1086/228667>; among others.
 Depends:
-    R (>= 3.1.1)
+    R (>= 3.5)
 License: MIT + file LICENSE
 LazyData: true
 Imports:
@@ -65,6 +65,7 @@ Collate:
     'bass.r'
     'bootnet.r'
     'citer_environment.R'
+    'collapse_timeframes.R'
     'data.r'
     'degree_adoption_diagnostic.R'
     'diffnet-c.R'

diff --git a/NAMESPACE b/NAMESPACE
@@ -98,6 +98,7 @@ export(bootnet)
 export(classify)
 export(classify_adopters)
 export(classify_graph)
+export(collapse_timeframes)
 export(compare_matrix)
 export(cumulative_adopt_count)
 export(degree_adoption_diagnostic)

diff --git a/NEWS.md b/NEWS.md
@@ -1,3 +1,18 @@
+# Changes in netdiffuseR version 1.25.0 (2026-03-14)
+
+* New function `collapse_timeframes()`: aggregates high-resolution or
+  continuous-time longitudinal edgelists into discrete time windows, ready
+  for use with `edgelist_to_adjmat()` or `as_diffnet()`. The `binarize`,
+  `cumulative`, and `symmetric` arguments give finer control over the
+  aggregation process.
+
+* New datasets `epigames` and `epigamesDiffNet`: a simulated epidemic game
+  network with 594 nodes and 15 time periods from the WKU Epi Games study.
+
+* Fixed CRAN example error in `round_to_seq()`: `plot(w, x)` replaced with
+  `plot(w)` to avoid `%||%` operator issue in R 4.4.0+'s `formula.default`
+  when called via `plot.data.frame()`.
+
 # Changes in netdiffuseR version 1.24.0 (2025-12-09)
 
 * New function `degree_adoption_diagnostic()` analyzes the correlation between network 

diff --git a/R/collapse_timeframes.R b/R/collapse_timeframes.R
@@ -0,0 +1,220 @@
+#' Collapse Timeframes in a Longitudinal Edgelist
+#'
+#' @description
+#' Takes a high-resolution or continuous-time longitudinal edgelist and
+#' collapses (discretizes) it into larger time windows. The output is a
+#' shorter, aggregated edgelist ready to be passed into
+#' \code{\link{edgelist_to_adjmat}} or \code{\link{as_diffnet}}.
+#'
+#' @details
+#' Windows are anchored at the earliest observed time: an observation at time
+#' \code{t} is assigned to window
+#' \code{ceiling((t - min(time) + 1e-9) / window_size)}, so the earliest
+#' observation always falls in window 1. The \code{1e-9} tolerance keeps values
+#' that sit on a window boundary, up to floating point error, in the later
+#' window.
+#'
+#' Rows with a missing ego, alter, time, or weight are dropped before
+#' aggregating. Weights of repeated ego-alter pairs within a window are summed.
+#'
+#' Post-processing is applied in this order: \code{binarize}, \code{symmetric},
+#' \code{cumulative}. When \code{symmetric} or \code{cumulative} would add an
+#' edge that is already present in the target window, the existing row (and its
+#' weight) is kept, so the result has at most one row per ego-alter-time
+#' combination.
+#'
+#' @param edgelist A \code{data.frame} representing the longitudinal edgelist.
+#' @param ego Character scalar. Name of the column representing the ego (sender).
+#' @param alter Character scalar. Name of the column representing the alter (receiver).
+#' @param timevar Character scalar. Name of the column representing the time variable.
+#' @param weightvar Character scalar or \code{NULL}. Name of the column representing
+#'   the edge weight. If \code{NULL}, the function tallies the number of interactions
+#'   within the time window as the weight.
+#' @param window_size Positive numeric scalar. The size of the time window to
+#'   collapse into, in the units of the numeric time variable (seconds for
+#'   date-times, days for \code{Date} values).
+#' @param time_format Character scalar or \code{NULL}. If the time variable is a
+#'   character or factor, the format passed to \code{as.POSIXct}.
+#'   For example, \code{"\%d-\%m-\%Y \%H:\%M"}.
+#' @param relative_time Logical scalar. If \code{TRUE}, normalizes the binned
+#'   times into a strict integer sequence starting at 1 (1, 2, 3...). If
+#'   \code{FALSE}, the window index is returned as is, so empty windows leave
+#'   gaps.
+#' @param binarize Logical scalar. If \code{TRUE}, sets all resulting edge weights to 1.
+#' @param cumulative Logical scalar. If \code{TRUE}, edges from previous time windows
+#'   are carried over to subsequent windows.
+#' @param symmetric Logical scalar. If \code{TRUE}, the resulting graph will be
+#'   symmetrized (i.e., if an edge A->B exists, an edge B->A is added).
+#'
+#' @return A \code{data.frame} with 4 columns: the ego, the alter, the new collapsed
+#'   discrete time, and the aggregated weight (named after \code{weightvar}, or
+#'   \code{"weight"} when \code{weightvar} is \code{NULL}), sorted by time, ego,
+#'   and alter. A zero-row \code{data.frame} is returned when no complete rows
+#'   are available.
+#'
+#' @export
+#' @examples
+#' # Toy hourly contact list spanning two days
+#' contacts <- data.frame(
+#'   sender   = c(1, 1, 2, 3),
+#'   receiver = c(2, 2, 3, 1),
+#'   time     = c(1, 5, 23, 30)
+#' )
+#'
+#' # Collapse the hourly edgelist into daily windows (window_size = 24); the
+#' # weight is the number of contacts per ego-alter pair and day
+#' collapse_timeframes(contacts, window_size = 24)
+#'
+#' # Binary, symmetric, and cumulative version
+#' collapse_timeframes(
+#'   contacts,
+#'   window_size = 24,
+#'   binarize = TRUE,
+#'   symmetric = TRUE,
+#'   cumulative = TRUE
+#' )
+collapse_timeframes <- function(
+    edgelist,
+    ego = "sender",
+    alter = "receiver",
+    timevar = "time",
+    weightvar = NULL,
+    window_size = 1,
+    time_format = NULL,
+    relative_time = TRUE,
+    binarize = FALSE,
+    cumulative = FALSE,
+    symmetric = FALSE) {
+  # Step 0: Input validation, so failures are reported up front
+  missing_cols <- setdiff(c(ego, alter, timevar, weightvar), names(edgelist))
+  if (length(missing_cols) > 0) {
+    stop(
+      "Column(s) not found in `edgelist`: ",
+      paste(missing_cols, collapse = ", "),
+      call. = FALSE
+    )
+  }
+  if (!is.numeric(window_size) || length(window_size) != 1L ||
+    !is.finite(window_size) || window_size <= 0) {
+    stop("`window_size` must be a single positive, finite number.", call. = FALSE)
+  }
+
+  # Step 1: Time Column Parsing
+  time_raw <- edgelist[[timevar]]
+
+  if (is.character(time_raw) || is.factor(time_raw)) {
+    time_chr <- as.character(time_raw)
+    time_raw <- if (is.null(time_format)) {
+      as.numeric(as.POSIXct(time_chr))
+    } else {
+      as.numeric(as.POSIXct(time_chr, format = time_format))
+    }
+  } else if (!is.numeric(time_raw)) {
+    # e.g., Date or POSIXct already
+    time_raw <- as.numeric(time_raw)
+  }
+
+  # Check for NAs after conversion
+  if (any(is.na(time_raw))) {
+    warning("There are NA values in the parsed time variable.")
+  }
+
+  # Step 2: Binning / Window Creation
+  if (all(is.na(time_raw))) {
+    # Nothing to bin (also covers a zero-row edgelist); avoids min() on empty input
+    discrete_time <- rep(NA_real_, length(time_raw))
+  } else {
+    t_min <- min(time_raw, na.rm = TRUE)
+    # The tiny offset puts the earliest time in window 1 and absorbs floating
+    # point error for values sitting on a window boundary
+    discrete_time <- ceiling((time_raw - t_min + 1e-9) / window_size)
+  }
+
+  # Step 3: Handling relative_time
+  if (relative_time) { # e.g. strict sequence 1, 2, 3
+    # sort() drops NAs, so missing times stay NA after matching
+    discrete_time <- match(discrete_time, sort(unique(discrete_time)))
+  }
+
+  # Working data frame with fixed internal column names; the user-facing names
+  # are only applied at the end so they can never clash with each other
+  work_df <- data.frame(
+    ego_col = edgelist[[ego]],
+    alter_col = edgelist[[alter]],
+    time_col = discrete_time,
+    weight_col = if (is.null(weightvar)) {
+      rep(1, length(discrete_time))
+    } else {
+      edgelist[[weightvar]]
+    },
+    stringsAsFactors = FALSE
+  )
+  out_names <- c(ego, alter, timevar, if (is.null(weightvar)) "weight" else weightvar)
+
+  # Remove rows with NAs in any of the variables used for aggregation
+  complete <- !is.na(work_df$ego_col) & !is.na(work_df$alter_col) &
+    !is.na(work_df$time_col) & !is.na(work_df$weight_col)
+  work_df <- work_df[complete, , drop = FALSE]
+
+  # aggregate() fails on zero rows, so return an empty edgelist instead
+  if (nrow(work_df) == 0L) {
+    rownames(work_df) <- NULL
+    colnames(work_df) <- out_names
+    return(work_df)
+  }
+
+  # Step 4: Aggregation (sum of weights per ego-alter-window)
+  agg_df <- stats::aggregate(
+    weight_col ~ ego_col + alter_col + time_col,
+    data = work_df,
+    FUN = sum
+  )
+
+  # Keeps the first row of every ego-alter-time combination
+  edge_cols <- c("ego_col", "alter_col", "time_col")
+  keep_first <- function(el) {
+    el[!duplicated(el[edge_cols]), , drop = FALSE]
+  }
+
+  # Step 5: Post-aggregation processing
+
+  # 5.1 Binarize
+  if (binarize) {
+    agg_df$weight_col <- 1
+  }
+
+  # 5.2 Symmetrize: add the reverse edge only where it does not exist yet, so
+  # an observed B->A keeps its own weight instead of being duplicated
+  if (symmetric) {
+    reversed <- agg_df
+    reversed$ego_col <- agg_df$alter_col
+    reversed$alter_col <- agg_df$ego_col
+    agg_df <- keep_first(rbind(agg_df, reversed))
+  }
+
+  # 5.3 Cumulative: carry every edge seen so far into each later window; an
+  # edge observed in the current window keeps its current weight
+  if (cumulative) {
+    all_periods <- sort(unique(agg_df$time_col))
+    per_period <- vector("list", length(all_periods))
+    carried <- agg_df[0, , drop = FALSE]
+    for (t_idx in seq_along(all_periods)) {
+      current <- agg_df[agg_df$time_col == all_periods[t_idx], , drop = FALSE]
+      if (nrow(carried) > 0) {
+        carried$time_col <- all_periods[t_idx]
+      }
+      carried <- keep_first(rbind(current, carried))
+      per_period[[t_idx]] <- carried
+    }
+    agg_df <- do.call(rbind, per_period)
+  }
+
+  # Apply standard sort for consistent outputs: time, ego, alter
+  order_idx <- order(agg_df$time_col, agg_df$ego_col, agg_df$alter_col)
+  agg_df <- agg_df[order_idx, , drop = FALSE]
+  rownames(agg_df) <- NULL
+
+  # Step 6: Output with 4 clean columns
+  colnames(agg_df) <- out_names
+  agg_df
+}
diff --git a/R/data.r b/R/data.r
@@ -778,23 +778,23 @@ NULL # "medInnovationsDiffNet"
 #' the Brazilian Farmers collected as part of the three country study implemented
 #' by Everett Rogers (Rogers, Ascroft, & Röling, 1970), and Korean Family Planning
 #' data collected by researchers at the Seoul National University's School of
-#' Public (Rogers & Kincaid, 1981). The table below summarizes the three datasets:
-#'
-#' \tabular{lccc}{
-#'		\tab	\bold{Medical Innovation}	\tab	\bold{Brazilian Farmers}	\tab	\bold{Korean Family Planning}	\cr
-#'	\emph{Country}	\tab	USA	\tab	Brazil	\tab	Korean	\cr
-#'	\emph{# Respondents}	\tab	125 Doctors	\tab	692 Farmers	\tab	1,047 Women	\cr
-#'	\emph{# Communities}	\tab	4	\tab	11	\tab	25	\cr
-#'	\emph{Innovation}	\tab	Tetracycline	\tab	Hybrid Corn Seed	\tab	Family Planning	\cr
-#'	\emph{Time for Diffusion}	\tab	18 Months	\tab	20 Years	\tab	11 Years	\cr
-#'	\emph{Year Data Collected}	\tab	1955-1956	\tab	1966	\tab	1973	\cr
-#'	\emph{Ave. Time to 50\%}	\tab	6	\tab	16	\tab	7	\cr
-#'	\emph{Highest Saturation}	\tab	0.89	\tab	0.98	\tab	0.83	\cr
-#'	\emph{Lowest Saturation}	\tab	0.81	\tab	0.29	\tab	0.44	\cr
-#'	\emph{Citation}	\tab	Coleman et al (1966)	\tab	Rogers et al (1970)	\tab	Rogers & Kincaid (1981)	\cr
-#'	}
-#'
-#' All datasets include a column called \emph{study} which is coded as
+#' Public Health (Rogers & Kincaid, 1981). The table below summarizes the datasets:
+#'
+#' \tabular{lcccc}{
+#' 		\tab	\bold{Medical Innovation}	\tab	\bold{Brazilian Farmers}	\tab	\bold{Korean Family Planning}	\tab	\bold{WKU Epi Games}	\cr
+#' 	\emph{Country}	\tab	USA	\tab	Brazil	\tab	Korea	\tab	USA	\cr
+#' 	\emph{# Respondents}	\tab	125 Doctors	\tab	692 Farmers	\tab	1,047 Women	\tab	594 Students	\cr
+#' 	\emph{# Communities}	\tab	4	\tab	11	\tab	25	\tab	Multiple groups	\cr
+#' 	\emph{Innovation}	\tab	Tetracycline	\tab	Hybrid Corn Seed	\tab	Family Planning	\tab	Masks/Medicine	\cr
+#' 	\emph{Time for Diffusion}	\tab	18 Months	\tab	20 Years	\tab	11 Years	\tab	15 Periods	\cr
+#' 	\emph{Year Data Collected}	\tab	1955-1956	\tab	1966	\tab	1973	\tab	Recent	\cr
+#' 	\emph{Ave. Time to 50\%}	\tab	6	\tab	16	\tab	7	\tab	N/A	\cr
+#' 	\emph{Highest Saturation}	\tab	0.89	\tab	0.98	\tab	0.83	\tab	N/A	\cr
+#' 	\emph{Lowest Saturation}	\tab	0.81	\tab	0.29	\tab	0.44	\tab	N/A	\cr
+#' 	\emph{Citation}	\tab	Coleman et al (1966)	\tab	Rogers et al (1970)	\tab	Rogers & Kincaid (1981)	\tab	WKU	\cr
+#' 	}
+#'
+#' All core datasets include a column called \emph{study} which is coded as
 #' (1) Medical Innovation (2) Brazilian Farmers, (3) Korean Family Planning.
 #'
 #' @section Right censored data:
@@ -938,3 +938,52 @@ NULL
 #' @author George G. Vega Yon
 #' @name fakeEdgelist
 NULL # "fakeEdgelist"
+
+
+#' Epi Games Dataset
+#'
+#' @description
+#' The WKU Epi Games dataset represents a simulated epidemic or game environment with
+#' dynamic encounters over 15 time periods. It provides both node-level
+#' attributes and a longitudinal edgelist.
+#'
+#' @format A list with two data frames:
+#'
+#' \strong{attributes}: A data frame with 594 rows and 9 variables representing nodes:
+#' \describe{
+#'  \item{id}{Unique identifier for the participant.}
+#'  \item{toa}{Time of Adoption (1 to 15), representing when the individual was first infected. Non-infected individuals have \code{NA}.}
+#'  \item{qyes_total}{Cumulative count of times the player participated or scored positively in informative/educational "quarantine" questionnaires.}
+#'  \item{qno_total}{Cumulative count of times the non-quarantine questionnaire factor was registered.}
+#'  \item{mask_prop}{Proportion of time (across 15 steps) the participant used the mask intervention (0.0 to 1.0).}
+#'  \item{med_prop}{Proportion of time the individual used pharmacological interventions or treatments.}
+#'  \item{group}{Experimental group or node cohort.}
+#'  \item{final_score}{Final score obtained in the game.}
+#'  \item{status}{Final state label ("infected" or "not_infected").}
+#' }
+#'
+#' \strong{edgelist}: A longitudinal data frame with 23,684 rows and 4 variables representing edges/contacts:
+#' \describe{
+#'  \item{sender}{Origin node ID of the contact.}
+#'  \item{receiver}{Destination node ID of the contact.}
+#'  \item{time}{Time period of the contact (1 to 15).}
+#'  \item{weight}{Strength, duration, or density of the exposure.}
+#' }
+#'
+#' @source WKU Epi Game simulation
+#' @family diffusion datasets
+#' @name epigames
+NULL # "epigames"
+
+#' \code{diffnet} version of the Epi Games data
+#'
+#' A directed dynamic graph with 594 vertices and 15 time periods. The attributes
+#' in the graph are described in \code{\link{epigames}}.
+#'
+#' Non-adopters have \code{toa = NA}.
+#'
+#' @format A \code{\link{diffnet}} class object.
+#' @source WKU Epi Game simulation
+#' @family diffusion datasets
+#' @name epigamesDiffNet
+NULL
diff --git a/R/plot_diffnet2.r b/R/plot_diffnet2.r
@@ -9,7 +9,7 @@
 #'
 #' x <- rnorm(100)
 #' w <- data.frame(as.integer(round_to_seq(x, as_factor = TRUE)),x)
-#' plot(w,x)
+#' plot(w)
 #'
 #' @seealso Used in \code{\link{diffmap}} and \code{\link{plot_diffnet2}}
 round_to_seq <- function(x, nlevels=20, as_factor=FALSE) {

diff --git a/README.md b/README.md
@@ -52,7 +52,7 @@ And the actual R package:
   Vega Yon G, Olivera Morales A, Valente T (2025). _netdiffuseR:
   Analysis of Diffusion and Contagion Processes on Networks_.
   doi:10.5281/zenodo.1039317 <https://doi.org/10.5281/zenodo.1039317>,
-  R package version 1.24.0, <https://github.com/USCCANA/netdiffuseR>.
+  R package version 1.25.0, <https://github.com/USCCANA/netdiffuseR>.
 
 To see these entries in BibTeX format, use 'print(<citation>,
 bibtex=TRUE)', 'toBibtex(.)', or set
@@ -374,7 +374,7 @@ sessionInfo()
 #> [1] stats     graphics  grDevices utils     datasets  methods   base     
 #> 
 #> other attached packages:
-#> [1] netdiffuseR_1.24.0
+#> [1] netdiffuseR_1.24.1
 #> 
 #> loaded via a namespace (and not attached):
 #>  [1] Matrix_1.7-4          jsonlite_2.0.0        dplyr_1.1.4          

diff --git a/data-raw/epigames.R b/data-raw/epigames.R
@@ -0,0 +1,17 @@
+# data-raw/epigames.R
+# Pre-processing script for the EpiGames Raw Dataset
+#
+# Run from the package root: the paths below are relative to it.
+
+# The raw data consists of an attributes data frame and an hourly edgelist,
+# both using consistent node IDs (1-594).
+# Load into a private environment so the session's workspace is left untouched
+# and a missing object is reported right away.
+raw_env <- new.env()
+load("data-raw/epigames_hourly.rda", envir = raw_env)
+stopifnot(exists("epigames_hourly", envir = raw_env, inherits = FALSE))
+
+epigames <- raw_env$epigames_hourly
+
+# Save compressed raw data
+usethis::use_data(epigames, overwrite = TRUE, compress = "xz")