ropensci · ThierryO · Apr 9, 2026 · Feb 18, 2026 · Feb 18, 2026 · Feb 18, 2026
diff --git a/.Rbuildignore b/.Rbuildignore
@@ -5,9 +5,11 @@
 ^LICENSE.md$
 ^Meta$
 ^README\.Rmd$
+^[.]?air[.]toml$
 ^\.Rproj\.user$
 ^\.github$
 ^\.httr-oauth$
+^\.vscode$
 ^\.zenodo\.json$
 ^_pkgdown.yml$
 ^checklist.yml$

diff --git a/.github/workflows/pr_title.yml b/.github/workflows/pr_title.yml
diff --git a/.vscode/extensions.json b/.vscode/extensions.json
@@ -0,0 +1,5 @@
+{
+    "recommendations": [
+        "Posit.air-vscode"
+    ]
+}
diff --git a/.vscode/settings.json b/.vscode/settings.json
@@ -0,0 +1,10 @@
+{
+    "[r]": {
+        "editor.formatOnSave": true,
+        "editor.defaultFormatter": "Posit.air-vscode"
+    },
+    "[quarto]": {
+        "editor.formatOnSave": true,
+        "editor.defaultFormatter": "quarto.quarto"
+    }
+}
diff --git a/.zenodo.json b/.zenodo.json
@@ -1,6 +1,6 @@
 {
   "title": "git2rdata: Store and Retrieve Data.frames in a Git Repository",
-  "version": "0.5.1",
+  "version": "0.5.2",
   "license": "GPL-3.0",
   "upload_type": "software",
   "description": "<p>The git2rdata package is an R package for writing and reading dataframes as plain text files. A metadata file stores important information. 1) Storing metadata allows to maintain the classes of variables. By default, git2rdata optimizes the data for file storage. The optimization is most effective on data containing factors. The optimization makes the data less human readable. The user can turn this off when they prefer a human readable format over smaller files. Details on the implementation are available in vignette(“plain_text”, package = “git2rdata”). 2) Storing metadata also allows smaller row based diffs between two consecutive commits. This is a useful feature when storing data as plain text files under version control. Details on this part of the implementation are available in vignette(“version_control”, package = “git2rdata”). Although we envisioned git2rdata with a git workflow in mind, you can use it in combination with other version control systems like subversion or mercurial. 3) git2rdata is a useful tool in a reproducible and traceable workflow. vignette(“workflow”, package = “git2rdata”) gives a toy example. 4) vignette(“efficiency”, package = “git2rdata”) provides some insight into the efficiency of file storage, git repository size and speed for writing and reading.<\/p>",

diff --git a/CITATION.cff b/CITATION.cff
@@ -40,4 +40,4 @@ identifiers:
   value: 10.5281/zenodo.1485309
 - type: url
   value: https://ropensci.github.io/git2rdata/
-version: 0.5.1
+version: 0.5.2
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,6 +1,6 @@
 Package: git2rdata
 Title: Store and Retrieve Data.frames in a Git Repository
-Version: 0.5.1
+Version: 0.5.2
 Authors@R: c(
     person("Thierry", "Onkelinx", , "thierry.onkelinx@inbo.be", role = c("aut", "cre"),
            comment = c(ORCID = "0000-0001-8804-4216", affiliation = "Research Institute for Nature and Forest (INBO)")),
@@ -11,7 +11,7 @@ Authors@R: c(
     person("Els", "Lommelen", , "els.lommelen@inbo.be", role = "ctb",
            comment = c(ORCID = "0000-0002-3481-5684", affiliation = "Research Institute for Nature and Forest (INBO)")),
     person("Research Institute for Nature and Forest (INBO)", , , "info@inbo.be", role = c("cph", "fnd"),
-           comment = c(ROR = "https://ror.org/00j54wy13"))
+           comment = c(ROR = "00j54wy13"))
   )
 Description: The git2rdata package is an R package for writing and reading
     dataframes as plain text files.  A metadata file stores important
@@ -66,9 +66,9 @@ Collate:
     'datahash.R'
     'display_metadata.R'
     'git2rdata_package.R'
+    'is_git2rmeta.R'
     'write_vc.R'
     'is_git2rdata.R'
-    'is_git2rmeta.R'
     'list_data.R'
     'meta.R'
     'print.R'

diff --git a/NEWS.md b/NEWS.md
@@ -1,3 +1,14 @@
+# git2rdata 0.5.2
+
+* `write_vc()` gains an optional `convert` argument for specifying column 
+  conversions. Conversions are applied before storing and reversed when 
+  reading data back. The convert information is stored in the metadata 
+  and added to the data frame attributes.
+* `read_vc()` now applies conversions specified in the metadata and adds 
+  the convert information to the data frame attributes.
+* Fixed a bug in `rename_variable()`. Thanks to @florisvdh for finding and
+  fixing it.
+
 # git2rdata 0.5.1
 
 * `write_vc()` stores metadata stored in the data frame.

diff --git a/R/read_vc.R b/R/read_vc.R
@@ -176,6 +176,12 @@ read_vc.character <- function(file, root = ".") {
     optimize = optimize
   )
 
+  # Apply read conversions if present
+  if (has_name(meta_data[["..generic"]], "convert")) {
+    convert <- meta_data[["..generic"]][["convert"]]
+    raw_data <- apply_convert(raw_data, convert, direction = "read")
+  }
+
   names(file) <- c(
     meta_data[["..generic"]][["data_hash"]],
     meta_data[["..generic"]][["hash"]]
@@ -209,6 +215,11 @@ read_vc.character <- function(file, root = ".") {
   attr(raw_data, "optimize") <- meta_data[["..generic"]][["optimize"]]
   attr(raw_data, "sorting") <- meta_data[["..generic"]][["sorting"]]
 
+  # Add convert to attributes if present
+  if (has_name(meta_data[["..generic"]], "convert")) {
+    attr(raw_data, "convert") <- meta_data[["..generic"]][["convert"]]
+  }
+
   class(raw_data) <- c("git2rdata", class(raw_data))
 
   return(raw_data)

diff --git a/R/rename_variable.R b/R/rename_variable.R
@@ -13,7 +13,8 @@
 #' @inheritParams write_vc
 #' @param change A named vector with the old names as values and the new names
 #' as names.
-#' @return invisible `NULL`.
+#' @return a named vector with the file paths relative to `root`. The names
+#' contain the hashes of the files.
 #' @export
 #' @examples
 #'
@@ -106,6 +107,8 @@ rename_variable.character <- function(file, change, root = ".", ...) {
   yaml[["..generic"]][["data_hash"]] <- datahash(file["raw_file"])
   write_yaml(yaml, file["meta_file"], fileEncoding = "UTF-8")
 
+  root <- normalizePath(root, winslash = "/", mustWork = TRUE)
+  file <- normalizePath(file, winslash = "/", mustWork = TRUE)
   hashes <- remove_root(file = file, root = root)
   names(hashes) <-
     c(

diff --git a/R/utils.R b/R/utils.R
@@ -11,3 +11,190 @@ display <- function(verbose, message, linefeed = TRUE) {
   }
   return(invisible(NULL))
 }
+
+#' Check a `convert` specification and return it in validated form
+#'
+#' Returns an empty list when `convert` is `NULL` or has length zero.
+#' Otherwise the overall structure is checked with
+#' `validate_convert_structure()` and every element is replaced by the result
+#' of `validate_convert_element()`.
+#' @param convert the conversion specification to check.
+#' @param colnames_x column names, passed on to
+#' `validate_convert_structure()`.
+#' @return the validated `convert`, or `list()` when there is nothing to
+#' validate.
+#' @noRd
+#' @importFrom assertthat assert_that
+validate_convert <- function(convert, colnames_x) {
+  nothing_to_validate <- is.null(convert) || length(convert) == 0
+  if (nothing_to_validate) {
+    return(list())
+  }
+
+  validate_convert_structure(convert, colnames_x)
+
+  # look every element up by name and store the checked version in its place
+  for (target in names(convert)) {
+    checked <- validate_convert_element(convert[[target]], target)
+    convert[[target]] <- checked
+  }
+
+  convert
+}
+
+#' Validate convert structure
+#'
+#' Checks that `convert` is a list whose elements all carry a unique,
+#' non-empty name that matches one of `colnames_x`.
+#' @param convert the conversion specification to check.
+#' @param colnames_x character vector with the allowed names.
+#' @return Called for its side effect: signals an error when a check fails.
+#' @noRd
+#' @importFrom assertthat assert_that
+validate_convert_structure <- function(convert, colnames_x) {
+  assert_that(
+    is.list(convert),
+    msg = "convert must be a list"
+  )
+
+  convert_names <- names(convert)
+  assert_that(
+    !is.null(convert_names),
+    msg = "convert must be a named list"
+  )
+
+  # `NA != ""` yields NA, so test for missing names explicitly
+  assert_that(
+    all(!is.na(convert_names) & convert_names != ""),
+    msg = "all elements of convert must be named"
+  )
+
+  # `convert[[name]]` always returns the first match, so with a duplicated
+  # name the first element would be used twice and the other one ignored
+  assert_that(
+    anyDuplicated(convert_names) == 0,
+    msg = paste(
+      "all names in convert must be unique.",
+      "Duplicated:",
+      paste(unique(convert_names[duplicated(convert_names)]), collapse = ", ")
+    )
+  )
+
+  assert_that(
+    all(convert_names %in% colnames_x),
+    msg = paste(
+      "all names in convert must be present in colnames of x.",
+      "Missing:",
+      paste(convert_names[!convert_names %in% colnames_x], collapse = ", ")
+    )
+  )
+}
+
+#' Check the conversion specification of one column
+#'
+#' The element must be a character vector of length 2 named `"write"` and
+#' `"read"`. Both entries are passed to `validate_convert_function()`.
+#' @param conv the element of `convert` to check.
+#' @param col_name name of the element; used in the error messages.
+#' @return `conv` with its entries ordered as `write`, `read`.
+#' @noRd
+#' @importFrom assertthat assert_that
+validate_convert_element <- function(conv, col_name) {
+  # every message refers to the same element, so fill in its name in one place
+  describe <- function(template) {
+    sprintf(template, col_name)
+  }
+  directions <- c("write", "read")
+  element_names <- names(conv)
+
+  assert_that(
+    is.character(conv),
+    msg = describe("convert[['%s']] must be a character vector")
+  )
+  assert_that(
+    length(conv) == 2,
+    msg = describe("convert[['%s']] must have length 2")
+  )
+  assert_that(
+    !is.null(element_names),
+    msg = describe("convert[['%s']] must be a named vector")
+  )
+  assert_that(
+    all(element_names %in% directions),
+    msg = describe("convert[['%s']] must have names 'write' and 'read'")
+  )
+  assert_that(
+    all(directions %in% element_names),
+    msg = describe(
+      "convert[['%s']] must have both 'write' and 'read' elements"
+    )
+  )
+
+  for (direction in directions) {
+    validate_convert_function(conv[[direction]], col_name, direction)
+  }
+  conv[directions]
+}
+
+#' Validate a convert function specification
+#'
+#' Checks that `func_spec` has the form `"package::function"`, that the
+#' package can be loaded and that the function belongs to that package.
+#' @param func_spec the specification to check, e.g. `"base::as.character"`.
+#' @param col_name name of the column the specification belongs to; only used
+#' in the error messages.
+#' @param direction `"write"` or `"read"`; only used in the error messages.
+#' @return Called for its side effect: signals an error when a check fails.
+#' @noRd
+#' @importFrom assertthat assert_that
+validate_convert_function <- function(func_spec, col_name, direction) {
+  assert_that(
+    grepl("::", func_spec, fixed = TRUE),
+    msg = sprintf(
+      "convert[['%s']][['%s']] must be in 'package::function' format",
+      col_name,
+      direction
+    )
+  )
+
+  parts <- strsplit(func_spec, "::", fixed = TRUE)[[1]]
+  assert_that(
+    length(parts) == 2,
+    msg = sprintf(
+      "convert[['%s']][['%s']] must have exactly one '::'",
+      col_name,
+      direction
+    )
+  )
+
+  pkg_name <- parts[1]
+  func_name <- parts[2]
+
+  assert_that(
+    nzchar(pkg_name) && nzchar(func_name),
+    msg = sprintf(
+      "convert[['%s']][['%s']] has empty package or function name",
+      col_name,
+      direction
+    )
+  )
+
+  if (!requireNamespace(pkg_name, quietly = TRUE)) {
+    stop(
+      sprintf(
+        paste(
+          "Package '%s' required for convert[['%s']][['%s']]",
+          "is not available"
+        ),
+        pkg_name,
+        col_name,
+        direction
+      ),
+      call. = FALSE
+    )
+  }
+
+  # `exists()` inherits by default. A lookup that starts in the namespace
+  # would therefore also succeed for functions that are merely visible from
+  # it (base, the global environment, attached packages), e.g. "stats::paste".
+  # Only follow the enclosing environments for exported names, which is where
+  # re-exported functions live; anything else must be defined in the namespace
+  # itself.
+  pkg_namespace <- asNamespace(pkg_name)
+  is_exported <- func_name %in% getNamespaceExports(pkg_namespace)
+  if (
+    !exists(
+      func_name,
+      envir = pkg_namespace,
+      mode = "function",
+      inherits = is_exported
+    )
+  ) {
+    stop(
+      sprintf(
+        paste(
+          "Function '%s' not found in package '%s'",
+          "for convert[['%s']][['%s']]"
+        ),
+        func_name,
+        pkg_name,
+        col_name,
+        direction
+      ),
+      call. = FALSE
+    )
+  }
+}
+
+#' Apply conversion functions to columns
+#'
+#' For every name in `convert`, the matching column of `x` is replaced by the
+#' result of calling the function given for `direction` on that column.
+#' @param x the object holding the columns, e.g. a data.frame.
+#' @param convert a named list. Every element holds a `"package::function"`
+#' specification for `"write"` and for `"read"`. `NULL` or an empty object
+#' leaves `x` unchanged.
+#' @param direction which specification to apply: `"write"` or `"read"`.
+#' @return `x` with the converted columns.
+#' @noRd
+apply_convert <- function(x, convert, direction = "write") {
+  if (is.null(convert) || length(convert) == 0) {
+    return(x)
+  }
+
+  direction <- match.arg(direction, c("write", "read"))
+  # fallback position for specifications without names
+  position <- c(write = 1L, read = 2L)[[direction]]
+
+  # NOTE(review): `read_vc()` passes the `convert` entry of the metadata file
+  # to this function, so reading data calls whatever function that file names
+  # on the column content. Only read data from trusted sources.
+  for (col_name in names(convert)) {
+    spec <- convert[[col_name]]
+    # select by name when available: the order of the entries is not
+    # guaranteed, e.g. after manual changes to the metadata
+    func_spec <- if (direction %in% names(spec)) {
+      spec[[direction]]
+    } else {
+      spec[[position]]
+    }
+    parts <- strsplit(func_spec, "::", fixed = TRUE)[[1]]
+    if (length(parts) != 2 || !all(nzchar(parts))) {
+      stop(
+        sprintf(
+          "convert[['%s']][['%s']] must be in 'package::function' format",
+          col_name,
+          direction
+        ),
+        call. = FALSE
+      )
+    }
+    pkg_name <- parts[1]
+    func_name <- parts[2]
+
+    func <- get(func_name, envir = asNamespace(pkg_name), mode = "function")
+    x[[col_name]] <- func(x[[col_name]])
+  }
+
+  return(x)
+}