diff --git a/.gitignore b/.gitignore
index af32788ff..be3cd74c1 100644
--- a/.gitignore
+++ b/.gitignore
@@ -114,6 +114,7 @@ celerybeat.pid
# Environments
.env
+.Renviron
.venv
env/
venv/
diff --git a/Makefile b/Makefile
index 078b6ac19..de573b998 100644
--- a/Makefile
+++ b/Makefile
@@ -67,9 +67,10 @@ version:
@:$(call check_defined, tag, new semver version tag to use on pyproject.toml)
@poetry version $(tag)
@echo "__version__ = \"$$(poetry version -s)\"" > validmind/__version__.py
+ @sed -i '' 's/^Version: .*/Version: '"$$(poetry version -s)"'/' r/validmind/DESCRIPTION
@echo "Version updated to $$(poetry version -s)"
- @echo "Commiting changes to pyproject.toml and __version__.py with message: $$(poetry version -s)"
- @git add pyproject.toml validmind/__version__.py
+	@echo "Committing changes to pyproject.toml, __version__.py and r/validmind/DESCRIPTION with message: $$(poetry version -s)"
+ @git add pyproject.toml validmind/__version__.py r/validmind/DESCRIPTION
@git commit -m "$$(poetry version -s)"
generate-test-id-types:
diff --git a/README.md b/README.md
index 487780fc9..4078f355f 100644
--- a/README.md
+++ b/README.md
@@ -118,21 +118,71 @@ You can install the `transformers`, `torch` and `openai` dependencies using the
poetry install --extras llm
```
-### Installing R dependencies
+### Setting up R support
-If you want to use the R support that is provided by the ValidMind Library, you must have R installed on your machine. You can download R from . On a Mac, you can install R using Homebrew:
+#### 1. Install R
+
+You can download R from <https://cran.r-project.org/>. On macOS, the easiest way is via Homebrew:
```bash
brew install r
```
-Once you have R installed, install the `r-support` extra to install the necessary dependencies for R by running:
+#### 2. Install Python dependencies
+
+Install `rpy2` so the Python library can interface with R models. On macOS, you may need to build from source to match your R version:
```bash
-poetry install
+# Try the standard install first
pip install rpy2
+
+# If you get R library loading errors, rebuild against your installed R:
+R_HOME=$(Rscript -e 'cat(R.home())') pip install --no-binary :all: --force-reinstall rpy2
+```
+
+#### 3. Install R packages
+
+Open R (type `R` in your terminal) and install the required packages:
+
+```r
+install.packages(c("reticulate", "dplyr", "caTools", "knitr", "glue", "plotly", "htmltools", "rmarkdown", "DT", "base64enc"))
+```
+
+Then install the ValidMind R package from source:
+
+```r
+install.packages("r/validmind", repos = NULL, type = "source")
```
+#### 4. Set up VS Code / Cursor for R
+
+No RStudio required. Install the **R extension** (`REditorSupport.r`) in VS Code or Cursor:
+
+1. Open Extensions (`Cmd+Shift+X`) and search for "R"
+2. Install the **R** extension by REditorSupport
+3. Optionally install the `languageserver` R package for autocomplete: `install.packages("languageserver")`
+
+With the extension installed:
+- Open `.Rmd` files and run chunks with `Cmd+Shift+Enter`
+- Render full documents with `Cmd+Shift+K`
+- Use the R terminal panel for interactive sessions
+
+Alternatively, you can run R notebooks as Jupyter notebooks by installing the R kernel:
+
+```r
+install.packages("IRkernel")
+IRkernel::installspec()
+```
+
+Then create/open `.ipynb` files in VS Code and select the R kernel.
+
+#### 5. Run the quickstart notebooks
+
+Launch R from the repository root (so dataset paths resolve correctly) and run through the notebooks in `notebooks/quickstart/`:
+
+- `quickstart_model_documentation.Rmd` — model documentation workflow
+- `quickstart_model_validation.Rmd` — model validation workflow
+
### Versioning
Make sure you bump the package version before merging a PR with the following command:
diff --git a/notebooks/code_sharing/r/r_custom_tests.Rmd b/notebooks/code_sharing/r/r_custom_tests.Rmd
index 63e52f96a..cd931a562 100644
--- a/notebooks/code_sharing/r/r_custom_tests.Rmd
+++ b/notebooks/code_sharing/r/r_custom_tests.Rmd
@@ -109,15 +109,11 @@ Get your code snippet:
Next, replace this placeholder with your own code snippet:
```{r}
-# Find the path to your Python runtime by running `python -V` in your terminal
-# python_version <- ""
-
vm_r <- vm(
api_host = "https://api.prod.validmind.ai/api/v1/tracking",
api_key = "...",
api_secret = "...",
- model = "...",
- python_version = python_version
+ model = "..."
)
```
diff --git a/notebooks/code_sharing/r/r_customer_churn_demo.Rmd b/notebooks/code_sharing/r/r_customer_churn_demo.Rmd
index ab4c44eb9..8aae0e652 100644
--- a/notebooks/code_sharing/r/r_customer_churn_demo.Rmd
+++ b/notebooks/code_sharing/r/r_customer_churn_demo.Rmd
@@ -24,7 +24,10 @@ We will train a sample model and demonstrate the following documentation functio
```{r setup, include=FALSE}
library(reticulate)
-python_version <- ""
+python_version <- Sys.getenv("VALIDMIND_PYTHON", Sys.which("python"))
+if (nchar(python_version) > 0 && !startsWith(python_version, "/")) {
+ python_version <- file.path(getwd(), python_version)
+}
use_python(python_version)
library(validmind)
@@ -64,11 +67,10 @@ The code snippet can be copied and pasted directly in the cell below to initiali
```{r}
vm_r <- vm(
+ api_host="https://app.prod.validmind.ai/api/v1/tracking",
api_key="",
api_secret="",
- model="",
- python_version=python_version,
- api_host="https://app.prod.validmind.ai/api/v1/tracking"
+ model=""
)
```
diff --git a/notebooks/code_sharing/r/r_customer_churn_demo_xgboost.Rmd b/notebooks/code_sharing/r/r_customer_churn_demo_xgboost.Rmd
index 44e9e9318..597df3f31 100644
--- a/notebooks/code_sharing/r/r_customer_churn_demo_xgboost.Rmd
+++ b/notebooks/code_sharing/r/r_customer_churn_demo_xgboost.Rmd
@@ -24,7 +24,10 @@ We will train a sample model and demonstrate the following documentation functio
```{r setup, include=FALSE}
library(reticulate)
-python_version <- ""
+python_version <- Sys.getenv("VALIDMIND_PYTHON", Sys.which("python"))
+if (nchar(python_version) > 0 && !startsWith(python_version, "/")) {
+ python_version <- file.path(getwd(), python_version)
+}
use_python(python_version)
library(validmind)
@@ -65,11 +68,10 @@ The code snippet can be copied and pasted directly in the cell below to initiali
```{r}
vm_r <- vm(
+ api_host="https://app.prod.validmind.ai/api/v1/tracking",
api_key="",
api_secret="",
- model="",
- python_version=python_version,
- api_host="https://app.prod.validmind.ai/api/v1/tracking"
+ model=""
)
```
diff --git a/notebooks/code_sharing/r/r_mortality_demo.Rmd b/notebooks/code_sharing/r/r_mortality_demo.Rmd
index b81eed50e..a8bb252bb 100644
--- a/notebooks/code_sharing/r/r_mortality_demo.Rmd
+++ b/notebooks/code_sharing/r/r_mortality_demo.Rmd
@@ -86,7 +86,10 @@ The features used in the mortality model are:
```{r setup, include=FALSE}
library(reticulate)
-python_version <- ""
+python_version <- Sys.getenv("VALIDMIND_PYTHON", Sys.which("python"))
+if (nchar(python_version) > 0 && !startsWith(python_version, "/")) {
+ python_version <- file.path(getwd(), python_version)
+}
use_python(python_version)
library(magrittr) # needs to be run every time you start R and want to use %>%
@@ -156,11 +159,10 @@ initialize the ValidMind Library when run:
```{r}
vm_r <- vm(
+ api_host="https://app.prod.validmind.ai/api/v1/tracking",
api_key="",
api_secret="",
- model="",
- python_version=python_version,
- api_host="https://app.prod.validmind.ai/api/v1/tracking"
+ model=""
)
```
diff --git a/notebooks/code_sharing/r/r_time_series_data_validation.Rmd b/notebooks/code_sharing/r/r_time_series_data_validation.Rmd
index 2965eb390..bc2279e63 100644
--- a/notebooks/code_sharing/r/r_time_series_data_validation.Rmd
+++ b/notebooks/code_sharing/r/r_time_series_data_validation.Rmd
@@ -17,7 +17,10 @@ Finally, define and **configure** the specific use case we are working on by set
```{r setup, include=FALSE}
library(reticulate)
-python_version <- ""
+python_version <- Sys.getenv("VALIDMIND_PYTHON", Sys.which("python"))
+if (nchar(python_version) > 0 && !startsWith(python_version, "/")) {
+ python_version <- file.path(getwd(), python_version)
+}
use_python(python_version)
library(validmind)
@@ -58,11 +61,10 @@ The code snippet can be copied and pasted directly in the cell below to initiali
```{r}
vm_r <- vm(
+ api_host="https://app.prod.validmind.ai/api/v1/tracking",
api_key="",
api_secret="",
- model="",
- python_version=python_version,
- api_host="https://app.prod.validmind.ai/api/v1/tracking"
+ model=""
)
```
diff --git a/notebooks/code_sharing/r/r_time_series_model_validation.Rmd b/notebooks/code_sharing/r/r_time_series_model_validation.Rmd
index 0a1c08b56..08cd9596e 100644
--- a/notebooks/code_sharing/r/r_time_series_model_validation.Rmd
+++ b/notebooks/code_sharing/r/r_time_series_model_validation.Rmd
@@ -17,7 +17,10 @@ Finally, define and **configure** the specific use case we are working on by set
```{r setup, include=FALSE}
library(reticulate)
-python_version <- "/Users/erichare/.pyenv/versions/3.10.10/bin/python"
+python_version <- Sys.getenv("VALIDMIND_PYTHON", Sys.which("python"))
+if (nchar(python_version) > 0 && !startsWith(python_version, "/")) {
+ python_version <- file.path(getwd(), python_version)
+}
use_python(python_version)
library(magrittr) # needs to be run every time you start R and want to use %>%
@@ -75,11 +78,10 @@ The code snippet can be copied and pasted directly in the cell below to initiali
```{r}
vm_r <- vm(
- api_key="b34dfe4dcb5491212be3eefe77c85cd6",
- api_secret="40f8d2d583baa9e730a7f8872dd57e2f4657c7918c13fa259ba7ccda8a60e858",
- model="clmp6k8e800ds19mot0zu8o34",
- python_version=python_version,
- api_host="https://app.prod.validmind.ai/api/v1/tracking"
+ api_host="https://app.prod.validmind.ai/api/v1/tracking",
+ api_key="",
+ api_secret="",
+ model=""
)
```
diff --git a/notebooks/quickstart/quickstart_model_documentation.Rmd b/notebooks/quickstart/quickstart_model_documentation.Rmd
new file mode 100644
index 000000000..251899d47
--- /dev/null
+++ b/notebooks/quickstart/quickstart_model_documentation.Rmd
@@ -0,0 +1,227 @@
+---
+title: "Quickstart for Model Documentation (R)"
+author: "ValidMind"
+date: "2026-03-18"
+output: html_document
+---
+
+# Quickstart for Model Documentation
+
+Learn the basics of using ValidMind to document models as part of a model development workflow using R. This notebook uses the ValidMind R package (a `reticulate` wrapper around the Python library) to generate a draft of documentation for a binary classification model.
+
+We will:
+
+1. Import a sample dataset and preprocess it
+2. Split the datasets and initialize them for use with ValidMind
+3. Train a logistic regression (GLM) model and initialize it for use with testing
+4. Run the full suite of documentation tests, sending results to the ValidMind Platform
+
+## Setting up
+
+The Python path is auto-configured via the `VALIDMIND_PYTHON` environment variable.
+If not set, it falls back to the system Python. For local development, create a
+`.Renviron` file in the project root with `VALIDMIND_PYTHON=.venv/bin/python`.
+
+```{r setup, include=FALSE}
+library(reticulate)
+
+python_version <- Sys.getenv("VALIDMIND_PYTHON", Sys.which("python"))
+if (nchar(python_version) > 0 && !startsWith(python_version, "/")) {
+ python_version <- file.path(getwd(), python_version)
+}
+use_python(python_version, required = TRUE)
+
+library(validmind)
+library(dplyr)
+library(caTools)
+library(knitr)
+
+knitr::opts_chunk$set(warning = FALSE, message = FALSE)
+```
+
+## Initialize the ValidMind Library
+
+Log in to the [ValidMind Platform](https://app.prod.validmind.ai) and register a model:
+
+1. Navigate to **Inventory** and click **+ Register Model**.
+2. Under **Documents > Development**, select the `Binary classification` template.
+3. Go to **Getting Started**, select `Development` from the **DOCUMENT** drop-down, and copy the code snippet.
+
+Replace the placeholder values below with your own credentials:
+
+```{r}
+vm_r <- vm(
+ api_host = "https://app.prod.validmind.ai/api/v1/tracking",
+ api_key = "",
+ api_secret = "",
+ model = "",
+ document = "documentation"
+)
+```
+
+## Preview the documentation template
+
+Verify the connection and see the documentation structure:
+
+```{r}
+vm_r$preview_template()
+```
+
+## Load the demo dataset
+
+We use the Bank Customer Churn dataset for this demonstration:
+
+```{r}
+customer_churn <- reticulate::import(
+ "validmind.datasets.classification.customer_churn"
+)
+
+cat(sprintf(
+ paste0(
+ "Loaded demo dataset with:\n\n\t- Target column: '%s'",
+ "\n\t- Class labels: %s\n"
+ ),
+ customer_churn$target_column,
+ paste(
+ names(customer_churn$class_labels),
+ customer_churn$class_labels,
+ sep = ": ", collapse = ", "
+ )
+))
+
+data <- customer_churn$load_data()
+head(data)
+```
+
+## Initialize the raw dataset
+
+Before running tests, initialize a ValidMind dataset object for the raw data:
+
+```{r}
+vm_raw_dataset <- vm_r$init_dataset(
+ dataset = data,
+ input_id = "raw_dataset",
+ target_column = customer_churn$target_column,
+ class_labels = customer_churn$class_labels
+)
+```
+
+## Preprocess the raw dataset
+
+Handle categorical variables using one-hot encoding and remove unnecessary columns:
+
+```{r}
+# load_data() already drops RowNumber, CustomerId, Surname
+# One-hot encode categorical variables
+geo_dummies <- model.matrix(~ Geography - 1, data = data)
+gender_dummies <- model.matrix(~ Gender - 1, data = data)
+data_processed <- data %>% select(-Geography, -Gender)
+data_processed <- cbind(data_processed, geo_dummies, gender_dummies)
+```
+
+### Split the dataset
+
+Split into training (60%), validation (20%), and test (20%) sets:
+
+```{r}
+set.seed(42)
+
+# First split: 80% train+validation, 20% test
+target_col <- customer_churn$target_column
+split1 <- sample.split(data_processed[[target_col]], SplitRatio = 0.8)
+train_val_data <- subset(data_processed, split1 == TRUE)
+test_data <- subset(data_processed, split1 == FALSE)
+
+# Second split: 75% train, 25% validation (of the 80% = 60/20 overall)
+split2 <- sample.split(train_val_data[[target_col]], SplitRatio = 0.75)
+train_data <- subset(train_val_data, split2 == TRUE)
+validation_data <- subset(train_val_data, split2 == FALSE)
+```
+
+## Train a logistic regression model
+
+Train a GLM with a binomial family (logistic regression):
+
+```{r}
+formula <- as.formula(paste(target_col, "~ ."))
+model <- glm(formula, data = train_data, family = binomial)
+summary(model)
+```
+
+## Initialize the ValidMind datasets
+
+```{r}
+vm_train_ds <- vm_r$init_dataset(
+ dataset = train_data,
+ input_id = "train_dataset",
+ target_column = customer_churn$target_column
+)
+
+vm_test_ds <- vm_r$init_dataset(
+ dataset = test_data,
+ input_id = "test_dataset",
+ target_column = customer_churn$target_column
+)
+```
+
+## Initialize a model object
+
+Save the R model and initialize it with ValidMind:
+
+```{r}
+model_path <- save_model(model)
+
+vm_model <- vm_r$init_r_model(
+ model_path = model_path,
+ input_id = "model"
+)
+```
+
+### Assign predictions
+
+Link model predictions to the training and testing datasets:
+
+```{r}
+vm_train_ds$assign_predictions(model = vm_model)
+vm_test_ds$assign_predictions(model = vm_model)
+```
+
+## Run the full suite of tests
+
+Build the test configuration that maps each test to its required inputs:
+
+```{r}
+# Import the test config helper from the Python customer_churn module
+customer_churn <- reticulate::import(
+ "validmind.datasets.classification.customer_churn"
+)
+test_config <- customer_churn$get_demo_test_config()
+```
+
+Preview the test configuration:
+
+```{r}
+vm_utils <- reticulate::import("validmind.utils")
+vm_utils$preview_test_config(test_config)
+```
+
+Run the full documentation test suite and upload results to the ValidMind Platform:
+
+```{r}
+full_suite <- vm_r$run_documentation_tests(config = test_config)
+```
+
+## Next steps
+
+Head to the [ValidMind Platform](https://app.prod.validmind.ai) to view the generated documentation:
+
+1. Navigate to **Inventory** and select your model.
+2. Click **Development** under Documents to see the full draft of your model documentation.
+
+From there, you can make qualitative edits, collaborate with validators, and submit for approval.
+
+---
+
+*Copyright 2023-2026 ValidMind Inc. All rights reserved.*
+*Refer to [LICENSE](https://github.com/validmind/validmind-library/blob/main/LICENSE) for details.*
+*SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial*
diff --git a/notebooks/quickstart/quickstart_model_validation.Rmd b/notebooks/quickstart/quickstart_model_validation.Rmd
new file mode 100644
index 000000000..6e43444e4
--- /dev/null
+++ b/notebooks/quickstart/quickstart_model_validation.Rmd
@@ -0,0 +1,335 @@
+---
+title: "Quickstart for Model Validation (R)"
+author: "ValidMind"
+date: "2026-03-18"
+output: html_document
+---
+
+# Quickstart for Model Validation
+
+Learn the basics of using ValidMind to validate models as part of a model validation workflow using R. This notebook uses the ValidMind R package (a `reticulate` wrapper around the Python library) to generate a draft of a validation report for a binary classification model.
+
+We will:
+
+1. Import a sample dataset and preprocess it, then initialize datasets for use with ValidMind
+2. Independently verify data quality tests performed on datasets by model development
+3. Train a champion model for evaluation
+4. Run model evaluation tests with the ValidMind Library
+
+## Setting up
+
+The Python path is auto-configured via the `VALIDMIND_PYTHON` environment variable.
+If not set, it falls back to the system Python. For local development, create a
+`.Renviron` file in the project root with `VALIDMIND_PYTHON=.venv/bin/python`.
+
+```{r setup, include=FALSE}
+library(reticulate)
+
+python_version <- Sys.getenv("VALIDMIND_PYTHON", Sys.which("python"))
+if (nchar(python_version) > 0 && !startsWith(python_version, "/")) {
+ python_version <- file.path(getwd(), python_version)
+}
+use_python(python_version, required = TRUE)
+
+library(validmind)
+library(dplyr)
+library(caTools)
+library(knitr)
+
+knitr::opts_chunk$set(warning = FALSE, message = FALSE)
+```
+
+## Initialize the ValidMind Library
+
+Log in to the [ValidMind Platform](https://app.prod.validmind.ai) and register a model:
+
+1. Navigate to **Inventory** and click **+ Register Model**.
+2. Assign yourself as a **Validator** (remove yourself from Owner and Developer roles).
+3. Under **Documents > Validation**, select the `Generic Validation Report` template.
+4. Go to **Getting Started**, select `Validation` from the **DOCUMENT** drop-down, and copy the code snippet.
+
+Replace the placeholder values below with your own credentials:
+
+```{r}
+vm_r <- vm(
+ api_host = "https://app.prod.validmind.ai/api/v1/tracking",
+ api_key = "",
+ api_secret = "",
+ model = "",
+ document = "validation-report"
+)
+```
+
+## Preview the validation report template
+
+Verify the connection and see the validation report structure:
+
+```{r}
+vm_r$preview_template()
+```
+
+## Identify available tests
+
+List the tasks and tags available in the ValidMind test library:
+
+```{r}
+vm_r$tests$list_tasks_and_tags()
+```
+
+List all data quality tests for classification:
+
+```{r}
+vm_r$tests$list_tests(tags = list("data_quality"), task = "classification")
+```
+
+## Load the demo dataset
+
+We use the Bank Customer Churn dataset for this demonstration:
+
+```{r}
+customer_churn <- reticulate::import(
+ "validmind.datasets.classification.customer_churn"
+)
+
+cat(sprintf(
+ paste0(
+ "Loaded demo dataset with:\n\n\t- Target column: '%s'",
+ "\n\t- Class labels: %s\n"
+ ),
+ customer_churn$target_column,
+ paste(
+ names(customer_churn$class_labels),
+ customer_churn$class_labels,
+ sep = ": ", collapse = ", "
+ )
+))
+
+data <- customer_churn$load_data()
+head(data)
+```
+
+## Preprocess the raw dataset
+
+Handle categorical variables using one-hot encoding and remove unnecessary columns:
+
+```{r}
+# load_data() already drops RowNumber, CustomerId, Surname
+# One-hot encode categorical variables
+geo_dummies <- model.matrix(~ Geography - 1, data = data)
+gender_dummies <- model.matrix(~ Gender - 1, data = data)
+data_processed <- data %>% select(-Geography, -Gender)
+data_processed <- cbind(data_processed, geo_dummies, gender_dummies)
+```
+
+### Split the dataset
+
+Split into training (60%), validation (20%), and test (20%) sets:
+
+```{r}
+set.seed(42)
+
+# First split: 80% train+validation, 20% test
+target_col <- customer_churn$target_column
+split1 <- sample.split(data_processed[[target_col]], SplitRatio = 0.8)
+train_val_data <- subset(data_processed, split1 == TRUE)
+test_data <- subset(data_processed, split1 == FALSE)
+
+# Second split: 75% train, 25% validation (of the 80% = 60/20 overall)
+split2 <- sample.split(train_val_data[[target_col]], SplitRatio = 0.75)
+train_data <- subset(train_val_data, split2 == TRUE)
+validation_data <- subset(train_val_data, split2 == FALSE)
+```
+
+### Separate features and targets
+
+```{r}
+x_train <- train_data %>% select(-all_of(target_col))
+y_train <- train_data[[target_col]]
+```
+
+## Initialize the ValidMind datasets
+
+```{r}
+vm_raw_dataset <- vm_r$init_dataset(
+ dataset = data,
+ input_id = "raw_dataset",
+ target_column = customer_churn$target_column,
+ class_labels = customer_churn$class_labels
+)
+
+vm_train_ds <- vm_r$init_dataset(
+ dataset = train_data,
+ input_id = "train_dataset",
+ target_column = customer_churn$target_column
+)
+
+vm_validation_ds <- vm_r$init_dataset(
+ dataset = validation_data,
+ input_id = "validation_dataset",
+ target_column = customer_churn$target_column
+)
+
+vm_test_ds <- vm_r$init_dataset(
+ dataset = test_data,
+ input_id = "test_dataset",
+ target_column = customer_churn$target_column
+)
+```
+
+## Run data quality tests
+
+### Run an individual data quality test
+
+Run the ClassImbalance test on the raw dataset and log it to the platform:
+
+```{r}
+vm_r$tests$run_test(
+ test_id = "validmind.data_validation.ClassImbalance",
+ inputs = list(dataset = vm_raw_dataset)
+)$log()
+```
+
+### Run data comparison tests
+
+Compare class imbalance across dataset splits:
+
+```{r}
+comparison_tests <- list(
+ "validmind.data_validation.ClassImbalance:train_vs_validation" = list(
+ input_grid = list(dataset = list("train_dataset", "validation_dataset"))
+ ),
+ "validmind.data_validation.ClassImbalance:train_vs_test" = list(
+ input_grid = list(dataset = list("train_dataset", "test_dataset"))
+ )
+)
+
+for (test_name in names(comparison_tests)) {
+ cat(paste0("Running: ", test_name, "\n"))
+ config <- comparison_tests[[test_name]]
+ tryCatch({
+ vm_r$tests$run_test(
+ test_name,
+ input_grid = config$input_grid
+ )$log()
+ }, error = function(e) {
+ cat(paste0("Error running test ", test_name, ": ", e$message, "\n"))
+ })
+}
+```
+
+## Train the champion model
+
+Train a logistic regression (GLM) to serve as the champion model:
+
+```{r}
+formula <- as.formula(paste(target_col, "~ ."))
+model <- glm(formula, data = train_data, family = binomial)
+summary(model)
+```
+
+## Initialize the model object
+
+Save the R model and initialize it with ValidMind:
+
+```{r}
+model_path <- save_model(model)
+
+vm_xgboost <- vm_r$init_r_model(
+ model_path = model_path,
+ input_id = "xgboost_champion"
+)
+```
+
+### Assign predictions
+
+Link model predictions to the training and testing datasets:
+
+```{r}
+vm_train_ds$assign_predictions(model = vm_xgboost)
+vm_test_ds$assign_predictions(model = vm_xgboost)
+```
+
+## Run model evaluation tests
+
+### Run model performance tests
+
+List available model performance tests:
+
+```{r}
+vm_r$tests$list_tests(tags = list("model_performance"), task = "classification")
+```
+
+Run and log performance tests:
+
+```{r}
+performance_tests <- c(
+ "validmind.model_validation.sklearn.ClassifierPerformance:xgboost_champion",
+ "validmind.model_validation.sklearn.ConfusionMatrix:xgboost_champion",
+ "validmind.model_validation.sklearn.ROCCurve:xgboost_champion"
+)
+
+for (test in performance_tests) {
+ cat(paste0("Running: ", test, "\n"))
+ vm_r$tests$run_test(
+ test,
+ inputs = list(dataset = vm_test_ds, model = vm_xgboost)
+ )$log()
+}
+```
+
+### Run diagnostic tests
+
+Assess the model for overfitting:
+
+```{r}
+vm_r$tests$run_test(
+ test_id = paste0(
+ "validmind.model_validation.sklearn.OverfitDiagnosis",
+ ":xgboost_champion"
+ ),
+ input_grid = list(
+ datasets = list(list(vm_train_ds, vm_test_ds)),
+ model = list(vm_xgboost)
+ )
+)$log()
+```
+
+Test robustness:
+
+```{r}
+vm_r$tests$run_test(
+ test_id = paste0(
+ "validmind.model_validation.sklearn.RobustnessDiagnosis",
+ ":xgboost_champion"
+ ),
+ input_grid = list(
+ datasets = list(list(vm_train_ds, vm_test_ds)),
+ model = list(vm_xgboost)
+ )
+)$log()
+```
+
+### Run feature importance tests
+
+Note: `PermutationFeatureImportance` and `SHAPGlobalImportance` are not supported for R models.
+
+```{r}
+vm_r$tests$run_test(
+ "validmind.model_validation.FeaturesAUC:xgboost_champion",
+ inputs = list(dataset = vm_test_ds, model = vm_xgboost)
+)$log()
+```
+
+## Next steps
+
+Head to the [ValidMind Platform](https://app.prod.validmind.ai) to view the validation report:
+
+1. Navigate to **Inventory** and select your model.
+2. Click **Validation** under Documents.
+3. Include your logged test results as evidence, create risk assessment notes, and assess compliance.
+
+---
+
+*Copyright 2023-2026 ValidMind Inc. All rights reserved.*
+*Refer to [LICENSE](https://github.com/validmind/validmind-library/blob/main/LICENSE) for details.*
+*SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial*
diff --git a/pyproject.toml b/pyproject.toml
index 7fd360c68..6de558bf2 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
[project]
name = "validmind"
-version = "2.12.3"
+version = "2.12.4"
description = "ValidMind Library"
readme = "README.pypi.md"
requires-python = ">=3.9,<3.15"
diff --git a/r/validmind/DESCRIPTION b/r/validmind/DESCRIPTION
index 1a64d1f58..37e80f97d 100644
--- a/r/validmind/DESCRIPTION
+++ b/r/validmind/DESCRIPTION
@@ -1,7 +1,7 @@
Package: validmind
Type: Package
Title: Interface to the 'ValidMind' Platform
-Version: 0.1.2
+Version: 2.12.4
Authors@R: c(person("Andres", "Rodriguez", role = c("aut", "cre","cph"),
email = "andres@validmind.ai"))
Maintainer: Andres Rodriguez
diff --git a/r/validmind/R/platform.R b/r/validmind/R/platform.R
index 426b10d40..5b828918f 100644
--- a/r/validmind/R/platform.R
+++ b/r/validmind/R/platform.R
@@ -3,8 +3,11 @@
#' @param api_key The ValidMind API key
#' @param api_secret The ValidMind API secret
#' @param model The ValidMind model
-#' @param python_version The Python Version to use
+#' @param python_version The path to the Python binary to use. Defaults to
+#' the VALIDMIND_PYTHON environment variable, or the system Python.
#' @param api_host The ValidMind host, defaulting to local
+#' @param document The document type to associate with this session
+#' (e.g. "documentation", "validation-report"). Defaults to NULL.
#'
#' @importFrom reticulate import use_python py_config
#'
@@ -14,28 +17,47 @@
#' @examples
#'\dontrun{
#' vm_r <- vm(
+#' api_host="https://app.prod.validmind.ai/api/v1/tracking",
#' api_key="",
#' api_secret="",
#' model="",
-#' python_version=python_version,
-#' api_host="https://app.prod.validmind.ai/api/v1/tracking"
+#' document="documentation"
#' )
#'}
#'
#' @export
-vm <- function(api_key, api_secret, model, python_version,
- api_host = "http://localhost:3000/api/v1/tracking") {
- use_python(python_version)
+vm <- function(api_key, api_secret, model,
+ python_version = Sys.getenv("VALIDMIND_PYTHON", Sys.which("python")),
+ api_host = "http://localhost:3000/api/v1/tracking",
+ document = NULL) {
+ # Resolve relative paths (e.g. ".venv/bin/python") against the working directory
+ if (nchar(python_version) > 0 && !startsWith(python_version, "/")) {
+ python_version <- file.path(getwd(), python_version)
+ }
+ use_python(python_version, required = TRUE)
+
+ # Set environment variables BEFORE Python initializes (required for rpy2 compatibility)
+ # R_HOME: so rpy2 can find the R installation
+ # RPY2_CFFI_MODE: use ABI mode so rpy2 attaches to the existing R session
+ # started by reticulate rather than trying to start a new one
+ Sys.setenv(R_HOME = R.home())
+ Sys.setenv(RPY2_CFFI_MODE = "ABI")
vm <- import("validmind")
- vm$init(
+ init_args <- list(
api_host = api_host,
api_key = api_key,
api_secret = api_secret,
model = model
)
+ if (!is.null(document)) {
+ init_args$document <- document
+ }
+
+ do.call(vm$init, init_args)
+
return(vm)
}
diff --git a/r/validmind/README.md b/r/validmind/README.md
index ed74b8149..4351790cb 100644
--- a/r/validmind/README.md
+++ b/r/validmind/README.md
@@ -1,5 +1,27 @@
# ValidMind R Package
+## Prerequisites
+
+Install the required R packages:
+
+```r
+install.packages(c("reticulate", "dplyr", "caTools", "knitr", "glue", "plotly", "htmltools", "rmarkdown", "DT", "base64enc"))
+```
+
+You also need a Python environment with the `validmind` Python package and `rpy2` installed:
+
+```bash
+pip install validmind rpy2
+```
+
+**Note:** On macOS, if `rpy2` fails to find R libraries, rebuild it from source against your installed R:
+
+```bash
+R_HOME=$(Rscript -e 'cat(R.home())') pip install --no-binary :all: --force-reinstall rpy2
+```
+
+By default the package locates Python via the `VALIDMIND_PYTHON` environment variable (see "Configuring the Python path" below); alternatively, pass `python_version` explicitly to point at your Python binary (e.g. the one in your project's `.venv`).
+
## Installation
You can install ValidMind from CRAN:
@@ -20,23 +42,91 @@ Or you can install the package from source. Ensure you are in the `r/validmind`
devtools::install()
```
+For local development, you can skip `devtools` entirely and install directly from the repo path:
+
+```r
+install.packages("/path/to/validmind-library/r/validmind", repos = NULL, type = "source")
+```
+
+## Configuring the Python path
+
+The R package needs to know which Python binary to use (the one with `validmind` installed). It reads the `VALIDMIND_PYTHON` environment variable, falling back to the system Python if not set.
+
+### Option 1: `.Renviron` file (recommended)
+
+Create a `.Renviron` file in the project root (or `~/.Renviron` for a global setting):
+
+```
+VALIDMIND_PYTHON=.venv/bin/python
+```
+
+R reads this file automatically on startup, before any code runs. Relative paths are resolved against the working directory.
+
+### Option 2: `.env` file in the repo
+
+If the project uses Poetry with in-project virtualenvs (`.venv/` inside the repo), add to the repo's `.env` file:
+
+```
+VALIDMIND_PYTHON=.venv/bin/python
+```
+
+### No configuration needed in Docker / CI
+
+When `VALIDMIND_PYTHON` is not set, the package falls back to `Sys.which("python")`, which resolves to the system Python — correct for environments where `validmind` is installed globally.
+
## QuickStart
-You can connect to your ValidMind profile by providing the appropriate credentials:
+Connect to your ValidMind profile:
```r
vm_r <- vm(
+  api_host="https://app.prod.validmind.ai/api/v1/tracking",
api_key="",
api_secret="",
model="",
- python_version="",
- api_host="https://api.prod.validmind.ai/api/v1/tracking"
+ document="documentation"
)
```
-## Fleshed out example
+The `python_version` parameter is no longer required — it defaults to `VALIDMIND_PYTHON` or the system Python. The `document` parameter specifies which document type to associate with the session (e.g. `"documentation"` or `"validation-report"`).
+
+### Quickstart notebooks
+
+See the `notebooks/quickstart/` folder for full working examples:
+
+- **`quickstart_model_documentation.Rmd`** — End-to-end model documentation workflow: load data, preprocess, train a GLM model, and run the full documentation test suite.
+- **`quickstart_model_validation.Rmd`** — End-to-end model validation workflow: load data, run data quality tests, train a champion GLM model, and run model evaluation tests.
-Please see the `notebooks/code-sharing/r` folder for examples of how to use!
+These notebooks can be run from VS Code (with the R extension), RStudio, or interactively in a terminal R session. When running interactively, launch R from the repository root so that relative dataset paths resolve correctly.
+
+### Key APIs available via reticulate
+
+Since the R package returns the full Python `validmind` module, you can call any Python API directly:
+
+```r
+# Preview the documentation template
+vm_r$preview_template()
+
+# Initialize datasets
+vm_dataset <- vm_r$init_dataset(dataset=df, input_id="my_dataset", target_column="target")
+
+# Initialize R models
+model_path <- save_model(model)
+vm_model <- vm_r$init_r_model(model_path=model_path, input_id="model")
+
+# Assign predictions
+vm_dataset$assign_predictions(model=vm_model)
+
+# Run the full documentation test suite
+vm_r$run_documentation_tests(config=test_config)
+
+# Run individual tests
+vm_r$tests$run_test("validmind.data_validation.ClassImbalance", inputs=list(dataset=vm_dataset))$log()
+
+# List available tests
+vm_r$tests$list_tests(tags=list("data_quality"), task="classification")
+vm_r$tests$list_tasks_and_tags()
+```
## Troubleshooting
diff --git a/tests/test_validmind_tests_module.py b/tests/test_validmind_tests_module.py
index 4ee984c74..4118f6449 100644
--- a/tests/test_validmind_tests_module.py
+++ b/tests/test_validmind_tests_module.py
@@ -7,6 +7,7 @@
from typing import Callable, List
import pandas as pd
+from pandas.io.formats.style import Styler
from validmind.tests import (
list_tags,
@@ -37,8 +38,11 @@ def test_list_tasks(self):
def test_list_tasks_and_tags(self):
tasks_and_tags = list_tasks_and_tags()
- self.assertIsInstance(tasks_and_tags, pd.io.formats.style.Styler)
- df = tasks_and_tags.data
+ # Returns a Styler in notebooks, plain DataFrame otherwise
+ if isinstance(tasks_and_tags, Styler):
+ df = tasks_and_tags.data
+ else:
+ df = tasks_and_tags
self.assertTrue(len(df) > 0)
self.assertTrue(all(isinstance(task, str) for task in df["Task"]))
self.assertTrue(all(isinstance(tag, str) for tag in df["Tags"]))
@@ -51,8 +55,11 @@ def test_list_tests(self):
def test_list_tests_pretty(self):
tests = list_tests(pretty=True)
- self.assertIsInstance(tests, pd.io.formats.style.Styler)
- df = tests.data
+ # Returns a Styler in notebooks, plain DataFrame otherwise
+ if isinstance(tests, Styler):
+ df = tests.data
+ else:
+ df = tests
self.assertTrue(len(df) > 0)
# check has the columns: ID, Name, Description, Required Inputs, Params
self.assertTrue("ID" in df.columns)
diff --git a/validmind/__version__.py b/validmind/__version__.py
index 521faacaf..896b89678 100644
--- a/validmind/__version__.py
+++ b/validmind/__version__.py
@@ -1 +1 @@
-__version__ = "2.12.3"
+__version__ = "2.12.4"
diff --git a/validmind/client.py b/validmind/client.py
index 5c5ceff39..c0fe8b2ca 100644
--- a/validmind/client.py
+++ b/validmind/client.py
@@ -282,33 +282,23 @@ def init_r_model(
"""
Initialize a VM Model from an R model.
- LogisticRegression and LinearRegression models are converted to sklearn models by extracting
- the coefficients and intercept from the R model. XGB models are loaded using the xgboost
- since xgb models saved in .json or .bin format can be loaded directly with either Python or R.
+ The model must first be saved to an .RData file using the R package's
+ ``save_model()`` function. This function then uses rpy2 to load the model
+ into Python for testing and validation.
Args:
- model_path (str): The path to the R model saved as an RDS or XGB file.
+ model_path (str): The path to the R model saved as an .RData file.
input_id (str): The input ID for the model. Defaults to "model".
Returns:
VMModel: A VM Model instance.
"""
-
- # TODO: proper check for supported models
- #
- # if model.get("method") not in R_MODEL_METHODS:
- # raise UnsupportedRModelError(
- # "R model method must be one of {}. Got {}".format(
- # R_MODEL_METHODS, model.get("method")
- # )
- # )
-
- # first we need to load the model using rpy2
- # since rpy2 is an extra we need to conditionally import it
try:
import rpy2.robjects as robjects
- except ImportError:
- raise MissingRExtrasError()
+ except Exception as e:
+ raise MissingRExtrasError(
+ f"`rpy2` is required to use R models. Import failed: {e}"
+ )
r = robjects.r
loaded_objects = r.load(model_path)
@@ -321,6 +311,14 @@ def init_r_model(
input_id=input_id,
)
+ metadata = get_model_info(vm_model)
+ log_input(
+ input_id=input_id,
+ type="model",
+ metadata=metadata,
+ )
+ input_registry.add(key=input_id, obj=vm_model)
+
return vm_model
diff --git a/validmind/models/r_model.py b/validmind/models/r_model.py
index 377c80bd8..cf36e308c 100644
--- a/validmind/models/r_model.py
+++ b/validmind/models/r_model.py
@@ -98,11 +98,10 @@ def r_predict(self, new_data_r):
Instead, there is a global predict() method that returns the predicted
values according to the model type.
"""
- # Use the predict method on the loaded model (assuming the model's name in R is 'model')
predicted_probs = self.r.predict(
self.model, newdata=new_data_r, type="response"
)
- return predicted_probs
+ return np.array(predicted_probs)
def r_xgb_predict(self, new_data_r):
"""
@@ -114,7 +113,7 @@ def r_xgb_predict(self, new_data_r):
predicted_probs = self.r.predict(
self.model, newdata=new_data_r, type="response"
)
- return predicted_probs
+ return np.array(predicted_probs)
def predict_proba(self, new_data):
"""
@@ -127,24 +126,30 @@ def predict(self, new_data, return_probs=False):
Converts the predicted probabilities to classes
"""
try:
+ from rpy2.robjects import conversion, default_converter
from rpy2.robjects import pandas2ri
except ImportError:
raise MissingRExtrasError()
- # Activate the pandas conversion for rpy2
- pandas2ri.activate()
-
new_data_class = get_full_class_name(new_data)
if new_data_class == "numpy.ndarray":
- # We need to reconstruct the DataFrame from the ndarray using the column names
- new_data = pd.DataFrame(new_data, columns=self.test_ds.feature_columns)
+ # Reconstruct a DataFrame from the ndarray using column names
+ # from the model's training data
+ try:
+ model_terms = list(self.r.attr(self.model.rx2["terms"], "term.labels"))
+ new_data = pd.DataFrame(new_data, columns=model_terms)
+ except Exception:
+ # Fallback: use generic column names
+ new_data = pd.DataFrame(new_data)
elif new_data_class != "pandas.core.frame.DataFrame":
raise ValueError(
f"new_data must be a DataFrame or ndarray. Got {new_data_class}"
)
- new_data_r = pandas2ri.py2rpy(new_data)
+ # Use context manager for pandas conversion (activate/deactivate is deprecated)
+ with conversion.localconverter(default_converter + pandas2ri.converter):
+ new_data_r = conversion.get_conversion().py2rpy(new_data)
if self.__model_class() == "xgb.Booster":
predicted_probs = self.r_xgb_predict(new_data_r)
diff --git a/validmind/template.py b/validmind/template.py
index 8fe191389..d4aec2a0e 100644
--- a/validmind/template.py
+++ b/validmind/template.py
@@ -190,20 +190,37 @@ def _create_section_html(tree: List[Dict[str, Any]]) -> str:
return StatefulHTMLRenderer.render_accordion(accordion_items, accordion_titles)
+def _print_section_tree(sections: List[Dict[str, Any]], indent: int = 0) -> None:
+ """Print a plain-text representation of the template section tree."""
+ prefix = " " * indent
+ for i, section in enumerate(sections):
+ number = f"{i + 1}." if indent == 0 else ""
+ print(f"{prefix}{number} {section['title']} ('{section['id']}')")
+
+ for content in section.get("contents", []):
+ content_type = CONTENT_TYPE_MAP.get(
+ content["content_type"], content["content_type"]
+ )
+ print(f"{prefix} - [{content_type}] {content['content_id']}")
+
+ if section.get("sections"):
+ _print_section_tree(section["sections"], indent + 1)
+
+
def preview_template(template: str) -> None:
- """Preview a template in Jupyter Notebook.
+ """Preview a template in Jupyter Notebook or plain text.
Args:
template (dict): The template to preview.
"""
+ section_tree = _convert_sections_to_section_tree(template["sections"])
+
if not is_notebook():
- logger.warning("preview_template() only works in Jupyter Notebook")
+ _print_section_tree(section_tree)
return
html_content = StatefulHTMLRenderer.get_base_css()
- html_content += _create_section_html(
- _convert_sections_to_section_tree(template["sections"])
- )
+ html_content += _create_section_html(section_tree)
display(html_content)
diff --git a/validmind/utils.py b/validmind/utils.py
index bd1cec418..af136184e 100644
--- a/validmind/utils.py
+++ b/validmind/utils.py
@@ -359,6 +359,9 @@ def format_number(number):
def format_dataframe(df: pd.DataFrame) -> pd.DataFrame:
"""Format a pandas DataFrame for display purposes."""
+ if not is_notebook():
+ return df
+
df = df.style.set_properties(**{"text-align": "left"}).hide(axis="index")
return df.set_table_styles([dict(selector="th", props=[("text-align", "left")])])
@@ -509,7 +512,7 @@ def get_dataset_info(dataset):
def preview_test_config(config):
- """Preview test configuration in a collapsible HTML section.
+ """Preview test configuration in a collapsible HTML section or plain text.
Args:
config (dict): Test configuration dictionary.
@@ -521,6 +524,10 @@ def preview_test_config(config):
logger.error(f"JSON serialization failed: {e}")
return
+ if not is_notebook():
+ print(formatted_json)
+ return
+
collapsible_html = f"""