diff --git a/.gitignore b/.gitignore index 4f1fdc64..9d0335a5 100644 --- a/.gitignore +++ b/.gitignore @@ -8,4 +8,6 @@ inst/doc *.log *.o *.so -*.dll \ No newline at end of file +*.dll +.lintr +.vscode \ No newline at end of file diff --git a/R/MSstatsConvert_core_functions.R b/R/MSstatsConvert_core_functions.R index 8f06973c..c5e98c77 100644 --- a/R/MSstatsConvert_core_functions.R +++ b/R/MSstatsConvert_core_functions.R @@ -518,6 +518,12 @@ MSstatsMakeAnnotation = function(input, annotation, ...) { } #' Run Anomaly Model +#' +#' Detects anomalous measurements in mass spectrometry data using an isolation forest algorithm. +#' This function identifies unusual precursor measurements based on quality metrics and their +#' temporal patterns. For features with insufficient quality metric data, it assigns anomaly +#' scores based on the median score of similar features (same peptide and charge combination). +#' The model supports parallel processing for improved performance on large datasets. #' #' @param input data.table preprocessed by the MSstatsBalancedDesign function #' @param quality_metrics character vector of quality metrics to use in the model @@ -535,7 +541,7 @@ MSstatsMakeAnnotation = function(input, annotation, ...) 
{ MSstatsAnomalyScores = function(input, quality_metrics, temporal_direction, missing_run_count, n_feat, run_order, n_trees, max_depth, cores){ - + input = .prepareSpectronautAnomalyInput(input, quality_metrics, run_order, n_feat, missing_run_count) @@ -548,14 +554,14 @@ MSstatsAnomalyScores = function(input, quality_metrics, temporal_direction, temporal_direction[i])) } } - + input = .runAnomalyModel(input, n_trees=n_trees, max_depth=max_depth, cores=cores, split_column="PSM", quality_metrics=quality_metrics) - + subset_cols = c("Run", "ProteinName", "PeptideSequence", "PrecursorCharge", "FragmentIon", "ProductCharge", "IsotopeLabelType", diff --git a/R/clean_DIANN.R b/R/clean_DIANN.R index afe75554..23f49c8b 100644 --- a/R/clean_DIANN.R +++ b/R/clean_DIANN.R @@ -8,7 +8,9 @@ quantificationColumn = "FragmentQuantCorrected", global_qvalue_cutoff = 0.01, qvalue_cutoff = 0.01, - pg_qvalue_cutoff = 0.01) { + pg_qvalue_cutoff = 0.01, + calculateAnomalyScores = FALSE, + anomalyModelFeatures = c()) { dn_input <- getInputFile(msstats_object, "input") dn_input <- data.table::as.data.table(dn_input) @@ -19,7 +21,9 @@ dn_input <- .cleanDIANNAddMissingColumns(dn_input) # Select required columns - dn_input <- .cleanDIANNSelectRequiredColumns(dn_input, quantificationColumn, MBR) + dn_input <- .cleanDIANNSelectRequiredColumns(dn_input, quantificationColumn, MBR, + calculateAnomalyScores, + anomalyModelFeatures) # Split concatenated values dn_input <- .cleanDIANNSplitConcatenatedValues(dn_input, quantificationColumn) @@ -78,7 +82,9 @@ #' @param MBR logical indicating if match between runs was used #' @return data.table with selected columns #' @noRd -.cleanDIANNSelectRequiredColumns <- function(dn_input, quantificationColumn, MBR) { +.cleanDIANNSelectRequiredColumns <- function(dn_input, quantificationColumn, MBR, + calculateAnomalyScores, + anomalyModelFeatures) { base_cols <- c('ProteinNames', 'StrippedSequence', 'ModifiedSequence', 'PrecursorCharge', quantificationColumn, 
'QValue', 'PrecursorMz', 'FragmentInfo', 'Run') @@ -89,7 +95,13 @@ c('GlobalQValue', 'GlobalPGQValue') } - req_cols <- c(base_cols, mbr_cols) + qual_cols <- if (calculateAnomalyScores) { + anomalyModelFeatures + } else { + c() + } + + req_cols <- c(base_cols, mbr_cols, qual_cols) return(dn_input[, req_cols, with = FALSE]) } @@ -163,13 +175,13 @@ getOption("MSstatsLog")("INFO", msg) getOption("MSstatsMsg")("INFO", msg) - dn_input = dn_input[QValue < global_qvalue_cutoff, ] + dn_input = dn_input[QValue >= global_qvalue_cutoff, (quantificationColumn) := 0] if (MBR) { msg = '** MBR was used to analyze the data. Now setting names and filtering' msg_1_mbr = paste0('-- LibPGQValue < ', pg_qvalue_cutoff) msg_2_mbr = paste0('-- LibQValue < ', qvalue_cutoff) - dn_input = dn_input[LibPGQValue < pg_qvalue_cutoff, ] - dn_input = dn_input[LibQValue < qvalue_cutoff, ] + dn_input = dn_input[LibPGQValue >= pg_qvalue_cutoff, (quantificationColumn) := 0] + dn_input = dn_input[LibQValue >= qvalue_cutoff, (quantificationColumn) := 0] getOption("MSstatsLog")("INFO", msg) getOption("MSstatsMsg")("INFO", msg) getOption("MSstatsLog")("INFO", msg_1_mbr) @@ -181,8 +193,8 @@ msg = '** MBR was not used to analyze the data.
Now setting names and filtering' msg_1 = paste0('-- Filtering on GlobalPGQValue < ', pg_qvalue_cutoff) msg_2 = paste0('-- Filtering on GlobalQValue < ', qvalue_cutoff) - dn_input = dn_input[GlobalPGQValue < pg_qvalue_cutoff, ] - dn_input = dn_input[GlobalQValue < qvalue_cutoff, ] + dn_input = dn_input[GlobalPGQValue >= pg_qvalue_cutoff, (quantificationColumn) := 0] + dn_input = dn_input[GlobalQValue >= qvalue_cutoff, (quantificationColumn) := 0] getOption("MSstatsLog")("INFO", msg) getOption("MSstatsMsg")("INFO", msg) getOption("MSstatsLog")("INFO", msg_1) diff --git a/R/converters_DIANNtoMSstatsFormat.R b/R/converters_DIANNtoMSstatsFormat.R index 1b46976b..833990f6 100644 --- a/R/converters_DIANNtoMSstatsFormat.R +++ b/R/converters_DIANNtoMSstatsFormat.R @@ -22,6 +22,15 @@ #' @param quantificationColumn Use 'FragmentQuantCorrected'(default) column for quantified intensities for DIANN 1.8.x. #' Use 'FragmentQuantRaw' for quantified intensities for DIANN 1.9.x. #' Use 'auto' for quantified intensities for DIANN 2.x where each fragment intensity is a separate column, e.g. Fr0Quantity. +#' @param calculateAnomalyScores Default is FALSE. If TRUE, will run anomaly detection model and calculate anomaly scores for each feature. Used downstream to weigh measurements in differential analysis. +#' @param anomalyModelFeatures character vector of quality metric column names to be used as features in the anomaly detection model. List must not be empty if calculateAnomalyScores=TRUE. +#' @param anomalyModelFeatureTemporal character vector of temporal direction corresponding to columns passed to anomalyModelFeatures. Values must be one of: `mean_decrease`, `mean_increase`, `dispersion_increase`, or NULL (to perform no temporal feature engineering). Default is empty vector. If calculateAnomalyScores=TRUE, vector must have as many values as anomalyModelFeatures (even if all NULL). +#' @param removeMissingFeatures Remove features with missing values in more than this fraction of runs.
Default is 0.5. Only used if calculateAnomalyScores=TRUE. +#' @param anomalyModelFeatureCount Feature selection for anomaly model. Anomaly detection works on the precursor-level and can be much slower if all features used. We will by default filter to the top-100 highest intensity features. This can be adjusted as necessary. To turn feature-selection off, set this value to a high number (e.g. 10000). Only used if calculateAnomalyScores=TRUE. +#' @param runOrder Temporal order of MS runs. Should be a two column data.table with columns `Run` and `Order`, where `Run` matches the run name output by DIA-NN and `Order` is an integer. Used to engineer the temporal features defined in anomalyModelFeatureTemporal. +#' @param n_trees Number of trees to use in isolation forest when calculateAnomalyScores=TRUE. Default is 100. +#' @param max_depth Max tree depth to use in isolation forest when calculateAnomalyScores=TRUE. Default is "auto" which calculates depth as log2(N) where N is the number of runs. Otherwise must be an integer. +#' @param numberOfCores Number of cores for parallel processing anomaly detection model. When > 1, a logfile named 'MSstats_anomaly_model_progress.log' is created to track progress. Only works for Linux & Mac OS. Default is 1. #' @param ... additional parameters to `data.table::fread`. #' #' @return data.frame in the MSstats required format. @@ -51,29 +60,39 @@ #' output_2_0 = DIANNtoMSstatsFormat(input_2_0, annotation = annot_2_0, MBR = FALSE, #' use_log_file = FALSE, quantificationColumn = 'auto') #' head(output_2_0) -DIANNtoMSstatsFormat = function(input, annotation = NULL, - global_qvalue_cutoff = 0.01, - qvalue_cutoff = 0.01, - pg_qvalue_cutoff = 0.01, - useUniquePeptide = TRUE, - removeFewMeasurements = TRUE, - removeOxidationMpeptides = TRUE, - removeProtein_with1Feature = TRUE, - use_log_file = TRUE, append = FALSE, - verbose = TRUE, log_file_path = NULL, - MBR = TRUE, - quantificationColumn = "FragmentQuantCorrected", - ...) 
{ +DIANNtoMSstatsFormat = function( + input, annotation = NULL, + global_qvalue_cutoff = 0.01, + qvalue_cutoff = 0.01, + pg_qvalue_cutoff = 0.01, + useUniquePeptide = TRUE, + removeFewMeasurements = TRUE, + removeOxidationMpeptides = TRUE, + removeProtein_with1Feature = TRUE, + MBR = TRUE, + quantificationColumn = "FragmentQuantCorrected", + calculateAnomalyScores=FALSE, anomalyModelFeatures=c(), + anomalyModelFeatureTemporal=c(), removeMissingFeatures=.5, + anomalyModelFeatureCount=100, + runOrder=NULL, n_trees=100, max_depth="auto", numberOfCores=1, + use_log_file = TRUE, append = FALSE, + verbose = TRUE, log_file_path = NULL, + ...) { MSstatsConvert::MSstatsLogsSettings(use_log_file, append, verbose, log_file_path) + anomalyModelFeatures = .standardizeColnames(anomalyModelFeatures) + input = MSstatsConvert::MSstatsImport(list(input = input), "MSstats", "DIANN") + input = MSstatsConvert::MSstatsClean(input, MBR = MBR, quantificationColumn = quantificationColumn, global_qvalue_cutoff = global_qvalue_cutoff, qvalue_cutoff = qvalue_cutoff, - pg_qvalue_cutoff = pg_qvalue_cutoff) + pg_qvalue_cutoff = pg_qvalue_cutoff, + calculateAnomalyScores = calculateAnomalyScores, + anomalyModelFeatures = anomalyModelFeatures) annotation = MSstatsConvert::MSstatsMakeAnnotation(input, annotation) decoy_filter = list(col_name = "ProteinName", @@ -87,6 +106,7 @@ DIANNtoMSstatsFormat = function(input, annotation = NULL, feature_columns = c("PeptideSequence", "PrecursorCharge", "FragmentIon", "ProductCharge") + # browser() input = MSstatsConvert::MSstatsPreprocess( input, annotation, @@ -101,19 +121,28 @@ DIANNtoMSstatsFormat = function(input, annotation = NULL, remove_features_with_few_measurements = removeFewMeasurements, summarize_multiple_psms = max), columns_to_fill = list(Fraction = 1, - IsotopeLabelType = "Light")) + IsotopeLabelType = "Light"), + anomaly_metrics = anomalyModelFeatures) input[, Intensity := ifelse(Intensity == 0, NA, Intensity)] - + # browser() input = 
MSstatsConvert::MSstatsBalancedDesign(input, feature_columns, - fill_incomplete = FALSE, + fill_incomplete = TRUE, handle_fractions = FALSE, - remove_few = removeFewMeasurements + remove_few = removeFewMeasurements, + anomaly_metrics = anomalyModelFeatures ) - + # browser() + if (calculateAnomalyScores){ + input = MSstatsConvert::MSstatsAnomalyScores( + input, anomalyModelFeatures, anomalyModelFeatureTemporal, + removeMissingFeatures, anomalyModelFeatureCount, runOrder, n_trees, + max_depth, numberOfCores) + } + # browser() msg_final = paste("** Finished preprocessing. The dataset is ready", "to be processed by the dataProcess function.") getOption("MSstatsLog")("INFO", msg_final) getOption("MSstatsMsg")("INFO", msg_final) getOption("MSstatsLog")("INFO", "\n") input -} \ No newline at end of file +} diff --git a/R/utils_anomaly_score.R b/R/utils_anomaly_score.R index d604f0b3..d5981732 100644 --- a/R/utils_anomaly_score.R +++ b/R/utils_anomaly_score.R @@ -79,66 +79,116 @@ #' Calculate mean increase #' @noRd -.add_mean_increase = function(quality_vector){ - - mean_increase = numeric(length(quality_vector)) - mean_increase[1] = 0 +.add_mean_increase = function(quality_vector) { + + n = length(quality_vector) + mean_increase = rep(NA_real_, n) + d = 0.5 - - for(k in 2:length(quality_vector)) { - # 5 is reference (3 sigma) - if (mean_increase[k] > 5){ - mean_increase[k] = max(0,(quality_vector[k] - d), na.rm = TRUE) - } else { - mean_increase[k] = max(0, - (quality_vector[k] - d + mean_increase[k-1]), - na.rm = TRUE) # positive CuSum + h = 5 + + state = 0 + mean_increase[1] = if (is.na(quality_vector[1])) NA_real_ else 0 + + if (n >= 2) { + for (k in 2:n) { + + if (is.na(quality_vector[k])) { + mean_increase[k] = NA_real_ # output NA at missing + next # state unchanged (carry forward) + } + + if (state > h) { + state = max(0, quality_vector[k] - d) + } else { + state = max(0, quality_vector[k] - d + state) + } + + mean_increase[k] = state } } - return(mean_increase) + 
+ mean_increase } #' Calculate mean decrease #' @noRd -.add_mean_decrease = function(quality_vector){ - - mean_decrease = numeric(length(quality_vector)) - mean_decrease[1] = 0 +.add_mean_decrease = function(quality_vector) { + + n = length(quality_vector) + mean_decrease = rep(NA_real_, n) + d = -0.5 - - for(k in 2:length(quality_vector)) { - # 5 is reference (3 sigma) - if (mean_decrease[k]>5){ - mean_decrease[k] <- max(0,(d - quality_vector[k] + 0), na.rm = TRUE) - } else { - mean_decrease[k] <- max(0, - (d - quality_vector[k] + mean_decrease[k-1]), - na.rm = TRUE) # negative CuSum + h = 5 + + state = 0 + mean_decrease[1] = + if (is.na(quality_vector[1])) NA_real_ else 0 + + if (n >= 2) { + for (k in 2:n) { + + if (is.na(quality_vector[k])) { + mean_decrease[k] = NA_real_ # mark missing + next # carry state forward + } + + if (state > h) { + state = max(0, d - quality_vector[k]) + } else { + state = max(0, d - quality_vector[k] + state) + } + + mean_decrease[k] = state } } - return(mean_decrease) + + mean_decrease } #' Calculate dispersion increase #' @noRd -.add_dispersion_increase = function(quality_vector){ - dispersion_increase = numeric(length(quality_vector)) - v = numeric(length(quality_vector)) - v[1] = (sqrt(abs(quality_vector[1]))-0.822)/0.349 +.add_dispersion_increase = function(quality_vector) { + + n = length(quality_vector) + dispersion_increase = rep(NA_real_, n) + v = rep(NA_real_, n) + d = 0.5 - for(k in 2:length(quality_vector)) { - - v[k] = (sqrt(abs(quality_vector[k]))-0.822)/0.349 - - if (dispersion_increase[k] > 5){ - dispersion_increase[k] = max(0,(v[k] - d), - na.rm = TRUE) - } else { - dispersion_increase[k] = max(0, - (v[k] - d + dispersion_increase[k-1]), - na.rm = TRUE) # CuSum variance + h = 5 + + # initialize state + state = 0 + + if (!is.na(quality_vector[1])) { + v[1] = (sqrt(abs(quality_vector[1])) - 0.822) / 0.349 + dispersion_increase[1] = 0 + } else { + v[1] = NA_real_ + dispersion_increase[1] = NA_real_ + } + + if (n >= 2) { 
+ for (k in 2:n) { + + if (is.na(quality_vector[k])) { + v[k] = NA_real_ + dispersion_increase[k] = NA_real_ # mark missing + next # carry state forward + } + + v[k] = (sqrt(abs(quality_vector[k])) - 0.822) / 0.349 + + if (state > h) { + state = max(0, v[k] - d) + } else { + state = max(0, v[k] - d + state) + } + + dispersion_increase[k] = state } } + return(dispersion_increase) } @@ -172,6 +222,8 @@ cat(paste0("Number of PSMs to process: ", num_psm), sep = "\n", file = "MSstats_anomaly_model_progress.log") + model_input = unique(input_data[, c(split_column, quality_metrics), with = FALSE]) + model_results = parallel::parLapply( cl, seq_len(num_psm), function(i){ @@ -179,7 +231,7 @@ cat("Finished processing an additional 100 PSMs", sep = "\n", file = "MSstats_anomaly_model_progress.log", append = TRUE) } - single_psm = input_data[get(split_column) == psm_list[[i]], + single_psm = model_input[model_input[[split_column]] == psm_list[[i]], ..quality_metrics] if (max_depth == "auto"){ @@ -192,9 +244,9 @@ } ) - model_results = unlist(model_results) - # Clip anomaly scores to stop them from exploding - input_data$AnomalyScores = pmax(model_results, .001) - + model_input$AnomalyScores = unlist(model_results) + + input_data = merge(input_data, model_input, by = c(split_column, quality_metrics), all.x = TRUE) + return(input_data) } diff --git a/man/DIANNtoMSstatsFormat.Rd b/man/DIANNtoMSstatsFormat.Rd index 2ea6e068..b7566ee5 100644 --- a/man/DIANNtoMSstatsFormat.Rd +++ b/man/DIANNtoMSstatsFormat.Rd @@ -14,12 +14,21 @@ DIANNtoMSstatsFormat( removeFewMeasurements = TRUE, removeOxidationMpeptides = TRUE, removeProtein_with1Feature = TRUE, + MBR = TRUE, + quantificationColumn = "FragmentQuantCorrected", + calculateAnomalyScores = FALSE, + anomalyModelFeatures = c(), + anomalyModelFeatureTemporal = c(), + removeMissingFeatures = 0.5, + anomalyModelFeatureCount = 100, + runOrder = NULL, + n_trees = 100, + max_depth = "auto", + numberOfCores = 1, use_log_file = TRUE, append = 
FALSE, verbose = TRUE, log_file_path = NULL, - MBR = TRUE, - quantificationColumn = "FragmentQuantCorrected", ... ) } @@ -50,6 +59,30 @@ the library created after the first MBR pass. Default is 0.01.} \item{removeProtein_with1Feature}{should proteins with a single feature be removed} +\item{MBR}{True if analysis was done with match between runs} + +\item{quantificationColumn}{Use 'FragmentQuantCorrected'(default) column for quantified intensities for DIANN 1.8.x. +Use 'FragmentQuantRaw' for quantified intensities for DIANN 1.9.x. +Use 'auto' for quantified intensities for DIANN 2.x where each fragment intensity is a separate column, e.g. Fr0Quantity.} + +\item{calculateAnomalyScores}{Default is FALSE. If TRUE, will run anomaly detection model and calculate anomaly scores for each feature. Used downstream to weigh measurements in differential analysis.} + +\item{anomalyModelFeatures}{character vector of quality metric column names to be used as features in the anomaly detection model. List must not be empty if calculateAnomalyScores=TRUE.} + +\item{anomalyModelFeatureTemporal}{character vector of temporal direction corresponding to columns passed to anomalyModelFeatures. Values must be one of: \code{mean_decrease}, \code{mean_increase}, \code{dispersion_increase}, or NULL (to perform no temporal feature engineering). Default is empty vector. If calculateAnomalyScores=TRUE, vector must have as many values as anomalyModelFeatures (even if all NULL).} + +\item{removeMissingFeatures}{Remove features with missing values in more than this fraction of runs. Default is 0.5. Only used if calculateAnomalyScores=TRUE.} + +\item{anomalyModelFeatureCount}{Feature selection for anomaly model. Anomaly detection works on the precursor-level and can be much slower if all features used. We will by default filter to the top-100 highest intensity features. This can be adjusted as necessary. To turn feature-selection off, set this value to a high number (e.g. 10000). 
Only used if calculateAnomalyScores=TRUE.} + +\item{runOrder}{Temporal order of MS runs. Should be a two column data.table with columns \code{Run} and \code{Order}, where \code{Run} matches the run name output by DIA-NN and \code{Order} is an integer. Used to engineer the temporal features defined in anomalyModelFeatureTemporal.} + +\item{n_trees}{Number of trees to use in isolation forest when calculateAnomalyScores=TRUE. Default is 100.} + +\item{max_depth}{Max tree depth to use in isolation forest when calculateAnomalyScores=TRUE. Default is "auto" which calculates depth as log2(N) where N is the number of runs. Otherwise must be an integer.} + +\item{numberOfCores}{Number of cores for parallel processing anomaly detection model. When > 1, a logfile named 'MSstats_anomaly_model_progress.log' is created to track progress. Only works for Linux & Mac OS. Default is 1.} + \item{use_log_file}{logical. If TRUE, information about data processing will be saved to a file.} @@ -64,12 +97,6 @@ data processing will be saved. If not provided, such a file will be created automatically. If \code{append = TRUE}, has to be a valid path to a file.} -\item{MBR}{True if analysis was done with match between runs} - -\item{quantificationColumn}{Use 'FragmentQuantCorrected'(default) column for quantified intensities for DIANN 1.8.x. -Use 'FragmentQuantRaw' for quantified intensities for DIANN 1.9.x. -Use 'auto' for quantified intensities for DIANN 2.x where each fragment intensity is a separate column, e.g. Fr0Quantity.} - -\item{...}{additional parameters to \code{data.table::fread}.} } \value{ diff --git a/man/MSstatsAnomalyScores.Rd b/man/MSstatsAnomalyScores.Rd index bc2b18a3..b6a9d500 100644 --- a/man/MSstatsAnomalyScores.Rd +++ b/man/MSstatsAnomalyScores.Rd @@ -39,5 +39,9 @@ MSstatsAnomalyScores( data.table } \description{ -Run Anomaly Model +Detects anomalous measurements in mass spectrometry data using an isolation forest algorithm.
+This function identifies unusual precursor measurements based on quality metrics and their +temporal patterns. For features with insufficient quality metric data, it assigns anomaly +scores based on the median score of similar features (same peptide and charge combination). +The model supports parallel processing for improved performance on large datasets. } diff --git a/man/MSstatsClean.Rd b/man/MSstatsClean.Rd index c5338a47..8c11da0c 100644 --- a/man/MSstatsClean.Rd +++ b/man/MSstatsClean.Rd @@ -69,7 +69,9 @@ MSstatsClean(msstats_object, ...) quantificationColumn = "FragmentQuantCorrected", global_qvalue_cutoff = 0.01, qvalue_cutoff = 0.01, - pg_qvalue_cutoff = 0.01 + pg_qvalue_cutoff = 0.01, + calculateAnomalyScores = FALSE, + anomalyModelFeatures = c() ) \S4method{MSstatsClean}{MSstatsMetamorpheusFiles}(msstats_object, MBR = TRUE, qvalue_cutoff = 0.05) diff --git a/man/dot-cleanRawDIANN.Rd b/man/dot-cleanRawDIANN.Rd index 6a61b2cc..dbdd9840 100644 --- a/man/dot-cleanRawDIANN.Rd +++ b/man/dot-cleanRawDIANN.Rd @@ -10,7 +10,9 @@ quantificationColumn = "FragmentQuantCorrected", global_qvalue_cutoff = 0.01, qvalue_cutoff = 0.01, - pg_qvalue_cutoff = 0.01 + pg_qvalue_cutoff = 0.01, + calculateAnomalyScores = FALSE, + anomalyModelFeatures = c() ) } \arguments{ diff --git a/src/isolation_forest.cpp b/src/isolation_forest.cpp index f1abac22..7ac6dae9 100644 --- a/src/isolation_forest.cpp +++ b/src/isolation_forest.cpp @@ -201,6 +201,269 @@ DataFrame calculate_anomaly_score( ); } +// #include +// #include +// #include +// #include +// #include +// #include +// #include + +// using namespace Rcpp; + +// const double EM_CONSTANT = 0.5772156649; // Euler-Mascheroni constant +// std::mt19937 gen(42); // Fixed random seed for reproducibility + +// struct IsolationTreeNode { +// bool is_leaf; +// int size; +// std::string feature; +// double value; +// bool is_missing_split; +// std::unique_ptr left; +// std::unique_ptr right; + +// IsolationTreeNode(int s) +// : 
is_leaf(true), +// size(s), +// feature(""), +// value(0.0), +// is_missing_split(false), +// left(nullptr), +// right(nullptr) {} + +// IsolationTreeNode( +// std::string feat, +// double val, +// bool missing_split, +// std::unique_ptr l, +// std::unique_ptr r +// ) +// : is_leaf(false), +// size(0), +// feature(std::move(feat)), +// value(val), +// is_missing_split(missing_split), +// left(std::move(l)), +// right(std::move(r)) {} +// }; + +// std::vector> +// convert_dataframe(const DataFrame& df) { +// std::vector> data; + +// int n_rows = df.nrows(); +// CharacterVector col_names = df.names(); +// int n_cols = col_names.size(); + +// data.reserve(n_rows); + +// for (int i = 0; i < n_rows; ++i) { +// std::unordered_map row; +// row.reserve(n_cols); + +// for (int j = 0; j < n_cols; ++j) { +// NumericVector col = df[j]; +// row[as(col_names[j])] = col[i]; +// } +// data.push_back(std::move(row)); +// } + +// return data; +// } + +// std::unique_ptr +// isolation_tree( +// const std::vector>& data, +// int depth = 0, +// int max_depth = -1 +// ) { +// int n = static_cast(data.size()); + +// if (max_depth == -1) { +// max_depth = static_cast(std::log2(n)); +// } + +// if (n <= 1 || depth >= max_depth) { +// return std::make_unique(n); +// } + +// std::vector features; +// features.reserve(data[0].size()); + +// for (const auto& kv : data[0]) { +// features.push_back(kv.first); +// } + +// std::uniform_int_distribution<> feat_dist(0, features.size() - 1); +// std::string split_feature = features[feat_dist(gen)]; + +// double min_val = data[0].at(split_feature); +// double max_val = min_val; +// int missing_count = 0; + +// for (const auto& row : data) { +// double val = row.at(split_feature); + +// if (std::isnan(val)) { +// missing_count++; +// continue; +// } + +// min_val = std::min(min_val, val); +// max_val = std::max(max_val, val); +// } + +// bool has_missing = (missing_count > 0); + +// double missing_fraction = +// static_cast(missing_count) / 
static_cast(n); + +// if (min_val == max_val && !has_missing) { +// return std::make_unique(n); +// } + +// // Calibrated missing split probability: +// // - rare missing -> rare split (more anomalous) +// // - common missing -> more missing splits (less anomalous) +// double p_miss = std::min(0.5, missing_fraction); + +// bool is_missing_split = false; +// double split_value = 0.0; + +// if (has_missing && +// (std::bernoulli_distribution(p_miss)(gen) || min_val == max_val)) { +// is_missing_split = true; +// } else { +// std::uniform_real_distribution<> split_dist(min_val, max_val); +// split_value = split_dist(gen); +// } + +// std::vector> left_data; +// std::vector> right_data; +// left_data.reserve(n); +// right_data.reserve(n); + +// for (const auto& row : data) { +// double v = row.at(split_feature); + +// if (is_missing_split) { +// if (std::isnan(v)) { +// left_data.push_back(row); +// } else { +// right_data.push_back(row); +// } +// } else { +// if (v < split_value) { +// left_data.push_back(row); +// } else { +// right_data.push_back(row); +// } +// } +// } + +// return std::make_unique( +// split_feature, +// split_value, +// is_missing_split, +// isolation_tree(left_data, depth + 1, max_depth), +// isolation_tree(right_data, depth + 1, max_depth) +// ); +// } + +// std::vector> +// isolation_forest( +// const std::vector>& data, +// int n_trees, +// int max_depth +// ) { +// std::vector> forest; +// forest.reserve(n_trees); + +// for (int i = 0; i < n_trees; ++i) { +// forest.push_back(isolation_tree(data, 0, max_depth)); +// } + +// return forest; +// } + +// // IMPORTANT FIX: respect is_missing_split during traversal +// int path_length( +// const IsolationTreeNode* node, +// const std::unordered_map& obs, +// int depth = 0 +// ) { +// if (node->is_leaf) { +// return depth + node->size; +// } + +// auto it = obs.find(node->feature); +// if (it == obs.end()) { +// // Feature absent from obs map. Fall back: stop at current depth. 
+// return depth; +// } + +// double x = it->second; + +// if (node->is_missing_split) { +// if (std::isnan(x)) { +// return path_length(node->left.get(), obs, depth + 1); +// } +// return path_length(node->right.get(), obs, depth + 1); +// } + +// if (std::isnan(x)) { +// // Numeric split but x missing: choose a side deterministically. +// // Here: treat missing as going left. +// return path_length(node->left.get(), obs, depth + 1); +// } + +// if (x < node->value) { +// return path_length(node->left.get(), obs, depth + 1); +// } + +// return path_length(node->right.get(), obs, depth + 1); +// } + +// // [[Rcpp::export]] +// DataFrame calculate_anomaly_score(DataFrame df, int n_trees, int max_depth) { +// std::vector> data = +// convert_dataframe(df); + +// int n = static_cast(data.size()); + +// auto forest = isolation_forest(data, n_trees, max_depth); + +// std::vector avg_path_length(n, 0.0); + +// for (int i = 0; i < n; ++i) { +// double total_path_length = 0.0; + +// for (const auto& tree : forest) { +// total_path_length += path_length(tree.get(), data[i]); +// } + +// avg_path_length[i] = total_path_length / static_cast(n_trees); +// } + +// double c_n = 2.0 * (std::log(n - 1.0) + EM_CONSTANT) - +// (2.0 * (n - 1.0) / n); + +// std::vector scores(n, 0.0); + +// for (int i = 0; i < n; ++i) { +// scores[i] = std::pow(2.0, -(avg_path_length[i] / c_n)); +// } + +// return DataFrame::create( +// _["avg_depth"] = avg_path_length, +// _["anomaly_score"] = scores +// ); +// } + + +// TODO: Parallelization would be faster within RcppParallel framework +// but requires setup and testing. // Parallel task struct to process each data frame // struct ParallelTask : public Worker { // // Input list of data frames