From 1e805b9e178f390a022d15509197b0e8179e14e9 Mon Sep 17 00:00:00 2001 From: "Robert B. Young" Date: Tue, 3 Mar 2026 16:34:47 +1030 Subject: [PATCH 1/2] Enhance Van Krevelen and DBE plots with all-class view and log scaling Make classe parameter optional in plot_van_krevelen() and plot_dbe_vs_carbon_number(). When omitted, plots all assigned classes using existing atoms_ratio_all/carbon_number_all/dbe_all helper methods. New parameters: - log_abundance: log10 color scaling for high dynamic range data - alpha: configurable point transparency Also adds abundance-based z-ordering (higher values on top), colorbars, and grid lines. Fully backward-compatible. Co-Authored-By: Claude Opus 4.6 --- corems/molecular_id/factory/classification.py | 161 +++++++++++++----- 1 file changed, 121 insertions(+), 40 deletions(-) diff --git a/corems/molecular_id/factory/classification.py b/corems/molecular_id/factory/classification.py index 063e5320..1350ebd9 100644 --- a/corems/molecular_id/factory/classification.py +++ b/corems/molecular_id/factory/classification.py @@ -699,14 +699,15 @@ def plot_ms_class(self, classe, color="g"): return ax def plot_van_krevelen( - self, classe, max_hc=2.5, max_oc=2, ticks_number=5, color="viridis" + self, classe=None, max_hc=2.5, max_oc=2, ticks_number=5, color="viridis", + alpha=0.5, log_abundance=False ): - """Plot Van Krevelen Diagram + """Plot Van Krevelen Diagram for a single class or all assigned classes Parameters ---------- - classe : str - Class name + classe : str, optional + Class name or None to plot all assigned classes, by default None max_hc : float, optional Max H/C ratio, by default 2.5 max_oc : float, optional @@ -714,50 +715,91 @@ def plot_van_krevelen( ticks_number : int, optional Number of ticks, by default 5 color : str, optional - Matplotlib color, by default "viridis" + Matplotlib color/colormap, by default "viridis" + alpha : float, optional + Transparency of points, by default 0.5 + log_abundance : bool, optional + If True, use log10 scale for abundance values, by default False Returns ------- ax : matplotlib.axes Matplotlib axes object - abun_perc : float - Class percentile of the relative abundance + abun_perc : float or None + Class percentile of the relative abundance (if classe specified) """ - if classe != Labels.unassigned: - # get data + import numpy as np + ax = plt.gca() + + if classe is not None and classe != Labels.unassigned: + # Single class plot abun_perc = self.abundance_count_percentile(classe) hc = self.atoms_ratio(classe, "H", "C") oc = self.atoms_ratio(classe, "O", "C") abundance = self.abundance(classe) - # plot data - ax = plt.gca() + if log_abundance: + abundance = [np.log10(a + 1e-10) for a in abundance] + colorbar_label = 'log\u2081\u2080(Abundance)' + else: + colorbar_label = 'Abundance' - ax.scatter(oc, hc, c=abundance, alpha=0.5, cmap=color) + # Sort by abundance so higher values are plotted on top + sorted_indices = sorted(range(len(abundance)), key=lambda i: abundance[i]) + hc = [hc[i] for i in sorted_indices] + oc = [oc[i] for i in sorted_indices] + abundance = [abundance[i] for i in sorted_indices] - # ax.scatter(carbon_number, dbe, c=color, alpha=0.5) + scatter = ax.scatter(oc, hc, c=abundance, alpha=alpha, cmap=color) + plt.colorbar(scatter, label=colorbar_label) title = "%s, %.2f %%" % (classe, abun_perc) ax.set_title(title) - ax.set_xlabel("O/C", fontsize=16) - ax.set_ylabel("H/C", fontsize=16) - ax.tick_params(axis="both", which="major", labelsize=18) - ax.set_xticks(linspace(0, max_oc, ticks_number, endpoint=True)) - ax.set_yticks(linspace(0, max_hc, ticks_number, endpoint=True)) - # returns matplot axes obj and the class percentile of the relative abundance + return_val = ax, abun_perc + else: + # All assigned classes plot + hc = self.atoms_ratio_all("H", "C") + oc = self.atoms_ratio_all("O", "C") + abundance = self.abundance_assigned() + + if log_abundance: + abundance = [np.log10(a + 1e-10) for a in abundance] + colorbar_label = 'log\u2081\u2080(Abundance)' + else: + colorbar_label = 'Abundance' + + sorted_indices = sorted(range(len(abundance)), key=lambda i: abundance[i]) + hc = [hc[i] for i in sorted_indices] + oc = [oc[i] for i in sorted_indices] + abundance = [abundance[i] for i in sorted_indices] + + scatter = ax.scatter(oc, hc, c=abundance, alpha=alpha, cmap=color) + plt.colorbar(scatter, label=colorbar_label) + + ax.set_title("Van Krevelen Diagram - All Assigned Classes") + + return_val = ax + + ax.set_xlabel("O/C", fontsize=16) + ax.set_ylabel("H/C", fontsize=16) + ax.tick_params(axis="both", which="major", labelsize=18) + ax.set_xticks(linspace(0, max_oc, ticks_number, endpoint=True)) + ax.set_yticks(linspace(0, max_hc, ticks_number, endpoint=True)) + ax.grid(alpha=0.3, linestyle='--') - return ax, abun_perc + return return_val def plot_dbe_vs_carbon_number( - self, classe, max_c=50, max_dbe=40, dbe_incr=5, c_incr=10, color="viridis" + self, classe=None, max_c=50, max_dbe=40, dbe_incr=5, c_incr=10, color="viridis", + alpha=0.5, log_abundance=False ): - """Plot DBE vs Carbon Number + """Plot DBE vs Carbon Number for a single class or all assigned classes Parameters ---------- - classe : str - Class name + classe : str, optional + Class name or None to plot all assigned classes, by default None max_c : int, optional Max Carbon Number, by default 50 max_dbe : int, optional @@ -767,37 +809,76 @@ def plot_dbe_vs_carbon_number( c_incr : int, optional Carbon Number increment, by default 10 color : str, optional - Matplotlib color, by default "viridis" + Matplotlib color/colormap, by default "viridis" + alpha : float, optional + Transparency of points, by default 0.5 + log_abundance : bool, optional + If True, use log10 scale for abundance values, by default False Returns ------- ax : matplotlib.axes Matplotlib axes object - abun_perc : float - Class percentile of the relative abundance + abun_perc : float or None + Class percentile of the relative abundance (if classe specified) """ - if classe != Labels.unassigned: - # get data + import numpy as np + ax = plt.gca() + + if classe is not None and classe != Labels.unassigned: + # Single class plot abun_perc = self.abundance_count_percentile(classe) carbon_number = self.carbon_number(classe) dbe = self.dbe(classe) abundance = self.abundance(classe) - # plot data - ax = plt.gca() + if log_abundance: + abundance = [np.log10(a + 1e-10) for a in abundance] + colorbar_label = 'log\u2081\u2080(Abundance)' + else: + colorbar_label = 'Abundance' - ax.scatter(carbon_number, dbe, c=abundance, alpha=0.5, cmap=color) + sorted_indices = sorted(range(len(abundance)), key=lambda i: abundance[i]) + carbon_number = [carbon_number[i] for i in sorted_indices] + dbe = [dbe[i] for i in sorted_indices] + abundance = [abundance[i] for i in sorted_indices] - # ax.scatter(carbon_number, dbe, c=color, alpha=0.5) + scatter = ax.scatter(carbon_number, dbe, c=abundance, alpha=alpha, cmap=color) + plt.colorbar(scatter, label=colorbar_label) title = "%s, %.2f %%" % (classe, abun_perc) ax.set_title(title) - ax.set_xlabel("Carbon number", fontsize=16) - ax.set_ylabel("DBE", fontsize=16) - ax.tick_params(axis="both", which="major", labelsize=18) - ax.set_xticks(range(0, max_c, c_incr)) - ax.set_yticks(range(0, max_dbe, dbe_incr)) - # returns matplot axes obj and the class percentile of the relative abundance + return_val = ax, abun_perc + else: + # All assigned classes plot + carbon_number = self.carbon_number_all() + dbe = self.dbe_all() + abundance = self.abundance_assigned() + + if log_abundance: + abundance = [np.log10(a + 1e-10) for a in abundance] + colorbar_label = 'log\u2081\u2080(Abundance)' + else: + colorbar_label = 'Abundance' + + sorted_indices = sorted(range(len(abundance)), key=lambda i: abundance[i]) + carbon_number = [carbon_number[i] for i in sorted_indices] + dbe = [dbe[i] for i in sorted_indices] + abundance = [abundance[i] for i in sorted_indices] + + scatter = ax.scatter(carbon_number, dbe, c=abundance, alpha=alpha, cmap=color) + plt.colorbar(scatter, label=colorbar_label) + + ax.set_title("DBE vs Carbon Number - All Assigned Classes") + + return_val = ax + + ax.set_xlabel("Carbon number", fontsize=16) + ax.set_ylabel("DBE", fontsize=16) + ax.tick_params(axis="both", which="major", labelsize=18) + ax.set_xticks(range(0, max_c, c_incr)) + ax.set_yticks(range(0, max_dbe, dbe_incr)) + ax.grid(alpha=0.3, linestyle='--') - return ax, abun_perc + return return_val From 8463eab1beb131c5f38c0de3e5dce55ae963b2cc Mon Sep 17 00:00:00 2001 From: "Robert B. Young" Date: Wed, 4 Mar 2026 17:07:33 +1030 Subject: [PATCH 2/2] Add tests for enhanced Van Krevelen and DBE plot methods Test backward-compatible single-class plots, new all-classes view, and log10 abundance scaling for both plot_van_krevelen() and plot_dbe_vs_carbon_number(). Uses shipped Bruker SRFA test data. Co-Authored-By: Claude Opus 4.6 --- tests/test_classification.py | 103 ++++++++++++++++++++++++++++++++--- 1 file changed, 94 insertions(+), 9 deletions(-) diff --git a/tests/test_classification.py b/tests/test_classification.py index a5826b6e..fa7a7346 100644 --- a/tests/test_classification.py +++ b/tests/test_classification.py @@ -1,13 +1,20 @@ import sys +import matplotlib +matplotlib.use("Agg") +import matplotlib.pyplot as plt +import pytest + from corems.molecular_id.factory.classification import HeteroatomsClassification, Labels from corems.molecular_id.search.molecularFormulaSearch import SearchMolecularFormulas -def test_heteroatoms_classification(mass_spectrum_ftms, postgres_database): +@pytest.fixture +def classified_mass_spectrum(mass_spectrum_ftms, postgres_database): + """Run molecular formula search and return a HeteroatomsClassification object.""" mass_spectrum_ftms.molecular_search_settings.url_database = postgres_database mass_spectrum_ftms.molecular_search_settings.error_method = 'None' - mass_spectrum_ftms.molecular_search_settings.min_ppm_error = -10 + mass_spectrum_ftms.molecular_search_settings.min_ppm_error = -10 mass_spectrum_ftms.molecular_search_settings.max_ppm_error = 10 mass_spectrum_ftms.molecular_search_settings.mz_error_range = 1 mass_spectrum_ftms.molecular_search_settings.isProtonated = True @@ -16,24 +23,102 @@ def test_heteroatoms_classification(mass_spectrum_ftms, postgres_database): usedAtoms = {'C': (1, 100), 'H': (4, 200), 'O': (1, 18)} mass_spectrum_ftms.molecular_search_settings.usedAtoms = usedAtoms - # Check that there are not assigned peaks assert mass_spectrum_ftms.percentile_assigned()[2] == 0 - SearchMolecularFormulas(mass_spectrum_ftms).run_worker_mass_spectrum() - - # Check if search was successful assert mass_spectrum_ftms.percentile_assigned()[2] > 0 - mass_spectrum_by_classes = HeteroatomsClassification(mass_spectrum_ftms) + return HeteroatomsClassification(mass_spectrum_ftms) + + +def test_heteroatoms_classification(classified_mass_spectrum): + mass_spectrum_by_classes = classified_mass_spectrum # Check that the plot is created mass_spectrum_by_classes.plot_ms_assigned_unassigned() # Check that ratios, DBE, carbon number, abundance and mz_exp are calculated - + assert len(mass_spectrum_by_classes.atoms_ratio_all("H", "C")) > 0 assert len(mass_spectrum_by_classes.dbe_all()) > 0 assert len(mass_spectrum_by_classes.abundance_assigned()) > 0 assert len(mass_spectrum_by_classes.mz_exp_assigned()) > 0 assert mass_spectrum_by_classes.abundance_count_percentile(Labels.unassigned) > 0 - assert mass_spectrum_by_classes.peaks_count_percentile(Labels.unassigned) > 0 \ No newline at end of file + assert mass_spectrum_by_classes.peaks_count_percentile(Labels.unassigned) > 0 + + +def test_plot_van_krevelen_single_class(classified_mass_spectrum): + """Test Van Krevelen plot for a single heteroatom class (backward compat).""" + mass_spectrum = classified_mass_spectrum + # Get a valid class name from the assigned classes + assigned_classes = [c for c in mass_spectrum.get_classes() if c != Labels.unassigned] + assert len(assigned_classes) > 0, "No assigned classes found" + + plt.figure() + result = mass_spectrum.plot_van_krevelen(assigned_classes[0]) + ax, abun_perc = result + assert ax is not None + assert abun_perc > 0 + assert assigned_classes[0] in ax.get_title() + assert ax.get_xlabel() == "O/C" + assert ax.get_ylabel() == "H/C" + plt.close() + + +def test_plot_van_krevelen_all_classes(classified_mass_spectrum): + """Test Van Krevelen plot for all assigned classes (new functionality).""" + plt.figure() + ax = classified_mass_spectrum.plot_van_krevelen() + assert ax is not None + assert "All Assigned Classes" in ax.get_title() + assert ax.get_xlabel() == "O/C" + assert ax.get_ylabel() == "H/C" + plt.close() + + +def test_plot_van_krevelen_log_abundance(classified_mass_spectrum): + """Test Van Krevelen plot with log10 abundance scaling.""" + plt.figure() + ax = classified_mass_spectrum.plot_van_krevelen(log_abundance=True) + assert ax is not None + # Check that colorbar has log label + cbar = ax.figure.axes[-1] # colorbar is the last axes + assert "log" in cbar.get_ylabel().lower() or "log" in cbar.get_ylabel() + plt.close() + + +def test_plot_dbe_vs_carbon_number_single_class(classified_mass_spectrum): + """Test DBE vs Carbon Number plot for a single class (backward compat).""" + mass_spectrum = classified_mass_spectrum + assigned_classes = [c for c in mass_spectrum.get_classes() if c != Labels.unassigned] + assert len(assigned_classes) > 0 + + plt.figure() + result = mass_spectrum.plot_dbe_vs_carbon_number(assigned_classes[0]) + ax, abun_perc = result + assert ax is not None + assert abun_perc > 0 + assert assigned_classes[0] in ax.get_title() + assert ax.get_xlabel() == "Carbon number" + assert ax.get_ylabel() == "DBE" + plt.close() + + +def test_plot_dbe_vs_carbon_number_all_classes(classified_mass_spectrum): + """Test DBE vs Carbon Number plot for all assigned classes (new functionality).""" + plt.figure() + ax = classified_mass_spectrum.plot_dbe_vs_carbon_number() + assert ax is not None + assert "All Assigned Classes" in ax.get_title() + assert ax.get_xlabel() == "Carbon number" + assert ax.get_ylabel() == "DBE" + plt.close() + + +def test_plot_dbe_vs_carbon_number_log_abundance(classified_mass_spectrum): + """Test DBE vs Carbon Number plot with log10 abundance scaling.""" + plt.figure() + ax = classified_mass_spectrum.plot_dbe_vs_carbon_number(log_abundance=True) + assert ax is not None + cbar = ax.figure.axes[-1] + assert "log" in cbar.get_ylabel().lower() or "log" in cbar.get_ylabel() + plt.close() \ No newline at end of file