Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
161 changes: 121 additions & 40 deletions corems/molecular_id/factory/classification.py
Original file line number Diff line number Diff line change
Expand Up @@ -699,65 +699,107 @@ def plot_ms_class(self, classe, color="g"):
return ax

def plot_van_krevelen(
self, classe, max_hc=2.5, max_oc=2, ticks_number=5, color="viridis"
self, classe=None, max_hc=2.5, max_oc=2, ticks_number=5, color="viridis",
alpha=0.5, log_abundance=False
):
"""Plot Van Krevelen Diagram
"""Plot Van Krevelen Diagram for a single class or all assigned classes

Parameters
----------
classe : str
Class name
classe : str, optional
Class name or None to plot all assigned classes, by default None
max_hc : float, optional
Max H/C ratio, by default 2.5
max_oc : float, optional
Max O/C ratio, by default 2
ticks_number : int, optional
Number of ticks, by default 5
color : str, optional
Matplotlib color, by default "viridis"
Matplotlib color/colormap, by default "viridis"
alpha : float, optional
Transparency of points, by default 0.5
log_abundance : bool, optional
If True, use log10 scale for abundance values, by default False

Returns
-------
ax : matplotlib.axes
Matplotlib axes object
abun_perc : float
Class percentile of the relative abundance
abun_perc : float or None
Class percentile of the relative abundance (if classe specified)
"""
if classe != Labels.unassigned:
# get data
import numpy as np
ax = plt.gca()

if classe is not None and classe != Labels.unassigned:
# Single class plot
abun_perc = self.abundance_count_percentile(classe)
hc = self.atoms_ratio(classe, "H", "C")
oc = self.atoms_ratio(classe, "O", "C")
abundance = self.abundance(classe)

# plot data
ax = plt.gca()
if log_abundance:
abundance = [np.log10(a + 1e-10) for a in abundance]
colorbar_label = 'log\u2081\u2080(Abundance)'
else:
colorbar_label = 'Abundance'

ax.scatter(oc, hc, c=abundance, alpha=0.5, cmap=color)
# Sort by abundance so higher values are plotted on top
sorted_indices = sorted(range(len(abundance)), key=lambda i: abundance[i])
hc = [hc[i] for i in sorted_indices]
oc = [oc[i] for i in sorted_indices]
abundance = [abundance[i] for i in sorted_indices]

# ax.scatter(carbon_number, dbe, c=color, alpha=0.5)
scatter = ax.scatter(oc, hc, c=abundance, alpha=alpha, cmap=color)
plt.colorbar(scatter, label=colorbar_label)

title = "%s, %.2f %%" % (classe, abun_perc)
ax.set_title(title)
ax.set_xlabel("O/C", fontsize=16)
ax.set_ylabel("H/C", fontsize=16)
ax.tick_params(axis="both", which="major", labelsize=18)
ax.set_xticks(linspace(0, max_oc, ticks_number, endpoint=True))
ax.set_yticks(linspace(0, max_hc, ticks_number, endpoint=True))

# returns matplot axes obj and the class percentile of the relative abundance
return_val = ax, abun_perc
else:
# All assigned classes plot
hc = self.atoms_ratio_all("H", "C")
oc = self.atoms_ratio_all("O", "C")
abundance = self.abundance_assigned()

if log_abundance:
abundance = [np.log10(a + 1e-10) for a in abundance]
colorbar_label = 'log\u2081\u2080(Abundance)'
else:
colorbar_label = 'Abundance'

sorted_indices = sorted(range(len(abundance)), key=lambda i: abundance[i])
hc = [hc[i] for i in sorted_indices]
oc = [oc[i] for i in sorted_indices]
abundance = [abundance[i] for i in sorted_indices]

scatter = ax.scatter(oc, hc, c=abundance, alpha=alpha, cmap=color)
plt.colorbar(scatter, label=colorbar_label)

ax.set_title("Van Krevelen Diagram - All Assigned Classes")

return_val = ax

ax.set_xlabel("O/C", fontsize=16)
ax.set_ylabel("H/C", fontsize=16)
ax.tick_params(axis="both", which="major", labelsize=18)
ax.set_xticks(linspace(0, max_oc, ticks_number, endpoint=True))
ax.set_yticks(linspace(0, max_hc, ticks_number, endpoint=True))
ax.grid(alpha=0.3, linestyle='--')

return ax, abun_perc
return return_val

def plot_dbe_vs_carbon_number(
self, classe, max_c=50, max_dbe=40, dbe_incr=5, c_incr=10, color="viridis"
self, classe=None, max_c=50, max_dbe=40, dbe_incr=5, c_incr=10, color="viridis",
alpha=0.5, log_abundance=False
):
"""Plot DBE vs Carbon Number
"""Plot DBE vs Carbon Number for a single class or all assigned classes

Parameters
----------
classe : str
Class name
classe : str, optional
Class name or None to plot all assigned classes, by default None
max_c : int, optional
Max Carbon Number, by default 50
max_dbe : int, optional
Expand All @@ -767,37 +809,76 @@ def plot_dbe_vs_carbon_number(
c_incr : int, optional
Carbon Number increment, by default 10
color : str, optional
Matplotlib color, by default "viridis"
Matplotlib color/colormap, by default "viridis"
alpha : float, optional
Transparency of points, by default 0.5
log_abundance : bool, optional
If True, use log10 scale for abundance values, by default False

Returns
-------
ax : matplotlib.axes
Matplotlib axes object
abun_perc : float
Class percentile of the relative abundance
abun_perc : float or None
Class percentile of the relative abundance (if classe specified)
"""
if classe != Labels.unassigned:
# get data
import numpy as np
ax = plt.gca()

if classe is not None and classe != Labels.unassigned:
# Single class plot
abun_perc = self.abundance_count_percentile(classe)
carbon_number = self.carbon_number(classe)
dbe = self.dbe(classe)
abundance = self.abundance(classe)

# plot data
ax = plt.gca()
if log_abundance:
abundance = [np.log10(a + 1e-10) for a in abundance]
colorbar_label = 'log\u2081\u2080(Abundance)'
else:
colorbar_label = 'Abundance'

ax.scatter(carbon_number, dbe, c=abundance, alpha=0.5, cmap=color)
sorted_indices = sorted(range(len(abundance)), key=lambda i: abundance[i])
carbon_number = [carbon_number[i] for i in sorted_indices]
dbe = [dbe[i] for i in sorted_indices]
abundance = [abundance[i] for i in sorted_indices]

# ax.scatter(carbon_number, dbe, c=color, alpha=0.5)
scatter = ax.scatter(carbon_number, dbe, c=abundance, alpha=alpha, cmap=color)
plt.colorbar(scatter, label=colorbar_label)

title = "%s, %.2f %%" % (classe, abun_perc)
ax.set_title(title)
ax.set_xlabel("Carbon number", fontsize=16)
ax.set_ylabel("DBE", fontsize=16)
ax.tick_params(axis="both", which="major", labelsize=18)
ax.set_xticks(range(0, max_c, c_incr))
ax.set_yticks(range(0, max_dbe, dbe_incr))

# returns matplot axes obj and the class percentile of the relative abundance
return_val = ax, abun_perc
else:
# All assigned classes plot
carbon_number = self.carbon_number_all()
dbe = self.dbe_all()
abundance = self.abundance_assigned()

if log_abundance:
abundance = [np.log10(a + 1e-10) for a in abundance]
colorbar_label = 'log\u2081\u2080(Abundance)'
else:
colorbar_label = 'Abundance'

sorted_indices = sorted(range(len(abundance)), key=lambda i: abundance[i])
carbon_number = [carbon_number[i] for i in sorted_indices]
dbe = [dbe[i] for i in sorted_indices]
abundance = [abundance[i] for i in sorted_indices]

scatter = ax.scatter(carbon_number, dbe, c=abundance, alpha=alpha, cmap=color)
plt.colorbar(scatter, label=colorbar_label)

ax.set_title("DBE vs Carbon Number - All Assigned Classes")

return_val = ax

ax.set_xlabel("Carbon number", fontsize=16)
ax.set_ylabel("DBE", fontsize=16)
ax.tick_params(axis="both", which="major", labelsize=18)
ax.set_xticks(range(0, max_c, c_incr))
ax.set_yticks(range(0, max_dbe, dbe_incr))
ax.grid(alpha=0.3, linestyle='--')

return ax, abun_perc
return return_val
103 changes: 94 additions & 9 deletions tests/test_classification.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,20 @@
import sys

import matplotlib
matplotlib.use("Agg")
import matplotlib.pyplot as plt
import pytest

from corems.molecular_id.factory.classification import HeteroatomsClassification, Labels
from corems.molecular_id.search.molecularFormulaSearch import SearchMolecularFormulas


def test_heteroatoms_classification(mass_spectrum_ftms, postgres_database):
@pytest.fixture
def classified_mass_spectrum(mass_spectrum_ftms, postgres_database):
"""Run molecular formula search and return a HeteroatomsClassification object."""
mass_spectrum_ftms.molecular_search_settings.url_database = postgres_database
mass_spectrum_ftms.molecular_search_settings.error_method = 'None'
mass_spectrum_ftms.molecular_search_settings.min_ppm_error = -10
mass_spectrum_ftms.molecular_search_settings.min_ppm_error = -10
mass_spectrum_ftms.molecular_search_settings.max_ppm_error = 10
mass_spectrum_ftms.molecular_search_settings.mz_error_range = 1
mass_spectrum_ftms.molecular_search_settings.isProtonated = True
Expand All @@ -16,24 +23,102 @@ def test_heteroatoms_classification(mass_spectrum_ftms, postgres_database):
usedAtoms = {'C': (1, 100), 'H': (4, 200), 'O': (1, 18)}
mass_spectrum_ftms.molecular_search_settings.usedAtoms = usedAtoms

# Check that there are not assigned peaks
assert mass_spectrum_ftms.percentile_assigned()[2] == 0

SearchMolecularFormulas(mass_spectrum_ftms).run_worker_mass_spectrum()

# Check if search was successful
assert mass_spectrum_ftms.percentile_assigned()[2] > 0

mass_spectrum_by_classes = HeteroatomsClassification(mass_spectrum_ftms)
return HeteroatomsClassification(mass_spectrum_ftms)


def test_heteroatoms_classification(classified_mass_spectrum):
mass_spectrum_by_classes = classified_mass_spectrum

# Check that the plot is created
mass_spectrum_by_classes.plot_ms_assigned_unassigned()

# Check that ratios, DBE, carbon number, abundance and mz_exp are calculated

assert len(mass_spectrum_by_classes.atoms_ratio_all("H", "C")) > 0
assert len(mass_spectrum_by_classes.dbe_all()) > 0
assert len(mass_spectrum_by_classes.abundance_assigned()) > 0
assert len(mass_spectrum_by_classes.mz_exp_assigned()) > 0
assert mass_spectrum_by_classes.abundance_count_percentile(Labels.unassigned) > 0
assert mass_spectrum_by_classes.peaks_count_percentile(Labels.unassigned) > 0
assert mass_spectrum_by_classes.peaks_count_percentile(Labels.unassigned) > 0


def test_plot_van_krevelen_single_class(classified_mass_spectrum):
"""Test Van Krevelen plot for a single heteroatom class (backward compat)."""
mass_spectrum = classified_mass_spectrum
# Get a valid class name from the assigned classes
assigned_classes = [c for c in mass_spectrum.get_classes() if c != Labels.unassigned]
assert len(assigned_classes) > 0, "No assigned classes found"

plt.figure()
result = mass_spectrum.plot_van_krevelen(assigned_classes[0])
ax, abun_perc = result
assert ax is not None
assert abun_perc > 0
assert assigned_classes[0] in ax.get_title()
assert ax.get_xlabel() == "O/C"
assert ax.get_ylabel() == "H/C"
plt.close()


def test_plot_van_krevelen_all_classes(classified_mass_spectrum):
"""Test Van Krevelen plot for all assigned classes (new functionality)."""
plt.figure()
ax = classified_mass_spectrum.plot_van_krevelen()
assert ax is not None
assert "All Assigned Classes" in ax.get_title()
assert ax.get_xlabel() == "O/C"
assert ax.get_ylabel() == "H/C"
plt.close()


def test_plot_van_krevelen_log_abundance(classified_mass_spectrum):
"""Test Van Krevelen plot with log10 abundance scaling."""
plt.figure()
ax = classified_mass_spectrum.plot_van_krevelen(log_abundance=True)
assert ax is not None
# Check that colorbar has log label
cbar = ax.figure.axes[-1] # colorbar is the last axes
assert "log" in cbar.get_ylabel().lower() or "log" in cbar.get_ylabel()
plt.close()


def test_plot_dbe_vs_carbon_number_single_class(classified_mass_spectrum):
"""Test DBE vs Carbon Number plot for a single class (backward compat)."""
mass_spectrum = classified_mass_spectrum
assigned_classes = [c for c in mass_spectrum.get_classes() if c != Labels.unassigned]
assert len(assigned_classes) > 0

plt.figure()
result = mass_spectrum.plot_dbe_vs_carbon_number(assigned_classes[0])
ax, abun_perc = result
assert ax is not None
assert abun_perc > 0
assert assigned_classes[0] in ax.get_title()
assert ax.get_xlabel() == "Carbon number"
assert ax.get_ylabel() == "DBE"
plt.close()


def test_plot_dbe_vs_carbon_number_all_classes(classified_mass_spectrum):
"""Test DBE vs Carbon Number plot for all assigned classes (new functionality)."""
plt.figure()
ax = classified_mass_spectrum.plot_dbe_vs_carbon_number()
assert ax is not None
assert "All Assigned Classes" in ax.get_title()
assert ax.get_xlabel() == "Carbon number"
assert ax.get_ylabel() == "DBE"
plt.close()


def test_plot_dbe_vs_carbon_number_log_abundance(classified_mass_spectrum):
"""Test DBE vs Carbon Number plot with log10 abundance scaling."""
plt.figure()
ax = classified_mass_spectrum.plot_dbe_vs_carbon_number(log_abundance=True)
assert ax is not None
cbar = ax.figure.axes[-1]
assert "log" in cbar.get_ylabel().lower() or "log" in cbar.get_ylabel()
plt.close()