Skip to content

Commit 208cdc8

Browse files
authored
Merge pull request #258 from mwang87/victoria-bug
Victoria Bug Testing and Fix
2 parents dc4949d + 0d59185 commit 208cdc8

6 files changed

Lines changed: 179 additions & 4 deletions

massql/msql_engine.py

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -209,7 +209,8 @@ def _evalute_variable_query(parsed_dict, input_filename,
209209

210210
presearch_parse["conditions"] = non_variable_conditions
211211

212-
ms1_df, ms2_df = _executeconditions_query(presearch_parse, input_filename,
212+
ms1_df, ms2_df = _executeconditions_query(presearch_parse, input_filename,
213+
ms1_input_df=ms1_df, ms2_input_df=ms2_df,
213214
cache=cache, cache_dir=cache_dir, cache_file=cache_file)
214215
variable_x_ms1_df = ms1_df
215216

tests/get_data.sh

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -9,7 +9,7 @@ wget --no-verbose --output-document=NS_1x_test.mzML "https://massiveproxy.gnps2.
99
wget --no-verbose --output-document=JB_182_2_fe.mzML "https://massiveproxy.gnps2.org/massiveproxy/MSV000084289/ccms_peak/JB_182_2_fe.mzML"
1010
wget --no-verbose --output-document=S_N2_neutral_Zn.mzML "https://massiveproxy.gnps2.org/massiveproxy/MSV000083387/updates/2019-11-12_allegraaron_e893cb7e/peak/S_N2_neutral_Zn.mzML"
1111
wget --no-verbose --output-document=gnps-library.json "https://external.gnps2.org/gnpslibrary/GNPS-LIBRARY.json"
12-
wget --no-verbose --output-document=specs_ms.mgf "http://massive.ucsd.edu/ProteoSAFe/DownloadResultFile?task=5ecfcf81cb3c471698995b194d8246a0&block=main&file=spectra/specs_ms.mgf"
12+
wget --no-verbose --tries=3 --waitretry=5 --output-document=specs_ms.mgf "https://massive.ucsd.edu/ProteoSAFe/DownloadResultFile?task=5ecfcf81cb3c471698995b194d8246a0&block=main&file=spectra/specs_ms.mgf"
1313
wget --no-verbose --output-document=1810E-II.mzML "https://massiveproxy.gnps2.org/massiveproxy/MSV000084691/ccms_peak/1810E-II.mzML"
1414
wget --no-verbose --output-document=T04251505.mzXML "https://massiveproxy.gnps2.org/massiveproxy/MSV000082797/ccms_peak/raw/MTBLS368/T04251505.mzXML"
1515
wget --no-verbose --output-document=isa_9_fe.mzML "https://massiveproxy.gnps2.org/massiveproxy/MSV000084030/ccms_peak/isa_9_fe.mzML"
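
[File path not captured in this extract; the `@@ -0,0 +1,114 @@` hunk below is a newly added file, separate from tests/get_data.sh above.]

Lines changed: 114 additions & 0 deletions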
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,114 @@
1+
{
2+
"conditions": [
3+
{
4+
"conditiontype": "where",
5+
"qualifiers": {
6+
"qualifiermztolerance": {
7+
"comparator": "equal",
8+
"name": "qualifiermztolerance",
9+
"unit": "mz",
10+
"value": 0.05
11+
},
12+
"type": "qualifier"
13+
},
14+
"type": "ms2productcondition",
15+
"value": [
16+
337.25
17+
]
18+
},
19+
{
20+
"conditiontype": "where",
21+
"qualifiers": {
22+
"qualifiermztolerance": {
23+
"comparator": "equal",
24+
"name": "qualifiermztolerance",
25+
"unit": "mz",
26+
"value": 0.05
27+
},
28+
"type": "qualifier"
29+
},
30+
"type": "ms2productcondition",
31+
"value": [
32+
319.24
33+
]
34+
},
35+
{
36+
"conditiontype": "where",
37+
"type": "ms2precursorcondition",
38+
"value": [
39+
"X"
40+
]
41+
},
42+
{
43+
"conditiontype": "where",
44+
"qualifiers": {
45+
"qualifiermztolerance": {
46+
"comparator": "equal",
47+
"name": "qualifiermztolerance",
48+
"unit": "mz",
49+
"value": 0.05
50+
},
51+
"type": "qualifier"
52+
},
53+
"type": "ms2productcondition",
54+
"value": [
55+
"X-390.277"
56+
]
57+
},
58+
{
59+
"conditiontype": "where",
60+
"qualifiers": {
61+
"qualifierintensitymatch": {
62+
"comparator": "equal",
63+
"name": "qualifierintensitymatch",
64+
"value": "Y"
65+
},
66+
"qualifierintensityreference": {
67+
"name": "qualifierintensityreference"
68+
},
69+
"qualifierppmtolerance": {
70+
"comparator": "equal",
71+
"name": "qualifierppmtolerance",
72+
"unit": "ppm",
73+
"value": 40.0
74+
},
75+
"type": "qualifier"
76+
},
77+
"type": "ms2productcondition",
78+
"value": [
79+
319.24
80+
]
81+
},
82+
{
83+
"conditiontype": "where",
84+
"qualifiers": {
85+
"qualifierintensitymatch": {
86+
"comparator": "equal",
87+
"name": "qualifierintensitymatch",
88+
"value": "Y*300.0"
89+
},
90+
"qualifierintensitytolpercent": {
91+
"comparator": "equal",
92+
"name": "qualifierintensitytolpercent",
93+
"value": 99.0
94+
},
95+
"qualifierppmtolerance": {
96+
"comparator": "equal",
97+
"name": "qualifierppmtolerance",
98+
"unit": "ppm",
99+
"value": 40.0
100+
},
101+
"type": "qualifier"
102+
},
103+
"type": "ms2productcondition",
104+
"value": [
105+
201.16
106+
]
107+
}
108+
],
109+
"query": "QUERY scaninfo(MS2DATA) WHERE MS2PROD=337.25:TOLERANCEMZ=0.05 AND MS2PROD=319.24:TOLERANCEMZ=0.05 AND MS2PREC=X AND MS2PROD=X-390.277:TOLERANCEMZ=0.05 AND MS2PROD=319.24:TOLERANCEPPM=40:INTENSITYMATCH=Y:INTENSITYMATCHREFERENCE AND MS2PROD=201.16:TOLERANCEPPM=40:INTENSITYMATCH=Y*300:INTENSITYMATCHPERCENT=99",
110+
"querytype": {
111+
"datatype": "datams2data",
112+
"function": "functionscaninfo"
113+
}
114+
}

tests/test_extraction.py

Lines changed: 5 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -51,8 +51,12 @@ def test_extract_mzXML():
5151
assert(len(merged_summary_df) == 5)
5252

5353
def test_extract_MGF():
54+
mgf_path = "tests/data/specs_ms.mgf"
55+
assert os.path.exists(mgf_path), f"Test data file {mgf_path} not found - download may have failed"
56+
assert os.path.getsize(mgf_path) > 1000, f"Test data file {mgf_path} appears corrupt (too small) - download may have failed"
57+
5458
query = "QUERY scaninfo(MS2DATA)"
55-
results_df = msql_engine.process_query(query, "tests/data/specs_ms.mgf")
59+
results_df = msql_engine.process_query(query, mgf_path)
5660
print(results_df)
5761

5862
assert(len(results_df) > 1)

tests/test_queries.txt

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -45,4 +45,5 @@ QUERY scaninfo(MS2DATA) WHERE MS2PREC=X AND MOBILITY=range(min=X*0.0006775+0.405
4545
QUERY scaninfo(MS2DATA) WHERE MS2PROD=(58.06513 OR 60.04439 OR 70.06513 OR 72.08078 OR 74.06004 OR 84.04439 OR 84.08078 OR 86.09643 OR 87.05529 OR 88.0393 OR 88.07569 OR 100.11208 OR 101.07094 OR 101.10732 OR 102.05495 OR 102.09134 OR 104.05285 OR 110.07127 OR 114.12773 OR 115.08659 OR 115.12297 OR 116.0706 OR 118.0685 OR 120.08078 OR 124.08692 OR 129.10224 OR 129.11347 OR 129.13862 OR 130.08625 OR 132.08415 OR 134.09643 OR 136.07569 OR 138.10257 OR 143.12912 OR 148.11208 OR 150.09134 OR 157.14477 OR 159.09167 OR 164.10699 OR 173.10732 OR 187.12297):CARDINALITY=range(min=2,max=5):TOLERANCEPPM=10:INTENSITYPERCENT=5
4646
QUERY scaninfo(MS2DATA) WHERE MS2PROD=226.18:TOLERANCEPPM=5:EXCLUDED
4747
QUERY scaninfo(MS2DATA) WHERE MS2PROD=formula(C10)
48-
QUERY scaninfo(MS2DATA) WHERE MS2PROD=341.28:TOLERANCEMZ=0.01:INTENSITYPERCENT=2 AND MS2PROD=323.27:TOLERANCEMZ=0.01:INTENSITYPERCENT=2 AND MS2PREC=X AND MS2PROD=X-358.2871:TOLERANCEMZ=0.01:INTENSITYPERCENT=2
48+
QUERY scaninfo(MS2DATA) WHERE MS2PROD=341.28:TOLERANCEMZ=0.01:INTENSITYPERCENT=2 AND MS2PROD=323.27:TOLERANCEMZ=0.01:INTENSITYPERCENT=2 AND MS2PREC=X AND MS2PROD=X-358.2871:TOLERANCEMZ=0.01:INTENSITYPERCENT=2
49+
QUERY scaninfo(MS2DATA) WHERE MS2PROD=337.25:TOLERANCEMZ=0.05 AND MS2PROD=319.24:TOLERANCEMZ=0.05 AND MS2PREC=X AND MS2PROD=X-390.277:TOLERANCEMZ=0.05 AND MS2PROD=319.24:TOLERANCEPPM=40:INTENSITYMATCH=Y:INTENSITYMATCHREFERENCE AND MS2PROD=201.16:TOLERANCEPPM=40:INTENSITYMATCH=Y*300:INTENSITYMATCHPERCENT=99

tests/test_query.py

Lines changed: 55 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -917,6 +917,61 @@ def test_ms2_intensitypercent_gt_lt_eq_tripartite():
917917
assert scans_gt.issubset(scans_eq), "INTENSITYPERCENT= (>=) must include all scans matched by INTENSITYPERCENT>"
918918

919919

920+
def test_ms2_variable_with_intensitymatch():
921+
"""Test a complex query combining MS2PREC=X variable, MS2PROD with INTENSITYMATCH and INTENSITYMATCHREFERENCE."""
922+
import pandas as pd
923+
import numpy as np
924+
925+
query = "QUERY scaninfo(MS2DATA) WHERE MS2PROD=337.25:TOLERANCEMZ=0.05 AND MS2PROD=319.24:TOLERANCEMZ=0.05 AND MS2PREC=X AND MS2PROD=X-390.277:TOLERANCEMZ=0.05 AND MS2PROD=319.24:TOLERANCEPPM=40:INTENSITYMATCH=Y:INTENSITYMATCHREFERENCE AND MS2PROD=201.16:TOLERANCEPPM=40:INTENSITYMATCH=Y*300:INTENSITYMATCHPERCENT=99"
926+
927+
# Verify it parses correctly
928+
parse_obj = msql_parser.parse_msql(query)
929+
print(json.dumps(parse_obj, indent=4))
930+
931+
condition_types = [c["type"] for c in parse_obj["conditions"]]
932+
assert condition_types.count("ms2productcondition") == 5
933+
assert condition_types.count("ms2precursorcondition") == 1
934+
935+
# Check INTENSITYMATCH qualifiers are present
936+
ref_conditions = [c for c in parse_obj["conditions"]
937+
if "qualifiers" in c and "qualifierintensityreference" in c.get("qualifiers", {})]
938+
assert len(ref_conditions) == 1
939+
940+
match_conditions = [c for c in parse_obj["conditions"]
941+
if "qualifiers" in c and "qualifierintensitytolpercent" in c.get("qualifiers", {})]
942+
assert len(match_conditions) == 1
943+
assert match_conditions[0]["qualifiers"]["qualifierintensitymatch"]["value"] == "Y*300.0"
944+
945+
# Test execution with synthetic data that has matching peaks
946+
precmz = 710.0
947+
rows = []
948+
peaks = [
949+
(337.25, 1000.0),
950+
(319.24, 500.0), # reference Y
951+
(201.16, 150000.0), # Y*300 = 500*300 = 150000
952+
(319.723, 800.0), # X-390.277 = 710-390.277 = 319.723
953+
]
954+
for mz, intensity in peaks:
955+
rows.append({
956+
'scan': 1, 'ms1scan': 0, 'rt': 1.0,
957+
'mz': mz, 'i': intensity,
958+
'precmz': precmz, 'charge': 1, 'polarity': 1,
959+
'i_norm': intensity / max(p[1] for p in peaks),
960+
'i_tic_norm': intensity / sum(p[1] for p in peaks),
961+
})
962+
ms2_df = pd.DataFrame(rows)
963+
ms1_df = pd.DataFrame({
964+
'scan': [0], 'rt': [1.0], 'mz': [precmz], 'i': [10000.0],
965+
'i_norm': [1.0], 'i_tic_norm': [1.0], 'polarity': [1]
966+
})
967+
968+
results_df = msql_engine.process_query(query, "tests/data/GNPS00002_A3_p.mzML",
969+
ms1_df=ms1_df, ms2_df=ms2_df)
970+
print(results_df)
971+
assert len(results_df) > 0, "Query should find the matching scan in synthetic data"
972+
assert 1 in results_df["scan"].values
973+
974+
920975
def debug_query():
921976
query = "QUERY scaninfo(MS2DATA) WHERE MS2PROD=341.28:TOLERANCEMZ=0.01:INTENSITYPERCENT=2 AND MS2PROD=323.27:TOLERANCEMZ=0.01:INTENSITYPERCENT=2 AND MS2PREC=X AND MS2PROD=X-358.2871:TOLERANCEMZ=0.01:INTENSITYPERCENT=2"
922977

0 commit comments

Comments
 (0)