Skip to content

Commit

Permalink
Merge pull request #153 from mwang87/mobility-testing
Browse files Browse the repository at this point in the history
Mobility Variables in Min/Max
  • Loading branch information
mwang87 authored Aug 28, 2021
2 parents ee93d16 + 2848615 commit 60bf6b4
Show file tree
Hide file tree
Showing 7 changed files with 124 additions and 36 deletions.
12 changes: 8 additions & 4 deletions massql/msql_cmd.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,18 @@
#!/usr/bin/env python

from massql import msql_parser
from massql import msql_engine
from massql import msql_extract

import argparse
import os
import sys
import json
import pandas as pd

# Making sure the root is in the path, kind of a hack
sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))

from massql import msql_parser
from massql import msql_engine
from massql import msql_extract

def main():
parser = argparse.ArgumentParser(description="MSQL CMD")
parser.add_argument('filename', help='Input filename')
Expand Down
105 changes: 80 additions & 25 deletions massql/msql_engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -168,27 +168,48 @@ def _evalute_variable_query(parsed_dict, input_filename, cache=True, parallel=Fa
presearch_parse = copy.deepcopy(parsed_dict)
non_variable_conditions = []
for condition in presearch_parse["conditions"]:
for value in condition["value"]:
if "value" in condition:
for value in condition["value"]:
try:
# Checking if X is in any string
if "X" in value:
continue
except TypeError:
# This is when the target is actually a float
pass
non_variable_conditions.append(condition)
elif "min" in condition:
min_val = condition["min"]
max_val = condition["max"]
try:
if "X" in min_val:
continue
except TypeError:
# This is when the target is actually a float
pass

try:
# Checking if X is in any string
if "X" in value:
if "X" in max_val:
continue
except TypeError:
# This is when the target is actually a float
pass

non_variable_conditions.append(condition)

presearch_parse["conditions"] = non_variable_conditions

ms1_df, ms2_df = _executeconditions_query(presearch_parse, input_filename, cache=cache)
variable_x_ms1_df = ms1_df

# TODO: Checking if we can prefilter the X variable, if there are conditions
# Here we are trying to pre-filter conditions based upon the qualifiers to make the variable search space smaller
for condition in parsed_dict["conditions"]:
if not condition["conditiontype"] == "where":
continue

if not "X" in condition["value"]:
continue
if "value" in condition:
if not "X" in condition["value"]:
continue

# Filtering MS1 peaks only to consider contention for X
if condition["type"] == "ms1mzcondition":
Expand All @@ -198,6 +219,8 @@ def _evalute_variable_query(parsed_dict, input_filename, cache=True, parallel=Fa
(ms1_df["i_norm"] > min_intpercent) &
(ms1_df["i_tic_norm"] > min_tic_percent_intensity)]

# TODO: Do this for other types of variables

# Here we will start with the smallest mass and then go up
masses_considered_df_list = []
if variable_properties["query_ms1"]:
Expand All @@ -217,49 +240,81 @@ def _evalute_variable_query(parsed_dict, input_filename, cache=True, parallel=Fa

running_max_mz = 0
for masses_obj in tqdm(masses_list):
if running_max_mz > masses_obj["mz"]:
mz_val = masses_obj["mz"]

if running_max_mz > mz_val:
continue

# Checking the validity of the mz_val
if mz_val < variable_properties["min"] or mz_val > variable_properties["max"]:
continue
mz_val_defect = mz_val - int(mz_val)
if mz_val_defect < variable_properties["mindefect"] or mz_val_defect > variable_properties["maxdefect"]:
continue

#######################
# Writing new query
#######################
substituted_parse = copy.deepcopy(parsed_dict)
mz_val = masses_obj["mz"]


for condition in substituted_parse["conditions"]:
for i, value in enumerate(condition["value"]):
# Rewriting the condition value
# This is for standard conditions
if "value" in condition:
for i, value in enumerate(condition["value"]):
# Rewriting the condition value
try:
if "X" in value:
new_value = math_parser.parse(value).evaluate({
"X" : mz_val
})
condition["value"][i] = new_value
except TypeError:
# This is when the target is actually a float
pass

# Rewriting the qualifier values
try:
if "qualifiers" in condition:
for qualifier in condition["qualifiers"]:
if "qualifier" in qualifier:
if "value" in condition["qualifiers"][qualifier]:
old_value = condition["qualifiers"][qualifier]["value"]
condition["qualifiers"][qualifier]["value"] = old_value.replace("X", str(mz_val))
except AttributeError:
pass

# TODO: For other types of conditions that might include variables
if "min" in condition:
# Rewriting the condition min
value = condition["min"]
try:
if "X" in value:
new_value = math_parser.parse(value).evaluate({
"X" : mz_val
})
condition["value"][i] = new_value
condition["min"] = new_value
except TypeError:
# This is when the target is actually a float
pass

# Rewriting the qualifier values
if "max" in condition:
# Rewriting the condition max
value = condition["max"]
try:
if "qualifiers" in condition:
for qualifier in condition["qualifiers"]:
if "qualifier" in qualifier:
if "value" in condition["qualifiers"][qualifier]:
old_value = condition["qualifiers"][qualifier]["value"]
condition["qualifiers"][qualifier]["value"] = old_value.replace("X", str(mz_val))
except AttributeError:
if "X" in value:
new_value = math_parser.parse(value).evaluate({
"X" : mz_val
})
condition["max"] = new_value
except TypeError:
# This is when the target is actually a float
pass

# Let's consider this mz
running_max_mz = masses_obj["mz_max"]

# Checking the x conditions
substituted_parse["comment"] = str(mz_val)
if mz_val < variable_properties["min"] or mz_val > variable_properties["max"]:
continue
mz_val_defect = mz_val - int(mz_val)
if mz_val_defect < variable_properties["mindefect"] or mz_val_defect > variable_properties["maxdefect"]:
continue

all_concrete_queries.append(substituted_parse)
else:
Expand Down
7 changes: 5 additions & 2 deletions massql/msql_engine_filters.py
Original file line number Diff line number Diff line change
Expand Up @@ -273,8 +273,11 @@ def ms2prec_condition(condition, ms1_df, ms2_df, reference_conditions_register):
]

# Filtering the MS1 data now
ms1_scans = set(ms2_df["ms1scan"])
ms1_filtered_df = ms1_df[ms1_df["scan"].isin(ms1_scans)]
if len(ms1_df) > 0:
ms1_scans = set(ms2_df["ms1scan"])
ms1_filtered_df = ms1_df[ms1_df["scan"].isin(ms1_scans)]
else:
ms1_filtered_df = ms1_df

ms1_list.append(ms1_filtered_df)
ms2_list.append(ms2_filtered_df)
Expand Down
4 changes: 2 additions & 2 deletions massql/msql_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -191,8 +191,8 @@ def condition(self, items):
condition_dict["type"] = condition_type

if function == "mobilityrange":
condition_dict["min"] = float(items[-2])
condition_dict["max"] = float(items[-1])
condition_dict["min"] = items[-2]
condition_dict["max"] = items[-1]

return condition_dict

Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

setuptools.setup(
name="massql",
version="0.0.7",
version="0.0.8",
author="Mingxun Wang",
author_email="[email protected]",
description="Mass spectrometry query language python implementation",
Expand Down
8 changes: 7 additions & 1 deletion tests/test_parse.py
Original file line number Diff line number Diff line change
Expand Up @@ -183,6 +183,11 @@ def test_mobility():

assert(parsed_output["conditions"][0]["min"] == 100)

def test_mobility_variables():
    """Parse a query whose MOBILITY range bounds are expressions in X.

    Smoke test only: the parsed structure is printed for inspection and
    nothing is asserted about its contents.
    """
    parsed_output = msql_parser.parse_msql(
        "QUERY scaninfo(MS2DATA) WHERE MS2PREC=X AND MOBILITY=range(min=X/100, max=2*X/100)"
    )
    print(parsed_output)

def main():
#test_xrange_parse()
#test_parse()
Expand All @@ -200,7 +205,8 @@ def main():
#test_ms2_or()
#test_ms1_multiple_or()
#test_ms1_multiple_or_with_variable()
test_mobility()
#test_mobility()
test_mobility_variables()


if __name__ == "__main__":
Expand Down
22 changes: 21 additions & 1 deletion tests/test_query.py
Original file line number Diff line number Diff line change
Expand Up @@ -547,6 +547,24 @@ def test_ms2_mobility():

assert(len(results_df) == 8682)

def test_ms2_mobility_variable():
    """Run a variable query whose mobility window is computed from X.

    Both MOBILITY bounds are arithmetic expressions in X, and X itself is
    limited by an explicit range. The result is expected to have 4 rows.
    """
    msql_text = "QUERY scaninfo(MS2DATA) WHERE MS2PREC=X AND MOBILITY=range(min=X*0.0011+0.5-0.1, max=X*0.0011+0.5+0.1) AND X=range(min=854.5, max=854.7)"
    input_path = "tests/data/meoh_water_ms2_1_31_1_395.mzML"

    results_df = msql_engine.process_query(msql_text, input_path)
    print(results_df)

    row_count = len(results_df)
    assert row_count == 4

def test_ms2_mobility_variable2():
    """Run a variable query with constant mobility bounds.

    X appears in MS2PREC and is limited by an explicit range, while the
    MOBILITY range uses plain numbers. The result is expected to have
    1654 rows.
    """
    msql_text = "QUERY scaninfo(MS2DATA) WHERE MS2PREC=X AND MOBILITY=range(min=1, max=2) AND X=range(min=400, max=500)"
    input_path = "tests/data/meoh_water_ms2_1_31_1_395.mzML"

    results_df = msql_engine.process_query(msql_text, input_path)
    print(results_df)

    row_count = len(results_df)
    assert row_count == 1654



def main():
#msql_engine.init_ray()

Expand Down Expand Up @@ -607,8 +625,10 @@ def main():
#test_topdown()
#test_defect()
#test_or_against_iron()
test_quad_brominated()
#test_quad_brominated()
#test_ms2_mobility()
#test_ms2_mobility_variable()
test_ms2_mobility_variable2()

if __name__ == "__main__":
main()
Expand Down

0 comments on commit 60bf6b4

Please sign in to comment.