Skip to content

Commit

Permalink
minor changes
Browse files Browse the repository at this point in the history
  • Loading branch information
ypriverol committed Nov 9, 2023
1 parent 56040df commit c9544e6
Show file tree
Hide file tree
Showing 11 changed files with 221 additions and 220 deletions.
66 changes: 33 additions & 33 deletions python/quantmsio/quantms_io/core/ae.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,16 +6,18 @@
from quantms_io.core.sdrf import SDRFHandler
from quantms_io.utils.file_utils import delete_files_extension
import logging
logging.basicConfig(level = logging.INFO)

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)


def get_ibaq_columns(path):
    """
    Return the column names found in the header (first line) of an iBAQ CSV file.

    The header is parsed with the ``csv`` module instead of a plain
    ``split(',')`` so that quoted column names, including names that contain
    a comma, are returned without their quotes and are not cut in pieces.

    :param path: path to the comma-separated iBAQ file
    :return: list of column names; an empty list if the file has no header line
    """
    import csv  # local import so the function does not depend on the file's import block

    # newline='' lets the csv module handle line endings itself; utf-8-sig
    # reads plain UTF-8 and also drops a leading byte-order mark if present.
    with open(path, newline='', encoding='utf-8-sig') as f:
        return next(csv.reader(f), [])


class AbsoluteExpressionHander:

LABEL_MAP = {
'ProteinName': 'protein',
'SampleID': 'sample_accession',
Expand All @@ -28,17 +30,16 @@ class AbsoluteExpressionHander:
#INFO=<ID=condition, Number=1, Type=String, Description="Value of the factor value">
#INFO=<ID=ibaq, Number=1, Type=Float, Description="Intensity based absolute quantification">
#INFO=<ID=ribaq, Number=1, Type=Float, Description="relative iBAQ">\n"""

ABSOLUTE_EXPRESSION_EXTENSION = ".absolute.tsv"

def __init__(self):
self.ibaq_df = None
self.ae_file_path = None
self.project_manager = None
self.sdrf_manager = None
self.sdrf_file_path = None



def load_project_file(self, project_file: str):
"""
Load a project file that links the different files in the quantms.io format
Expand All @@ -53,19 +54,18 @@ def load_project_file(self, project_file: str):

self.project_manager = ProjectHandler()
self.project_manager.load_project_info(project_file)


def load_ibaq_file(self, path):
    """
    Load an iBAQ CSV file into ``self.ibaq_df`` and remember its path.

    Only the columns ProteinName, SampleID, Condition, Ibaq and IbaqLog are
    read; they are then renamed using ``AbsoluteExpressionHander.LABEL_MAP``.

    :param path: path to the comma-separated iBAQ file
    :raises ValueError: if one or more of the required columns is missing
        from the file header (ValueError is an Exception, so existing
        ``except Exception`` handlers keep working)
    """
    usecols = ['ProteinName', 'SampleID', 'Condition', 'Ibaq', 'IbaqLog']
    # Check the header before reading, and report every missing column at
    # once instead of stopping at the first one.
    ibaq_columns = get_ibaq_columns(path)
    missing = [col for col in usecols if col not in ibaq_columns]
    if missing:
        raise ValueError(f"Not found {', '.join(missing)} in ibaq file")
    ibaqs = pd.read_csv(path, usecols=usecols)
    ibaqs.rename(columns=AbsoluteExpressionHander.LABEL_MAP, inplace=True)
    self.ae_file_path = path
    self.ibaq_df = ibaqs

def load_sdrf_file(self, sdrf_file: str):
self.sdrf_file_path = sdrf_file

Expand All @@ -75,32 +75,32 @@ def load_sdrf_file(self, sdrf_file: str):
self.sdrf_manager = SDRFHandler(sdrf_file=sdrf_file)

def convert_ibaq_to_quantms(
self,
output_folder: str = None,
output_file_prefix: str = None,
delete_existing: bool = False,
self,
output_folder: str = None,
output_file_prefix: str = None,
delete_existing: bool = False,
):
output_lines = ''
if self.project_manager:
output_lines += (
"#project_accession: "
+ self.project_manager.project.project_info["project_accession"]
+ "\n"
"#project_accession: "
+ self.project_manager.project.project_info["project_accession"]
+ "\n"
)
output_lines += (
"#project_title: "
+ self.project_manager.project.project_info["project_title"]
+ "\n"
"#project_title: "
+ self.project_manager.project.project_info["project_title"]
+ "\n"
)
output_lines += (
"#project_description: "
+ self.project_manager.project.project_info["project_description"]
+ "\n"
"#project_description: "
+ self.project_manager.project.project_info["project_description"]
+ "\n"
)
output_lines += (
"#quantms_version: "
+ self.project_manager.project.project_info["quantms_version"]
+ "\n"
"#quantms_version: "
+ self.project_manager.project.project_info["quantms_version"]
+ "\n"
)
factor_value = self.get_factor_value()
if factor_value is not None:
Expand All @@ -109,7 +109,7 @@ def convert_ibaq_to_quantms(
output_lines += AbsoluteExpressionHander.AE_HEADER + str(
self.ibaq_df.to_csv(sep="\t", index=False, header=True)
)
output_lines = output_lines.replace('\r','')
output_lines = output_lines.replace('\r', '')
# Create the output file name
base_name = output_file_prefix
if output_file_prefix is None:
Expand All @@ -133,9 +133,9 @@ def convert_ibaq_to_quantms(
output_filename_path = f"{output_folder}/{output_filename}"

# Save the combined lines to a TSV file
with open(output_filename_path, "w",encoding='utf8') as f:
with open(output_filename_path, "w", encoding='utf8') as f:
f.write(output_lines)

if self.project_manager:
self.project_manager.add_quantms_file(
file_category="absolute_file", file_name=output_filename
Expand Down Expand Up @@ -163,4 +163,4 @@ def update_project_file(self, project_file: str = None):

if project_file is not None:
project_file = self.project_file
self.project_manager.save_updated_project_info(output_file_name=project_file)
self.project_manager.save_updated_project_info(output_file_name=project_file)
44 changes: 22 additions & 22 deletions python/quantmsio/quantms_io/core/database.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
import duckdb
import random


def get_skip_rows(path):
skip_rows = 0
with open(path) as f:
Expand All @@ -16,44 +17,43 @@ def get_skip_rows(path):
line = f.readline()
return skip_rows


class Database:
    """
    Holds DuckDB relations loaded from quantms.io files and runs simple
    lookups on them.

    ``.parquet`` files are kept in ``feature_db`` and ``.absolute.tsv`` files
    in ``ibaq_db``; the ``n`` argument of the query methods is the index of
    the relation inside the corresponding list (i.e. its load order).
    """

    def __init__(self):
        # Relations created from .parquet files, in load order
        self.feature_db = []
        # Relations created from .absolute.tsv files, in load order
        self.ibaq_db = []

    @staticmethod
    def _sql_literal(value):
        """Return *value* as a single-quoted SQL string literal, doubling embedded quotes."""
        return "'" + str(value).replace("'", "''") + "'"

    def load_db(self, path: str):
        """
        Register the file at *path* according to its extension.

        Paths ending in neither ``.parquet`` nor ``.absolute.tsv`` are ignored.

        :param path: path to a ``.parquet`` or ``.absolute.tsv`` file
        """
        if path.endswith(".parquet"):
            self.feature_db.append(duckdb.read_parquet(path))
        elif path.endswith(".absolute.tsv"):
            # Number of leading lines to skip, as computed by get_skip_rows
            skip_rows = get_skip_rows(path)
            self.ibaq_db.append(duckdb.read_csv(path, sep='\t', skiprows=skip_rows))

    def get_unique_peptides(self, n):
        """
        Return the distinct values of the ``sequence`` column of the n-th feature relation.

        :param n: index into ``self.feature_db``
        :return: list of distinct sequences
        """
        # The SQL text refers to this local variable by name; DuckDB resolves
        # Python variables of the calling scope as tables.
        feature_db = self.feature_db[n]  # noqa: F841
        unique_peps = duckdb.sql("SELECT DISTINCT sequence FROM feature_db").df()
        return unique_peps['sequence'].tolist()

    def get_unique_proteins(self, n):
        """
        Return the distinct values of the ``protein`` column of the n-th iBAQ relation.

        :param n: index into ``self.ibaq_db``
        :return: list of distinct proteins
        """
        # Referenced by name inside the SQL text below
        ibaq_db = self.ibaq_db[n]  # noqa: F841
        unique_prts = duckdb.sql("SELECT DISTINCT protein FROM ibaq_db").df()
        return unique_prts['protein'].tolist()

    def query_peptide(self, peptide: str, n):
        """
        Return, as a DataFrame, all rows of the n-th feature relation whose ``sequence`` equals *peptide*.

        :param peptide: peptide sequence to look up
        :param n: index into ``self.feature_db``
        """
        # Referenced by name inside the SQL text below
        feature_db = self.feature_db[n]  # noqa: F841
        # Escape the value so a quote inside it cannot break or alter the query
        literal = self._sql_literal(peptide)
        return duckdb.sql(f"SELECT * FROM feature_db WHERE sequence ={literal}").df()

    def query_protein(self, protein: str, n):
        """
        Return, as a DataFrame, all rows of the n-th iBAQ relation whose ``protein`` equals *protein*.

        :param protein: protein identifier to look up
        :param n: index into ``self.ibaq_db``
        """
        # Referenced by name inside the SQL text below
        ibaq_db = self.ibaq_db[n]  # noqa: F841
        # Escape the value so a quote inside it cannot break or alter the query
        literal = self._sql_literal(protein)
        return duckdb.sql(f"SELECT * FROM ibaq_db WHERE protein ={literal}").df()

39 changes: 20 additions & 19 deletions python/quantmsio/quantms_io/core/de.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,8 @@
from quantms_io.utils.file_utils import delete_files_extension

import logging
logging.basicConfig(level = logging.INFO)

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)


Expand Down Expand Up @@ -109,10 +110,10 @@ def load_project_file(self, project_file: str):
self.project_manager.load_project_info(project_file)

def convert_msstats_to_quantms(
self,
output_folder: str = None,
output_file_prefix: str = None,
delete_existing: bool = False,
self,
output_folder: str = None,
output_file_prefix: str = None,
delete_existing: bool = False,
):
"""
Convert a MSstats differential file to quantms.io format
Expand Down Expand Up @@ -140,24 +141,24 @@ def convert_msstats_to_quantms(
output_lines = ''
if self.project_manager:
output_lines += (
"#project_accession: "
+ self.project_manager.project.project_info["project_accession"]
+ "\n"
"#project_accession: "
+ self.project_manager.project.project_info["project_accession"]
+ "\n"
)
output_lines += (
"#project_title: "
+ self.project_manager.project.project_info["project_title"]
+ "\n"
"#project_title: "
+ self.project_manager.project.project_info["project_title"]
+ "\n"
)
output_lines += (
"#project_description: "
+ self.project_manager.project.project_info["project_description"]
+ "\n"
"#project_description: "
+ self.project_manager.project.project_info["project_description"]
+ "\n"
)
output_lines += (
"#quantms_version: "
+ self.project_manager.project.project_info["quantms_version"]
+ "\n"
"#quantms_version: "
+ self.project_manager.project.project_info["quantms_version"]
+ "\n"
)
factor_value = self.get_factor_value()
if factor_value is not None:
Expand Down Expand Up @@ -197,7 +198,7 @@ def convert_msstats_to_quantms(
output_filename_path = f"{output_folder}/{output_filename}"

# Save the combined lines to a TSV file
with open(output_filename_path, "w",encoding='utf8') as f:
with open(output_filename_path, "w", encoding='utf8') as f:
f.write(output_lines)
if self.project_manager:
self.project_manager.add_quantms_file(
Expand Down Expand Up @@ -228,7 +229,7 @@ def get_contrast_labels(quantms_df: pd.DataFrame):
"""
unique_labels = []
for label in quantms_df["label"].unique():
for condition in label.split("-",1):
for condition in label.split("-", 1):
unique_labels.append(condition)
'''
if len(unique_label) == 1:
Expand Down
Loading

0 comments on commit c9544e6

Please sign in to comment.