Skip to content

Commit

Permalink
minor_comment
Browse files Browse the repository at this point in the history
  • Loading branch information
zprobot committed Nov 1, 2023
1 parent 9d3361c commit 49a2b70
Showing 1 changed file with 30 additions and 6 deletions.
36 changes: 30 additions & 6 deletions python/quantmsio/quantms_io/core/statistic.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,9 @@ def __init__(self,feature_path:str=None,ibaq_path:str=None):
self.ibaq_db = self.load_db(ibaq_path)

def load_db(self,path:str):
'''
Load the database.
'''
if path is not None:
if os.path.exists(path):
if path.endswith(".parquet"):
Expand All @@ -62,23 +65,31 @@ def load_db(self,path:str):

@check_exist('feature_db')
def get_unique_peptides(self) -> list:
    '''
    Collect the distinct values of the `sequence` column of the feature table.

    return: A list of deduplicated peptides.
    '''
    # Keep this local named `feature_db`: the query below refers to the table
    # by that exact name, and DuckDB resolves it from the Python variables in
    # scope (replacement scan). Renaming or removing it breaks the query.
    feature_db = self.feature_db
    # Plain string literal: the query has no placeholders, so an f-string
    # prefix would be misleading.
    unique_peps = duckdb.sql("SELECT DISTINCT sequence FROM feature_db").df()

    return unique_peps['sequence'].tolist()

@check_exist('ibaq_db')
def get_unique_proteins(self) -> list:
    '''
    Collect the distinct values of the `protein` column of the ibaq table.

    return: A list of deduplicated proteins.
    '''
    # Keep this local named `ibaq_db`: the query below refers to the table by
    # that exact name, and DuckDB resolves it from the Python variables in
    # scope (replacement scan). Renaming or removing it breaks the query.
    ibaq_db = self.ibaq_db
    # Plain string literal: the query has no placeholders, so an f-string
    # prefix would be misleading.
    unique_prts = duckdb.sql("SELECT DISTINCT protein FROM ibaq_db").df()

    return unique_prts['protein'].tolist()

@check_exist('feature_db')
def query_peptide(self,peptide:str):

'''
peptide: Peptide that need to be queried.
return: A DataFrame of all information about query peptide.
'''

feature_db = self.feature_db
if check_string('^[A-Z]+$',peptide):
return duckdb.sql(f"SELECT * FROM feature_db WHERE sequence ='{peptide}'").df()
Expand All @@ -87,7 +98,11 @@ def query_peptide(self,peptide:str):

@check_exist('ibaq_db')
def query_protein(self,protein:str):

'''
protein: Protein that need to be queried.
return: A DataFrame of all information about query protein.
'''

ibaq_db = self.ibaq_db
if check_string('^[A-Z]+',protein):
return duckdb.sql(f"SELECT * FROM ibaq_db WHERE protein ='{protein}'").df()
Expand All @@ -96,6 +111,9 @@ def query_protein(self,protein:str):

@check_exist('feature_db')
def plot_peptide_distribution_of_protein(self):
'''
Bar graphs of peptide counts for different samples.
'''
feature_db = self.feature_db
df = duckdb.sql(f"SELECT sample_accession, COUNT(sequence) FROM feature_db GROUP BY sample_accession").df()
df.columns = ['sample','peptides']
Expand All @@ -107,11 +125,14 @@ def plot_peptide_distribution_of_protein(self):

@check_exist('feature_db')
def plot_intensty_distribution_of_samples(self):
'''
Kde of peptide intensity distribution for different samples.
'''
feature_db = self.feature_db
sample_accessions = duckdb.sql(f"SELECT DISTINCT sample_accession FROM feature_db").df()['sample_accession'].tolist()
random.shuffle(sample_accessions)
if len(sample_accessions) > 8:
sample_accessions = sample_accessions[:8]
if len(sample_accessions) > 10:
sample_accessions = sample_accessions[:10]
df = pd.DataFrame()
for sample in sample_accessions:
df_sample = duckdb.sql(f"SELECT intensity FROM feature_db WHERE sample_accession='{sample}'").df()
Expand All @@ -124,6 +145,9 @@ def plot_intensty_distribution_of_samples(self):

@check_exist('feature_db')
def plot_intensty_box_of_samples(self):
'''
Boxplot of peptide intensity distribution for different samples.
'''
feature_db = self.feature_db
sample_accessions = duckdb.sql(f"SELECT DISTINCT sample_accession FROM feature_db").df()['sample_accession'].tolist()
random.shuffle(sample_accessions)
Expand Down

0 comments on commit 49a2b70

Please sign in to comment.