Skip to content

Commit

Permalink
little_change
Browse files Browse the repository at this point in the history
  • Loading branch information
zprobot committed Nov 11, 2024
1 parent 3400615 commit fd6842d
Show file tree
Hide file tree
Showing 5 changed files with 20 additions and 29 deletions.
8 changes: 3 additions & 5 deletions quantmsio/core/diann.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
from pyopenms import AASequence
from pyopenms.Constants import PROTON_MASS_U
from quantmsio.operate.tools import get_ahocorasick
from quantmsio.utils.file_utils import extract_protein_list, save_slice_file
from quantmsio.utils.file_utils import close_file, extract_protein_list, save_slice_file
from quantmsio.core.sdrf import SDRFHandler
from quantmsio.core.mztab import MzTab
from quantmsio.core.feature import Feature
Expand Down Expand Up @@ -240,8 +240,7 @@ def write_feature_to_file(
if not pqwriter:
pqwriter = pq.ParquetWriter(output_path, feature.schema)
pqwriter.write_table(feature)
if pqwriter:
pqwriter.close()
close_file(pqwriter=pqwriter)
self.destroy_duckdb_database()

def write_features_to_file(
Expand All @@ -261,6 +260,5 @@ def write_features_to_file(
for key, df in Feature.slice(report, partitions):
feature = Feature.transform_feature(df)
pqwriters = save_slice_file(feature, pqwriters, output_folder, key, filename)
for pqwriter in pqwriters.values():
pqwriter.close()
close_file(pqwriters=pqwriters)
self.destroy_duckdb_database()
8 changes: 3 additions & 5 deletions quantmsio/core/feature.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import pyarrow as pa
import pyarrow.parquet as pq
from quantmsio.operate.tools import get_ahocorasick, get_protein_accession
from quantmsio.utils.file_utils import extract_protein_list,save_slice_file
from quantmsio.utils.file_utils import extract_protein_list,save_slice_file, close_file
from quantmsio.core.mztab import MzTab
from quantmsio.core.psm import Psm
from quantmsio.core.sdrf import SDRFHandler
Expand Down Expand Up @@ -188,8 +188,7 @@ def write_feature_to_file(
if not pqwriter:
pqwriter = pq.ParquetWriter(output_path, feature.schema)
pqwriter.write_table(feature)
if pqwriter:
pqwriter.close()
close_file(pqwriter=pqwriter)

def write_features_to_file(
self,
Expand All @@ -208,8 +207,7 @@ def write_features_to_file(
partitions, file_num, protein_str, duckdb_max_memory, duckdb_threads
):
pqwriters = save_slice_file(feature, pqwriters, output_folder, key, filename)
for pqwriter in pqwriters.values():
pqwriter.close()
close_file(pqwriters)

def generate_best_scan(self, rows, pep_dict):
key = (rows["peptidoform"], rows["precursor_charge"])
Expand Down
11 changes: 4 additions & 7 deletions quantmsio/core/maxquant.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
from quantmsio.core.common import MAXQUANT_PSM_MAP, MAXQUANT_PSM_USECOLS, MAXQUANT_FEATURE_MAP, MAXQUANT_FEATURE_USECOLS
from quantmsio.core.feature import Feature
from quantmsio.core.psm import Psm
from quantmsio.utils.file_utils import extract_protein_list, save_slice_file
from quantmsio.utils.file_utils import close_file, extract_protein_list, save_slice_file

logging.basicConfig(format="%(asctime)s - %(message)s", level=logging.INFO)

Expand Down Expand Up @@ -290,8 +290,7 @@ def write_psm_to_file(self, msms_path: str, output_path: str, chunksize: int = 1
if not pqwriter:
pqwriter = pq.ParquetWriter(output_path, parquet.schema)
pqwriter.write_table(parquet)
if pqwriter:
pqwriter.close()
close_file(pqwriter=pqwriter)

def _init_sdrf(self, sdrf_path: str):
Sdrf = SDRFHandler(sdrf_path)
Expand All @@ -310,8 +309,7 @@ def write_feature_to_file(
if not pqwriter:
pqwriter = pq.ParquetWriter(output_path, parquet.schema)
pqwriter.write_table(parquet)
if pqwriter:
pqwriter.close()
close_file(pqwriter=pqwriter)

def write_features_to_file(
self,
Expand All @@ -333,5 +331,4 @@ def write_features_to_file(
for key, df in Feature.slice(report, partitions):
feature = Feature.transform_feature(df)
pqwriters = save_slice_file(feature, pqwriters, output_folder, key, filename)
for pqwriter in pqwriters.values():
pqwriter.close()
close_file(pqwriters=pqwriters)
13 changes: 2 additions & 11 deletions quantmsio/operate/tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
from quantmsio.operate.query import Query, map_spectrum_mz
from quantmsio.core.openms import OpenMSHandler
from quantmsio.utils.pride_utils import get_unanimous_name
from quantmsio.utils.file_utils import load_de_or_ae, save_slice_file, save_file
from quantmsio.utils.file_utils import load_de_or_ae, save_slice_file, save_file, close_file

def init_save_info(parquet_path: str):
pqwriters = {}
Expand All @@ -20,15 +20,6 @@ def init_save_info(parquet_path: str):
return pqwriters, pqwriter_no_part, filename


def close_file(partitions: list, pqwriters: dict, pqwriter_no_part: object):
    """Close the parquet writer(s) that match the partitioning mode.

    Args:
        partitions: Partition column names. When empty or ``None`` the
            output was written through the single non-partitioned writer.
        pqwriters: Mapping of partition key to an open writer; only used
            when ``partitions`` is non-empty.
        pqwriter_no_part: The single writer used when there are no
            partitions. May be falsy (e.g. ``None``) if nothing was written,
            in which case nothing is closed.
    """
    if not partitions:
        # Non-partitioned output: at most one writer exists.
        if pqwriter_no_part:
            pqwriter_no_part.close()
        return
    # Partitioned output: one writer per partition key.
    for writer in pqwriters.values():
        writer.close()


def generate_psms_of_spectrum(
parquet_path: str,
mzml_directory: str,
Expand Down Expand Up @@ -58,7 +49,7 @@ def generate_psms_of_spectrum(
pqwriters, pqwriter_no_part = save_parquet_file(
partitions, table, output_folder, filename, pqwriters, pqwriter_no_part, PSM_SCHEMA
)
close_file(partitions, pqwriters, pqwriter_no_part)
close_file(pqwriters, pqwriter_no_part)


def save_parquet_file(
Expand Down
9 changes: 8 additions & 1 deletion quantmsio/utils/file_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -102,4 +102,11 @@ def save_file(parquet_table, pqwriter, output_folder, filename):
if not pqwriter:
pqwriter = pq.ParquetWriter(save_path, parquet_table.schema)
pqwriter.write_table(parquet_table)
return pqwriter
return pqwriter

def close_file(pqwriters: dict = None, pqwriter: object = None):
    """Close every parquet writer handed in; do nothing when none was opened.

    Callers create writers lazily, so either argument may still be ``None``
    (or an empty dict) when no data was ever written. That case must be a
    silent no-op rather than an ``AttributeError`` on ``None.values()``.

    Args:
        pqwriters: Optional mapping of partition key to an open writer.
            Every value has ``close()`` called on it.
        pqwriter: Optional single writer. Closed when truthy.
    """
    if pqwriter:
        pqwriter.close()
    # Treat a missing mapping as empty so callers that only pass ``pqwriter``
    # (possibly ``None``) never hit ``None.values()``. When both arguments are
    # supplied, both are closed so that no writer is left open.
    for writer in (pqwriters or {}).values():
        writer.close()

0 comments on commit fd6842d

Please sign in to comment.