diff --git a/quantmsio/core/diann.py b/quantmsio/core/diann.py index 941c247..585a209 100644 --- a/quantmsio/core/diann.py +++ b/quantmsio/core/diann.py @@ -9,7 +9,7 @@ from pyopenms import AASequence from pyopenms.Constants import PROTON_MASS_U from quantmsio.operate.tools import get_ahocorasick -from quantmsio.utils.file_utils import extract_protein_list, save_slice_file +from quantmsio.utils.file_utils import close_file, extract_protein_list, save_slice_file from quantmsio.core.sdrf import SDRFHandler from quantmsio.core.mztab import MzTab from quantmsio.core.feature import Feature @@ -240,8 +240,7 @@ def write_feature_to_file( if not pqwriter: pqwriter = pq.ParquetWriter(output_path, feature.schema) pqwriter.write_table(feature) - if pqwriter: - pqwriter.close() + close_file(pqwriter=pqwriter) self.destroy_duckdb_database() def write_features_to_file( @@ -261,6 +260,5 @@ def write_features_to_file( for key, df in Feature.slice(report, partitions): feature = Feature.transform_feature(df) pqwriters = save_slice_file(feature, pqwriters, output_folder, key, filename) - for pqwriter in pqwriters.values(): - pqwriter.close() + close_file(pqwriters=pqwriters) self.destroy_duckdb_database() \ No newline at end of file diff --git a/quantmsio/core/feature.py b/quantmsio/core/feature.py index 825494f..33f037e 100644 --- a/quantmsio/core/feature.py +++ b/quantmsio/core/feature.py @@ -3,7 +3,7 @@ import pyarrow as pa import pyarrow.parquet as pq from quantmsio.operate.tools import get_ahocorasick, get_protein_accession -from quantmsio.utils.file_utils import extract_protein_list,save_slice_file +from quantmsio.utils.file_utils import extract_protein_list,save_slice_file, close_file from quantmsio.core.mztab import MzTab from quantmsio.core.psm import Psm from quantmsio.core.sdrf import SDRFHandler @@ -188,8 +188,7 @@ def write_feature_to_file( if not pqwriter: pqwriter = pq.ParquetWriter(output_path, feature.schema) pqwriter.write_table(feature) - if pqwriter: - 
pqwriter.close() + close_file(pqwriter=pqwriter) def write_features_to_file( self, @@ -208,8 +207,7 @@ def write_features_to_file( partitions, file_num, protein_str, duckdb_max_memory, duckdb_threads ): pqwriters = save_slice_file(feature, pqwriters, output_folder, key, filename) - for pqwriter in pqwriters.values(): - pqwriter.close() + close_file(pqwriters) def generate_best_scan(self, rows, pep_dict): key = (rows["peptidoform"], rows["precursor_charge"]) diff --git a/quantmsio/core/maxquant.py b/quantmsio/core/maxquant.py index c20ce7e..883c07b 100644 --- a/quantmsio/core/maxquant.py +++ b/quantmsio/core/maxquant.py @@ -15,7 +15,7 @@ from quantmsio.core.common import MAXQUANT_PSM_MAP, MAXQUANT_PSM_USECOLS, MAXQUANT_FEATURE_MAP, MAXQUANT_FEATURE_USECOLS from quantmsio.core.feature import Feature from quantmsio.core.psm import Psm -from quantmsio.utils.file_utils import extract_protein_list, save_slice_file +from quantmsio.utils.file_utils import close_file, extract_protein_list, save_slice_file logging.basicConfig(format="%(asctime)s - %(message)s", level=logging.INFO) @@ -290,8 +290,7 @@ def write_psm_to_file(self, msms_path: str, output_path: str, chunksize: int = 1 if not pqwriter: pqwriter = pq.ParquetWriter(output_path, parquet.schema) pqwriter.write_table(parquet) - if pqwriter: - pqwriter.close() + close_file(pqwriter=pqwriter) def _init_sdrf(self, sdrf_path: str): Sdrf = SDRFHandler(sdrf_path) @@ -310,8 +309,7 @@ def write_feature_to_file( if not pqwriter: pqwriter = pq.ParquetWriter(output_path, parquet.schema) pqwriter.write_table(parquet) - if pqwriter: - pqwriter.close() + close_file(pqwriter=pqwriter) def write_features_to_file( self, @@ -333,5 +331,4 @@ def write_features_to_file( for key, df in Feature.slice(report, partitions): feature = Feature.transform_feature(df) pqwriters = save_slice_file(feature, pqwriters, output_folder, key, filename) - for pqwriter in pqwriters.values(): - pqwriter.close() \ No newline at end of file + 
def close_file(pqwriters: dict = None, pqwriter: object = None):
    """Close the Parquet writer(s) opened while saving output.

    Either argument may be omitted or ``None``; calling this with nothing to
    close is a no-op. This matters for callers that create their writer
    lazily inside a chunk loop: if the loop produced no chunks, the writer is
    still ``None`` when it is handed to this function.

    Args:
        pqwriters: Mapping whose values are writer objects exposing
            ``close()`` (one per partition/slice file). ``None`` or an empty
            mapping means there is nothing to close.
        pqwriter: A single writer object exposing ``close()``, or a falsy
            placeholder when no writer was ever created.
    """
    # Truthiness (not ``is not None``) is kept on purpose so any falsy
    # "no writer yet" placeholder passed by existing callers is still skipped.
    if pqwriter:
        pqwriter.close()
    # ``pqwriters`` defaults to None; guard it so a missing mapping does not
    # raise AttributeError on ``.values()``.
    if pqwriters:
        for writer in pqwriters.values():
            writer.close()