diff --git a/quantmsio/core/feature.py b/quantmsio/core/feature.py
index 4f36b17..ab0f1b8 100644
--- a/quantmsio/core/feature.py
+++ b/quantmsio/core/feature.py
@@ -138,13 +138,17 @@ def merge_psm(rows, index):
         )
 
     def generate_feature(self, file_num=10, protein_str=None, duckdb_max_memory="16GB", duckdb_threads=4):
+        for msstats in self.generate_feature_report(file_num, protein_str, duckdb_max_memory, duckdb_threads):
+            feature = self.transform_feature(msstats)
+            yield feature
+
+    def generate_feature_report(self, file_num=10, protein_str=None, duckdb_max_memory="16GB", duckdb_threads=4):
         map_dict = self.extract_psm_msg(1000000, protein_str)
         for msstats in self.transform_msstats_in(file_num, protein_str, duckdb_max_memory, duckdb_threads):
             self.merge_msstats_and_psm(msstats, map_dict)
             self.add_additional_msg(msstats)
             self.convert_to_parquet_format(msstats)
-            feature = self.transform_feature(msstats)
-            yield feature
+            yield msstats
 
     @staticmethod
     def slice(df, partitions):
@@ -160,11 +164,7 @@ def slice(df, partitions):
             yield key, df
 
     def generate_slice_feature(self, partitions, file_num=10, protein_str=None, duckdb_max_memory="16GB", duckdb_threads=4):
-        map_dict = self.extract_psm_msg(1000000, protein_str)
-        for msstats in self.transform_msstats_in(file_num, protein_str, duckdb_max_memory, duckdb_threads):
-            self.merge_msstats_and_psm(msstats, map_dict)
-            self.add_additional_msg(msstats)
-            self.convert_to_parquet_format(msstats)
+        for msstats in self.generate_feature_report(file_num, protein_str, duckdb_max_memory, duckdb_threads):
             for key, df in self.slice(msstats, partitions):
                 feature = self.transform_feature(df)
                 yield key, feature