From 171a685f5d32c7f94b04ac43d603d3e7c95d8dca Mon Sep 17 00:00:00 2001 From: Donald Campbell <125581724+donaldcampbelljr@users.noreply.github.com> Date: Tue, 17 Sep 2024 16:29:09 -0400 Subject: [PATCH] add bedfile read exception --- bedboss/refgenome_validator/utils.py | 15 ++++++++++++++- scripts/ref_genome_validating/validate_genome.py | 9 +++++++-- 2 files changed, 21 insertions(+), 3 deletions(-) diff --git a/bedboss/refgenome_validator/utils.py b/bedboss/refgenome_validator/utils.py index 68cb66e..e0c9d29 100644 --- a/bedboss/refgenome_validator/utils.py +++ b/bedboss/refgenome_validator/utils.py @@ -3,6 +3,7 @@ from geniml.io.utils import is_gzipped import logging +from bedboss.exceptions import BedBossException _LOGGER = logging.getLogger("bedboss") @@ -67,7 +68,7 @@ def _read_file_pd(*args, **kwargs) -> pd.DataFrame: except (pd.errors.ParserError, pd.errors.EmptyDataError) as _: if row_count <= max_rows: row_count += 1 - return _read_gzipped_file(*args) + raise BedfileReadException(reason="Cannot read bed file.") def get_bed_chrom_info(bedfile: str) -> dict: @@ -84,3 +85,15 @@ def get_bed_chrom_info(bedfile: str) -> dict: max_end_for_each_chrom = df.groupby(0)[2].max() return max_end_for_each_chrom.to_dict() + + +class BedfileReadException(BedBossException): + """Exception when there is an exception during refgenome validation""" + + def __init__(self, reason: str = ""): + """ + Optionally provide explanation for exceptional condition. + + :param str reason: some context why error occurred + """ + super(BedfileReadException, self).__init__(reason) diff --git a/scripts/ref_genome_validating/validate_genome.py b/scripts/ref_genome_validating/validate_genome.py index ad207f6..4e04517 100644 --- a/scripts/ref_genome_validating/validate_genome.py +++ b/scripts/ref_genome_validating/validate_genome.py @@ -25,7 +25,8 @@ # BEDFILE_DIRECTORY = ( # "/home/drc/GITHUB/bedboss/bedboss/scripts/ref_genome_validating/results" # ) - BEDFILE_DIRECTORY = "/home/drc/GITHUB/bedboss/bedboss/scripts/ref_genome_validating/data/bed_small_subset" + # BEDFILE_DIRECTORY = "/home/drc/GITHUB/bedboss/bedboss/scripts/ref_genome_validating/data/bed_small_subset" + BEDFILE_DIRECTORY = "/home/drc/GITHUB/bedboss/bedboss/scripts/ref_genome_validating/data/test_singles" try: PEP_URL = os.environ["PEP_URL"] @@ -72,6 +73,11 @@ def main(): # build genome models # for each reference genome in the user's config file, build a genome model + # from geniml.io import RegionSet + # + # ff =RegionSet("/home/drc/GITHUB/bedboss/bedboss/scripts/ref_genome_validating/data/test_singles/GSM8196564_435_RUNX3_KO_H3K27AC_v_435_ctrl_IgG_seacr.relaxed.bed") + # ff + all_genome_models = [] # for reference_genome in rgc.list(): @@ -213,7 +219,6 @@ def main(): tier = { "tier_rating": {} } # add this to a column to make comparisons easier for human eyes on pephub - all_vals = {} if compat_vector: for i in compat_vector.keys(): if i is not None: