From 0bae693163338468cc53bb0b239f8b655820057a Mon Sep 17 00:00:00 2001 From: Jayanth Date: Tue, 16 Apr 2019 12:43:18 +0530 Subject: [PATCH] Fixed some bugs and made it run on directories. --- README.md | 4 ++-- pyoracc/wrapper/cli.py | 42 ++++++++++++++++++++++---------------- pyoracc/wrapper/segment.py | 3 ++- setup.py | 2 +- 4 files changed, 29 insertions(+), 22 deletions(-) diff --git a/README.md b/README.md index 4e5fa5d..53f4e22 100644 --- a/README.md +++ b/README.md @@ -73,9 +73,9 @@ To run it on folder: $ pyoracc -i ./pyoracc/test/data -f cdli -To enable segmentation (will be fast), use switch -s/--segment: +To disable segmentation (will be slow) and to run on whole, use switch -w/--whole: - $ pyoracc -i ./pyoracc/test/data -f cdli -s + $ pyoracc -i ./pyoracc/test/data -f cdli -w To see the console messages of the tool, use --verbose switch diff --git a/pyoracc/wrapper/cli.py b/pyoracc/wrapper/cli.py index e33f171..c4ec969 100644 --- a/pyoracc/wrapper/cli.py +++ b/pyoracc/wrapper/cli.py @@ -22,14 +22,14 @@ def check_atf_message((segpathname, atftype, verbose)): return -1 -def check_and_process(pathname, atftype, segment, verbose=False): +def check_and_process(pathname, atftype, whole, verbose=False): mode = os.stat(pathname)[ST_MODE] if S_ISREG(mode) and pathname.lower().endswith('.atf'): # It's a file, call the callback function if verbose: click.echo('Info: Parsing {0}.'.format(pathname)) try: - if segment: + if not whole: pool = Pool() segmentor = Segmentor(pathname, verbose) outfolder = segmentor.convert() @@ -61,31 +61,37 @@ def check_and_process(pathname, atftype, segment, verbose=False): @click.option('--atf_type', '-f', type=click.Choice(['cdli', 'oracc']), prompt=True, required=True, help='Input the atf file type.') -@click.option('--segment', '-s', default=False, required=False, is_flag=True, - help='Disables the segmentation of the atf file.') +@click.option('--whole', '-w', default=False, required=False, is_flag=True, + help='Disables the segmentation of the atf file and run as a whole.') @click.option('--verbose', '-v', default=False, required=False, is_flag=True, help='Enables verbose mode.') @click.version_option() -def main(input_path, atf_type, segment, verbose): +def main(input_path, atf_type, whole, verbose): """My Tool does one work, and one work well.""" tsbegin = time.time() - pool = Pool() if os.path.isdir(input_path): - process_ids = [] + failures = 0 + successes = 0 with click.progressbar(os.listdir(input_path), label='Info: Checking the files') as bar: for index, f in enumerate(bar): pathname = os.path.join(input_path, f) - process_ids.append(pool.apply_async( - check_and_process, (pathname, atf_type, segment, verbose))) - - result = map(lambda x: x.get(), process_ids) - successes = sum(filter(lambda x: (x == 1), result)) - failures = -sum(filter(lambda x: (x == -1), result)) - click.echo("Failed with {0} out of {1} ({2}%)" - .format(failures, failures + successes, - failures * 100.0 / (failures + successes))) + try: + check_and_process(pathname, atf_type, whole, verbose) + successes += 1 + click.echo('Info: Correctly parsed {0}.'.format(pathname)) + except (SyntaxError, IndexError, AttributeError, + UnicodeDecodeError) as e: + failures += 1 + click.echo("Info: Failed with message: {0} in {1}" + .format(e, pathname)) + finally: + try: + click.echo("Failed with {0} out of {1} ({2}%)" + .format(failures, failures + successes, failures * 100.0 / (failures + successes))) + except ZeroDivisionError: + click.echo("Empty files to process") else: - check_and_process(input_path, atf_type, segment, verbose) + check_and_process(input_path, atf_type, whole, verbose) tsend = time.time() - click.echo("Total time taken: {0} minutes)".format((tsend-tsbegin)/60.0)) + click.echo("Total time taken: {0} minutes".format((tsend-tsbegin)/60.0)) diff --git a/pyoracc/wrapper/segment.py b/pyoracc/wrapper/segment.py index 22ba19a..2227f53 100644 --- a/pyoracc/wrapper/segment.py +++ b/pyoracc/wrapper/segment.py @@ -12,7 +12,7 @@ class Segmentor: def __init__(self, inputFile, verbose): self.inputFileName = inputFile - self.outfolder = os.path.join(os.path.dirname(self.inputFileName), + self.outfolder = os.path.join(os.path.dirname(self.inputFileName), "..", os.path.basename(self.inputFileName)+OUTPUT_FOLDER) self.verbose = verbose self.__reset__() @@ -27,6 +27,7 @@ def convert(self): with codecs.open(self.inputFileName, 'r', 'utf-8') as openedFile: for (i, line) in enumerate(openedFile): self.__parse(i, line.strip()) + self.write2file() return self.outfolder def write2file(self): diff --git a/setup.py b/setup.py index c8fc713..febb951 100644 --- a/setup.py +++ b/setup.py @@ -50,7 +50,7 @@ def run(self): setup(name='pyoracc', - version='0.2.2', + version='0.2.3', author='UCL Research IT Services', author_email='rc-softdev@ucl.ac.uk', description='Python tools for working with ORACC/CDLI ATF files',