Skip to content

Commit

Permalink
Fixed some bugs and made it run on directories.
Browse files Browse the repository at this point in the history
  • Loading branch information
jayanthkmr committed Apr 16, 2019
1 parent d035bb9 commit 0bae693
Show file tree
Hide file tree
Showing 4 changed files with 29 additions and 22 deletions.
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -73,9 +73,9 @@ To run it on folder:

$ pyoracc -i ./pyoracc/test/data -f cdli

To enable segmentation (will be fast), use switch -s/--segment:
To disable segmentation (will be slow) and run on the whole file, use the -w/--whole switch:

$ pyoracc -i ./pyoracc/test/data -f cdli -s
$ pyoracc -i ./pyoracc/test/data -f cdli -w

To see the console messages of the tool, use the --verbose switch

Expand Down
42 changes: 24 additions & 18 deletions pyoracc/wrapper/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,14 +22,14 @@ def check_atf_message((segpathname, atftype, verbose)):
return -1


def check_and_process(pathname, atftype, segment, verbose=False):
def check_and_process(pathname, atftype, whole, verbose=False):
mode = os.stat(pathname)[ST_MODE]
if S_ISREG(mode) and pathname.lower().endswith('.atf'):
# It's a file, call the callback function
if verbose:
click.echo('Info: Parsing {0}.'.format(pathname))
try:
if segment:
if not whole:
pool = Pool()
segmentor = Segmentor(pathname, verbose)
outfolder = segmentor.convert()
Expand Down Expand Up @@ -61,31 +61,37 @@ def check_and_process(pathname, atftype, segment, verbose=False):
@click.option('--atf_type', '-f', type=click.Choice(['cdli', 'oracc']),
prompt=True, required=True,
help='Input the atf file type.')
@click.option('--segment', '-s', default=False, required=False, is_flag=True,
help='Disables the segmentation of the atf file.')
@click.option('--whole', '-w', default=False, required=False, is_flag=True,
help='Disables the segmentation of the atf file and run as a whole.')
@click.option('--verbose', '-v', default=False, required=False, is_flag=True,
help='Enables verbose mode.')
@click.version_option()
def main(input_path, atf_type, segment, verbose):
def main(input_path, atf_type, whole, verbose):
"""My Tool does one work, and one work well."""
tsbegin = time.time()
pool = Pool()
if os.path.isdir(input_path):
process_ids = []
failures = 0
successes = 0
with click.progressbar(os.listdir(input_path),
label='Info: Checking the files') as bar:
for index, f in enumerate(bar):
pathname = os.path.join(input_path, f)
process_ids.append(pool.apply_async(
check_and_process, (pathname, atf_type, segment, verbose)))

result = map(lambda x: x.get(), process_ids)
successes = sum(filter(lambda x: (x == 1), result))
failures = -sum(filter(lambda x: (x == -1), result))
click.echo("Failed with {0} out of {1} ({2}%)"
.format(failures, failures + successes,
failures * 100.0 / (failures + successes)))
try:
check_and_process(pathname, atf_type, whole, verbose)
successes += 1
click.echo('Info: Correctly parsed {0}.'.format(pathname))
except (SyntaxError, IndexError, AttributeError,
UnicodeDecodeError) as e:
failures += 1
click.echo("Info: Failed with message: {0} in {1}"
.format(e, pathname))
finally:
try:
click.echo("Failed with {0} out of {1} ({2}%)"
.format(failures, failures + successes, failures * 100.0 / (failures + successes)))
except ZeroDivisionError:
click.echo("Empty files to process")
else:
check_and_process(input_path, atf_type, segment, verbose)
check_and_process(input_path, atf_type, whole, verbose)
tsend = time.time()
click.echo("Total time taken: {0} minutes)".format((tsend-tsbegin)/60.0))
click.echo("Total time taken: {0} minutes".format((tsend-tsbegin)/60.0))
3 changes: 2 additions & 1 deletion pyoracc/wrapper/segment.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
class Segmentor:
def __init__(self, inputFile, verbose):
self.inputFileName = inputFile
self.outfolder = os.path.join(os.path.dirname(self.inputFileName),
self.outfolder = os.path.join(os.path.dirname(self.inputFileName), "..",
os.path.basename(self.inputFileName)+OUTPUT_FOLDER)
self.verbose = verbose
self.__reset__()
Expand All @@ -27,6 +27,7 @@ def convert(self):
with codecs.open(self.inputFileName, 'r', 'utf-8') as openedFile:
for (i, line) in enumerate(openedFile):
self.__parse(i, line.strip())
self.write2file()
return self.outfolder

def write2file(self):
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ def run(self):


setup(name='pyoracc',
version='0.2.2',
version='0.2.3',
author='UCL Research IT Services',
author_email='[email protected]',
description='Python tools for working with ORACC/CDLI ATF files',
Expand Down

0 comments on commit 0bae693

Please sign in to comment.