Skip to content

Commit

Permalink
Merge pull request #21 from rafaelpezzuto/fix2-nonpt-detection
Browse files Browse the repository at this point in the history
Faz otimizações em método que gera pré-tabelas
  • Loading branch information
rafaelpezzuto authored Jan 4, 2022
2 parents 0dafaf4 + ea1c16d commit 9c85122
Show file tree
Hide file tree
Showing 4 changed files with 26 additions and 27 deletions.
18 changes: 16 additions & 2 deletions app/lib/file.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,10 +66,24 @@ def create_file_with_header(path, header=[], delimiter='\t'):
fout.write(delimiter.join(header) + '\n')


def _filename_contains_dates(filename, dates):
    """Return True if any of the given date strings occurs in ``filename``.

    Args:
        filename: File name (or path) to inspect.
        dates: Iterable of date strings to look for as substrings.

    Returns:
        True when at least one element of ``dates`` is a substring of
        ``filename``; False otherwise, including when ``dates`` is empty.
    """
    return any(d in filename for d in dates)


def get_processed_files(date, processed_logs_directory: str, extension='tsv'):
    """List processed log files whose name mentions ``date`` or a nearby day.

    The search window is ``date`` itself plus the two days before and the
    two days after it, each formatted as ``YYYY-MM-DD``.

    Args:
        date: Reference day; must support ``+ datetime.timedelta`` and
            ``strftime``.
        processed_logs_directory: Directory to scan (not recursive).
        extension: Only file names ending with this suffix are considered.

    Returns:
        Paths (``processed_logs_directory`` joined with the file name) of
        every matching file, in the order ``os.listdir`` yields them.
    """
    # Same window as before: the day itself, then +/-1 and +/-2 days.
    day_offsets = (0, -1, 1, -2, 2)
    all_days_str = [
        (date + datetime.timedelta(days=offset)).strftime('%Y-%m-%d')
        for offset in day_offsets
    ]

    files = [f for f in os.listdir(processed_logs_directory) if f.endswith(extension)]

    return [
        os.path.join(processed_logs_directory, f)
        for f in files
        if _filename_contains_dates(f, all_days_str)
    ]


def translate_date_to_output_path(date, output_directory, posfix='', extension='tsv'):
Expand Down
16 changes: 9 additions & 7 deletions app/proc/generate_pretable.py
Original file line number Diff line number Diff line change
Expand Up @@ -154,16 +154,18 @@ def generate_pretables_db(
processed_logs_directory=PROCESSED_LOGS_DIRECTORY,
):
non_pretable_dates = db.get_non_pretable_dates(str_connection, collection)

processed_files = []
for npt in non_pretable_dates:
processed_files = file.get_processed_files(npt, processed_logs_directory)
output_files = {}
processed_files.extend(file.get_processed_files(npt, processed_logs_directory))

for pf in processed_files:
pf_results = generate_pretables(parsed_file=pf, output_directory=output_directory, header=header, extension=extension, delimiter=delimiter)
output_files.update(pf_results)
output_files = {}
for pf in set(sorted(processed_files)):
pf_results = generate_pretables(parsed_file=pf, output_directory=output_directory, header=header, extension=extension, delimiter=delimiter)
output_files.update(pf_results)

for k in output_files:
non_pretable_dates_str = [d.strftime('%Y-%m-%d') for d in non_pretable_dates]
for k in output_files:
if k in non_pretable_dates_str:
db.set_control_date_status(str_connection, collection, k, values.DATE_STATUS_EXTRACTING_PRETABLE)


Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@

setup(
name='scielo-usage-counter',
version='0.4.1',
version='0.4.2',
description='The SciELO Usage Counter Tool',
author='SciELO',
author_email='[email protected]',
Expand Down
17 changes: 0 additions & 17 deletions tests/test_db.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,20 +12,3 @@ def test_previous_and_next_dates(self):
expected_pn_dates = [datetime.datetime.strptime(d, '%Y-%m-%d') for d in ['2021-01-01', '2020-12-31', '2021-01-02', '2020-12-30', '2021-01-03']]

self.assertListEqual(obtained_pn_dates, expected_pn_dates)

def test_dates_able_to_pretable(self):
    """Check which LOADED dates ``db._get_enabled_dates_by_status_value`` returns.

    Six consecutive days (2021-01-01 to 2021-01-06) are all marked with
    ``values.DATE_STATUS_LOADED``; only 2021-01-03 and 2021-01-04 are
    expected back, in that order.

    NOTE(review): those are the only two days whose two previous and two
    following days are also present in the fixture -- presumably that is
    the selection rule; confirm against the ``db`` implementation.
    """
    def to_date(text):
        # Parse a 'YYYY-MM-DD' string into a datetime, as the fixture keys require.
        return datetime.datetime.strptime(text, '%Y-%m-%d')

    loaded_days = [
        '2021-01-01',
        '2021-01-02',
        '2021-01-03',
        '2021-01-04',
        '2021-01-05',
        '2021-01-06',
    ]
    date_status = {to_date(day): values.DATE_STATUS_LOADED for day in loaded_days}

    expected_dates_able_to_extract = [
        to_date('2021-01-03'),
        to_date('2021-01-04'),
    ]
    obtained_dates_able_to_extract = db._get_enabled_dates_by_status_value(date_status, values.DATE_STATUS_LOADED)

    self.assertListEqual(expected_dates_able_to_extract, obtained_dates_able_to_extract)

0 comments on commit 9c85122

Please sign in to comment.