-
Notifications
You must be signed in to change notification settings - Fork 16
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge branch 'release/2023-08-03_remove_blanks_kafka_combined'
- Loading branch information
Showing
12 changed files
with
224 additions
and
12 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
from portality.events.shortcircuit import send_event as shortcircuit_send_event | ||
from portality.core import app | ||
|
||
|
||
def send_event(event):
    """Send *event* via the Kafka producer, falling back to the short-circuit sender.

    Any exception raised while importing or calling the Kafka producer is
    logged (with traceback) through the application logger, after which the
    event is handed to ``shortcircuit_send_event`` instead.

    :param event: the event object to dispatch; passed through unchanged to
        whichever sender ends up handling it.
    """
    try:
        # Imported inside the try so that a failing import of the Kafka
        # producer module also takes the fallback path.
        from portality.events.kafka_producer import send_event as kafka_send_event
        kafka_send_event(event)
    except Exception as err:
        app.logger.exception(f"Failed to send event to Kafka. {err}")
        shortcircuit_send_event(event)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,9 @@ | ||
import csv | ||
from typing import Iterable, Union | ||
|
||
|
||
def read_all(csv_path, as_dict=False) -> Iterable[Union[list, dict]]:
    """Lazily yield every row of the CSV file at *csv_path*.

    :param csv_path: path of the CSV file to read.
    :param as_dict: when true, rows are produced by ``csv.DictReader``
        (mappings keyed by the header row, which is consumed); otherwise rows
        are produced by ``csv.reader`` (lists of strings, header included).
    :return: a generator of rows; the file stays open only while the
        generator is being consumed.
    """
    reader = csv.DictReader if as_dict else csv.reader
    # newline='' hands newline handling to the csv module, as its
    # documentation requires; without it, line breaks embedded in quoted
    # fields are translated before the parser sees them.
    with open(csv_path, 'r', newline='') as f:
        yield from reader(f)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
# Remove Blanks
|
||
Remove leading and trailing whitespace from string fields in Journal and Application records.
|
||
### Run | ||
``` | ||
python portality/upgrade.py -u portality/migrate/903_remove_blanks/migrate.json | ||
``` | ||
|
||
### Verify
``` | ||
python -m portality.scripts.blank_field_finder | ||
``` |
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
def remove_blanks(obj) -> dict:
    """Strip leading/trailing whitespace from every string inside *obj*, in place.

    Nested dicts and lists are walked recursively. Each list element is
    handled according to its own type, so mixed lists (strings next to
    numbers, ``None`` or dicts) and nested lists are cleaned without errors.

    :param obj: the record to clean. Anything that is not a dict is returned
        untouched.
    :return: the same *obj* (mutated) when it is a dict, otherwise *obj* as given.
    """
    if not isinstance(obj, dict):
        return obj

    # Only values of existing keys are reassigned, so mutating while
    # iterating over items() is safe here.
    for k, v in obj.items():
        if isinstance(v, dict):
            obj[k] = remove_blanks(v)

        elif isinstance(v, list):
            obj[k] = [_remove_blanks_from_item(item) for item in v]

        elif isinstance(v, str) and v != v.strip():
            # Report each top-level string field that actually changes.
            print(f'remove blanks: {k} = [{v}]')
            obj[k] = v.strip()

    return obj


def _remove_blanks_from_item(item):
    """Clean one list element: strip strings, recurse into lists and dicts."""
    if isinstance(item, str):
        return item.strip()
    if isinstance(item, list):
        return [_remove_blanks_from_item(sub_item) for sub_item in item]
    # Dicts are cleaned recursively; any other type comes back unchanged.
    return remove_blanks(item)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
{ | ||
"batch" : 10000, | ||
"types": [ | ||
{ | ||
"type" : "journal", | ||
"init_with_model" : false, | ||
"keepalive" : "10m", | ||
"functions" : [ | ||
"portality.migrate.903_remove_blanks.functions.remove_blanks" | ||
] | ||
} | ||
] | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,87 @@ | ||
import argparse | ||
from pathlib import Path | ||
from typing import Any, Iterable | ||
|
||
from portality.bll.services.journal import JournalService | ||
from portality.lib import csv_utils | ||
from portality.models import Application, Journal | ||
|
||
|
||
def to_k_v(item: Any, prefix: list = None):
    """Flatten a nested structure into ``(dotted_path, str(value))`` pairs.

    Dict keys and list indexes become the path components, joined with ``.``;
    every leaf value is converted with ``str``.

    :param item: a dict, list or leaf value to flatten.
    :param prefix: path components accumulated so far (``None`` for the root).
    :return: a generator of ``(path, value)`` string tuples.
    """
    path = [] if prefix is None else prefix

    if isinstance(item, dict):
        children = item.items()
    elif isinstance(item, list):
        children = enumerate(item)
    else:
        # Leaf: emit it under the path built up so far.
        yield '.'.join(str(part) for part in path), str(item)
        return

    for key, child in children:
        yield from to_k_v(child, prefix=path + [key])
|
||
|
||
def tee(txt: str, out_file):
    """Print *txt* to stdout and also write it, newline-terminated, to *out_file*.

    :param txt: the line of text to emit (without trailing newline).
    :param out_file: a writable text file object.
    """
    print(txt)
    line = txt + '\n'
    out_file.write(line)
|
||
|
||
def write_bad_data_domain_object(domain_object_class: Any, out_path):
    """Scan every object of *domain_object_class* and report blank-padded string fields.

    Each object's ``data`` is flattened with ``to_k_v``; every value that has
    leading or trailing whitespace is printed and written to *out_path* as
    ``<id> <dotted.key> [<value>]``.

    :param domain_object_class: a class exposing ``iterall()``, whose items
        provide ``id`` and ``data`` attributes.
    :param out_path: path of the report file (overwritten).
    """
    import sys  # local import: only needed to report skipped records

    with open(out_path, 'w') as f:
        items = iter(domain_object_class.iterall())
        while True:
            try:
                j = next(items, None)
            except Exception as e:
                # Best-effort scan: a record that fails to load is reported
                # and skipped. Catching Exception rather than everything
                # keeps Ctrl-C / SystemExit able to stop the run.
                # NOTE(review): if iterall() is a generator, it is exhausted
                # after raising, so the scan ends at the next iteration.
                print(f'skipping record that failed to load: {e!r}', file=sys.stderr)
                continue

            if j is None:
                # The next() default signals that the iterator is exhausted.
                break

            for k, v in filter_bad_only(to_k_v(j.data)):
                tee(f'{j.id} {k} [{v}]', f)
|
||
|
||
def main2(out_path='/tmp/journals.csv'):
    """Write the journals CSV to *out_path*.

    Opens *out_path* for writing and passes the file object to
    ``JournalService._make_journals_csv``.

    :param out_path: destination file; defaults to the previously hard-coded
        ``/tmp/journals.csv`` so existing zero-argument calls behave the same.
    """
    with open(out_path, 'w') as f:
        JournalService._make_journals_csv(f)
|
||
|
||
def is_bad_str(v: str):
    """Return True when *v* is a string with leading or trailing whitespace.

    Non-string values are never considered bad.
    """
    if not isinstance(v, str):
        return False
    return v.strip() != v
|
||
|
||
def filter_bad_only(row: Iterable):
    """Lazily keep only the ``(key, value)`` pairs whose value is a bad string.

    :param row: an iterable of pairs (or longer sequences); element ``[1]`` of
        each entry is checked with ``is_bad_str``.
    :return: a lazy iterator over the matching entries, in their original order.
    """
    return filter(lambda pair: is_bad_str(pair[1]), row)
|
||
|
||
def write_bad_data_journals_csv(csv_path, out_path):
    """Report blank-padded cells found in the journals CSV at *csv_path*.

    Rows are read as dicts via ``csv_utils.read_all``; every cell whose value
    has leading or trailing whitespace is printed and written to *out_path*
    as ``<column> [<value>]``.

    :param csv_path: path of the CSV file to inspect.
    :param out_path: path of the report file (overwritten).
    """
    with open(out_path, 'w') as out_file:
        records = csv_utils.read_all(csv_path, as_dict=True)
        for record in records:
            bad_cells = filter_bad_only(record.items())
            for column, value in bad_cells:
                tee(f'{column} [{value}]', out_file)
|
||
|
||
def write_results(journal_csv_path, out_dir):
    """Produce all bad-data reports inside *out_dir*.

    Writes ``bad_app.txt`` (Application objects) and ``bad_journals.txt``
    (Journal objects); when *journal_csv_path* is given, also writes
    ``bad_journals_csv.txt`` for that CSV file.

    :param journal_csv_path: path of a journals CSV to check, or a falsy value
        to skip the CSV report.
    :param out_dir: directory that receives the report files.
    """
    target_dir = Path(out_dir)

    domain_reports = (
        (Application, 'bad_app.txt'),
        (Journal, 'bad_journals.txt'),
    )
    for model_class, report_name in domain_reports:
        write_bad_data_domain_object(model_class, target_dir / report_name)

    if not journal_csv_path:
        return
    write_bad_data_journals_csv(journal_csv_path, target_dir / 'bad_journals_csv.txt')
|
||
|
||
def main():
    """Command-line entry point: parse the options and write the bad-data reports.

    Options:
        -i / --input   path of an input journals CSV file (optional)
        -o / --output  output directory (defaults to the current directory)
    """
    parser = argparse.ArgumentParser(description='Output file with bad data')
    parser.add_argument('-i', '--input', type=str, default=None,
                        help='Path of input CSV file')
    parser.add_argument('-o', '--output', type=str, default='.',
                        help='Output directory')
    options = parser.parse_args()
    write_results(options.input, options.output)


if __name__ == '__main__':
    main()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters