From dbdd960fe8f817fc3a5d8cfc6d5c2b8dd60d7b30 Mon Sep 17 00:00:00 2001
From: nodiscc
Date: Thu, 6 Oct 2022 19:02:31 +0200
Subject: [PATCH] WIP: shaarli_api: add skip_existing: True/False option (don't overwrite items with already existing URL)

---
Review notes (free-form text between '---' and the diff; not part of the
commit message):

* The line structure of this patch was restored from a whitespace-collapsed
  copy. Leading whitespace of context lines and the position of blank context
  lines were reconstructed from the hunk line counts; verify them against the
  target tree before running `git am` / `git apply`.
* import_shaarli_json now has a single write path: when skip_existing applies,
  the merged list replaces `data`, and the pre-existing
  `with open(..., 'w+')` block is reused unchanged (hence no deleted lines).
* `load_yaml_data(...) or []` guards against an empty output file, which
  presumably loads as None (confirm against hecat/utils.py).
* The skip_existing default is applied with dict.setdefault, which still
  records the default on step['module_options'] like the earlier revision did.
* The commit id on the first line and the post-image blob id on the `index`
  line belong to the earlier revision of this patch and do not describe the
  amended content.

 hecat/importers/shaarli_api.py  | 12 ++++++++++++
 tests/.hecat.import_shaarli.yml |  1 +
 2 files changed, 13 insertions(+)

diff --git a/hecat/importers/shaarli_api.py b/hecat/importers/shaarli_api.py
index 6d6f2f6..9d70f69 100644
--- a/hecat/importers/shaarli_api.py
+++ b/hecat/importers/shaarli_api.py
@@ -11,6 +11,7 @@
     module_options:
       source_file: shaarli.json
       output_file: shaarli.yml
+      skip_existing: True # optional, default True, skip importing items whose 'url:' already exists in the output file
 
 Source directory structure:
 └── shaarli.json
@@ -19,17 +20,28 @@
 └── shaarli.yml
 """
 
+import os
 import logging
 import ruamel.yaml
 import json
+from ..utils import load_yaml_data
 
 yaml = ruamel.yaml.YAML()
 yaml.indent(sequence=2, offset=0)
 
 def import_shaarli_json(step):
     """Import data from the JSON output of Shaarli API"""
+    # skip_existing defaults to True when absent; setdefault also records the default on the step
+    skip_existing = step['module_options'].setdefault('skip_existing', True)
+    output_file = step['module_options']['output_file']
     with open(step['module_options']['source_file'], 'r', encoding="utf-8") as json_file:
         data = json.load(json_file)
+    if skip_existing and os.path.exists(output_file):
+        logging.info('loading existing data from %s', output_file)
+        # an empty YAML file presumably loads as None (TODO confirm); treat it as "no previous items"
+        previous_data = load_yaml_data(output_file) or []
+        # later entries win in the dict, so items already present in the output file are kept as-is
+        data = sorted({x["url"]: x for x in (data + previous_data)}.values(), key=lambda x: x["url"])
     with open(step['module_options']['output_file'], 'w+', encoding="utf-8") as yaml_file:
         logging.debug('writing file %s', step['module_options']['output_file'])
         yaml.dump(data, yaml_file)
diff --git a/tests/.hecat.import_shaarli.yml b/tests/.hecat.import_shaarli.yml
index aaac64a..6bf1105 100644
--- a/tests/.hecat.import_shaarli.yml
+++ b/tests/.hecat.import_shaarli.yml
@@ -4,3 +4,4 @@ steps:
     module_options:
       source_file: tests/shaarli.json
       output_file: tests/shaarli.yml
+      skip_existing: True # optional, default True