-
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
* Restructuring, removing filepaths for the time being. Signed-off-by: Caroline Russell <[email protected]> * Improve tests. Signed-off-by: Caroline Russell <[email protected]> * Release to PyPI. Signed-off-by: Caroline Russell <[email protected]> * Add README.md Signed-off-by: Caroline Russell <[email protected]> * Ready for first release. Signed-off-by: Caroline Russell <[email protected]> * Fix test parameters. Signed-off-by: Caroline Russell <[email protected]> * Add config.toml to specify keys to sort by. Signed-off-by: Caroline Russell <[email protected]> --------- Signed-off-by: Caroline Russell <[email protected]>
- Loading branch information
1 parent
65ea483
commit 28eeeca
Showing
11 changed files
with
3,375 additions
and
5,731 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
# These are supported funding model platforms | ||
|
||
github: cerrussell |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,32 @@ | ||
name: Publish to PyPI | ||
|
||
on: | ||
push: | ||
branches: | ||
- main | ||
tags: | ||
- 'v*' | ||
|
||
jobs: | ||
pypi: | ||
runs-on: ubuntu-latest | ||
permissions: | ||
id-token: write | ||
steps: | ||
- uses: actions/checkout@v4 | ||
- name: Set up Python | ||
uses: actions/setup-python@v5 | ||
with: | ||
python-version: '3.12' | ||
|
||
- name: Install dependencies | ||
run: | | ||
python -m pip install --upgrade pip | ||
python -m pip install build | ||
- name: Build dist | ||
run: | | ||
python -m build | ||
- name: Release to PyPI | ||
uses: pypa/gh-action-pypi-publish@release/v1 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,76 @@ | ||
# custom-json-diff | ||
|
||
Comparing two JSON files presents an issue when the two files have certain fields which are | ||
dynamically generated (e.g. timestamps), variable ordering, or other fields which need to be | ||
excluded for one reason or another. Enter custom-json-diff, which allows you to specify fields to | ||
ignore in the comparison and sorts all fields. | ||
|
||
|
||
|
||
## Installation | ||
`pip install custom-json-diff` | ||
|
||
## CLI Usage | ||
``` | ||
usage: cjd [-h] -i INPUT INPUT (-c CONFIG_FILE | -x EXCLUDE [EXCLUDE ...] | -p {cdxgen}) | ||
options: | ||
-h, --help show this help message and exit | ||
-i INPUT INPUT, --input INPUT INPUT | ||
Two JSON files to compare | ||
-c CONFIG_FILE, --config-file CONFIG_FILE | ||
Import TOML configuration file | ||
-x EXCLUDE [EXCLUDE ...], --exclude EXCLUDE [EXCLUDE ...] | ||
Exclude field(s) from comparison | ||
-p {cdxgen}, --preset {cdxgen} | ||
Preset to use | ||
``` | ||
|
||
## Specifying fields to exclude | ||
|
||
To exclude fields from comparison, use the `-x` or `--exclude` flag and specify the field name(s) | ||
to exclude. The JSON will be flattened, so fields are specified using dot notation. For example: | ||
|
||
```json | ||
{ | ||
"field1": { | ||
"field2": "value", | ||
"field3": [ | ||
{"a": "val1", "b": "val2"}, | ||
{"a": "val3", "b": "val4"} | ||
] | ||
} | ||
} | ||
``` | ||
|
||
is flattened to: | ||
```json | ||
{ | ||
"field1.field2": "value", | ||
"field1.field3.[0].a": "val1", | ||
"field1.field3.[0].b": "val2", | ||
"field1.field3.[1].a": "val3", | ||
"field1.field3.[1].b": "val4" | ||
} | ||
``` | ||
|
||
To exclude field2, you would specify `field1.field2`. To exclude the `a` field in the array of | ||
objects, you would specify `field1.field3.[].a`. custom-json-diff will create a regex which will | ||
account for the array index in the field name. Multiple fields may be specified separated by a | ||
space. To better understand what your fields should be, check out json-flatten, which is the | ||
package used for this function. | ||
|
||
## Sorting | ||
|
||
custom-json-diff will sort the imported JSON alphabetically. If your JSON document contains arrays | ||
of objects, you will need to specify any keys you want to sort by in a TOML file or use a preset. | ||
The first key located from the provided keys that is present in the object will be used for sorting. | ||
|
||
## TOML config file example | ||
|
||
```toml | ||
[settings] | ||
excluded_fields = ["serialNumber", "metadata.timestamp"] | ||
sort_keys = ["url", "content", "ref", "name", "value"] | ||
``` |
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,71 @@ | ||
import argparse | ||
import json | ||
|
||
from custom_json_diff.custom_diff import import_toml, set_excluded_fields, compare_dicts, get_diffs | ||
|
||
|
||
def build_args(argv=None):
    """Build the command-line parser and parse the arguments.

    Args:
        argv: Optional list of argument strings to parse. When None (the
            default) argparse reads the arguments from ``sys.argv[1:]``,
            which is what the CLI entry point relies on.

    Returns:
        The parsed ``argparse.Namespace`` with ``input`` (the two file
        paths), ``config_file``, ``exclude`` and ``preset`` attributes.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "-i",
        "--input",
        action="store",
        help="Two JSON files to compare",
        required=True,
        nargs=2,
        dest="input",
    )
    # Exactly one source of comparison settings must be given: a config
    # file, an explicit list of excluded fields, or a named preset.
    arg_group = parser.add_mutually_exclusive_group(required=True)
    arg_group.add_argument(
        "-c",
        "--config-file",
        action="store",
        help="Import TOML configuration file",
    )
    arg_group.add_argument(
        "-x",
        "--exclude",
        action="store",
        help="Exclude field(s) from comparison",
        default=[],
        dest="exclude",
        nargs="+",
    )
    arg_group.add_argument(
        "-p",
        "--preset",
        action="store",
        help="Preset to use",
        choices=["cdxgen"],
        dest="preset",
    )
    return parser.parse_args(argv)
|
||
|
||
def main():
    """Compare the two input JSON files and print the outcome.

    The excluded fields and sort keys come from the preset, the TOML
    config file, or the ``--exclude`` list, whichever was supplied.
    Prints "Files are identical" when nothing differs, otherwise a JSON
    document of the differences keyed by file path.
    """
    args = build_args()
    first_file, second_file = args.input
    if args.preset:
        exclude_keys, sort_keys = set_excluded_fields(args.preset)
    elif args.config_file:
        exclude_keys, sort_keys = import_toml(args.config_file)
    else:
        exclude_keys, sort_keys = set(args.exclude), []
    status, data_1, data_2 = compare_dicts(
        first_file, second_file, exclude_keys, sort_keys
    )
    if status == 0:
        print("Files are identical")
        return
    differences = get_diffs(first_file, second_file, data_1, data_2)
    print(json.dumps(differences, indent=2))
|
||
|
||
# Run the CLI only when this module is executed as a script, not on import.
if __name__ == "__main__":
    main()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,144 @@ | ||
import json | ||
import logging | ||
import re | ||
import sys | ||
from typing import Dict, List, Set, Tuple | ||
|
||
import toml | ||
from json_flatten import flatten, unflatten | ||
|
||
|
||
def check_key(key: str, exclude_keys: Set[str]) -> bool:
    """Return True when *key* should be kept.

    A key is dropped (False) as soon as it starts with any of the
    excluded prefixes.
    """
    for prefix in exclude_keys:
        if key.startswith(prefix):
            return False
    return True
|
||
|
||
def check_regex(regex_keys: List[re.Pattern], key: str) -> bool:
    """Return True when any compiled pattern matches the start of *key*."""
    for pattern in regex_keys:
        if pattern.match(key):
            return True
    return False
|
||
|
||
def compare_dicts(json1: str, json2: str, exclude_keys: Set[str], sort_keys: List[str]):
    """Load both JSON files and report whether their filtered contents match.

    Returns:
        A tuple ``(status, data_1, data_2)`` where ``status`` is 0 when the
        two loaded dictionaries are equal and 1 otherwise.
    """
    first = load_json(json1, exclude_keys, sort_keys)
    second = load_json(json2, exclude_keys, sort_keys)
    status = 0 if first == second else 1
    return status, first, second
|
||
|
||
def filter_advanced(flattened_data: Dict, exclude_keys: Set[str]) -> Dict:
    """Drop flattened keys matching exclude fields that may contain ``[]``.

    Each exclude field is turned into a regex anchored at the start of the
    key. A ``[]`` placeholder matches any array index (``[0]``, ``[12]``,
    ...); every other character is matched literally.

    Args:
        flattened_data: Mapping of flattened keys to values.
        exclude_keys: Fields to exclude; ``[]`` stands for any array index.

    Returns:
        A new dict holding only the entries whose key matches no exclude
        field. Keys are returned exactly as they were passed in.
    """
    index_pattern = r"\[[0-9]+\]"
    # Escape the literal parts so "." and explicit indices such as "[0]"
    # are not interpreted as regex syntax; only "[]" becomes a wildcard.
    patterns = [
        re.compile(index_pattern.join(re.escape(part) for part in field.split("[]")))
        for field in exclude_keys
    ]
    # Match against the untouched keys: rewriting brackets to placeholder
    # characters and back would corrupt keys that contain those characters.
    return {
        key: value
        for key, value in flattened_data.items()
        if not any(pattern.match(key) for pattern in patterns)
    }
|
||
|
||
def filter_dict(data: Dict, exclude_keys: Set[str], sort_keys: List[str]) -> Dict:
    """Sort and flatten *data*, then remove the excluded fields.

    The regex-based filter is used as soon as any exclude field contains a
    ``[``; otherwise the plain prefix filter is applied.
    """
    flattened = flatten(sort_dict(data, sort_keys))
    for field in exclude_keys:
        if "[" in field:
            return filter_advanced(flattened, exclude_keys)
    return filter_simple(flattened, exclude_keys)
|
||
|
||
def filter_simple(flattened_data: Dict, exclude_keys: Set[str]) -> Dict:
    """Return the entries whose key starts with none of the excluded fields."""
    prefixes = tuple(exclude_keys)
    kept = {}
    for key, value in flattened_data.items():
        if not key.startswith(prefixes):
            kept[key] = value
    return kept
|
||
|
||
def get_diffs(file_1: str, file_2: str, json_1_data: Dict, json_2_data: Dict) -> Dict:
    """Return the entries that differ between two flattened dictionaries.

    Args:
        file_1: Label (file path) used as the key for the first result.
        file_2: Label (file path) used as the key for the second result.
        json_1_data: Flattened contents of the first file.
        json_2_data: Flattened contents of the second file.

    Returns:
        ``{file_1: <unflattened entries only in / different in file 1>,
        file_2: <same for file 2>}``.
    """
    # Compare key/value pairs directly. Joining them into "key:value"
    # strings and splitting on ":" again truncates any key or value that
    # itself contains a colon (URLs, timestamps, ...).
    missing = object()
    only_in_1 = {
        key: value
        for key, value in json_1_data.items()
        if json_2_data.get(key, missing) != value
    }
    only_in_2 = {
        key: value
        for key, value in json_2_data.items()
        if json_1_data.get(key, missing) != value
    }
    return {file_1: unflatten(only_in_1), file_2: unflatten(only_in_2)}
|
||
|
||
def get_sort_key(data: Dict, sort_keys: List[str]) -> str | bool: | ||
return next((i for i in sort_keys if i in data), False) | ||
|
||
|
||
def import_toml(toml_file_path):
    """Read the excluded fields and sort keys from a TOML config file.

    The file must contain a ``[settings]`` table with ``excluded_fields``
    and ``sort_keys`` entries.

    Args:
        toml_file_path: Path of the TOML file to read.

    Returns:
        A tuple ``(excluded_fields, sort_keys)`` exactly as stored in the file.

    Exits the process with status 1 (after logging an error) when the file
    is missing, is not valid TOML, or lacks one of the required entries.
    """
    try:
        with open(toml_file_path, "r", encoding="utf-8") as f:
            toml_data = toml.load(f)
    except FileNotFoundError:
        # Same handling as load_json, instead of an uncaught traceback.
        logging.error("File not found: %s", toml_file_path)
        sys.exit(1)
    except toml.TomlDecodeError:
        logging.error("Invalid TOML.")
        sys.exit(1)
    try:
        settings = toml_data["settings"]
        return settings["excluded_fields"], settings["sort_keys"]
    except KeyError:
        logging.error("Invalid TOML.")
        sys.exit(1)
|
||
|
||
def load_json(json_file: str, exclude_keys: Set[str], sort_keys: List[str]) -> Dict:
    """Read a JSON file and return its sorted, flattened, filtered contents.

    Logs an error and exits with status 1 when the file does not exist or
    does not contain valid JSON.
    """
    try:
        with open(json_file, "r", encoding="utf-8") as handle:
            parsed = json.load(handle)
    except (FileNotFoundError, json.JSONDecodeError) as err:
        if isinstance(err, FileNotFoundError):
            logging.error("File not found: %s", json_file)
        else:
            logging.error("Invalid JSON: %s", json_file)
        sys.exit(1)
    return filter_dict(parsed, exclude_keys, sort_keys)
|
||
|
||
def remove_filepaths(data: Dict) -> Dict:
    """Placeholder for file-path filtering; not implemented.

    Raises:
        NotImplementedError: Always.
    """
    raise NotImplementedError
|
||
|
||
def set_excluded_fields(preset: str) -> Tuple[Set[str], List[str]]:
    """Return the excluded fields and sort keys defined by a preset.

    Only the ``cdxgen`` preset carries settings; any other name yields an
    empty set and an empty list.
    """
    if preset == "cdxgen":
        return (
            {"metadata.timestamp", "serialNumber"},
            ["url", "content", "ref", "name", "value"],
        )
    return set(), []
|
||
|
||
def sort_dict(result: Dict, sort_keys: List[str], unflat: bool = False) -> Dict:
    """Sort the lists held in a (possibly nested) dictionary, in place.

    Nested dictionaries are processed recursively; lists with two or more
    items are handed to ``sort_list``. All other values are left untouched.

    Args:
        result: Dictionary to process; its values are replaced in place.
        sort_keys: Candidate keys used to order lists of dictionaries.
        unflat: When True, the processed dictionary is passed through
            ``unflatten`` before being returned.

    Returns:
        The processed dictionary (or its unflattened form).
    """
    # Only existing keys are reassigned, so the dict size never changes
    # while it is being iterated.
    for name in result:
        item = result[name]
        if isinstance(item, dict):
            result[name] = sort_dict(item, sort_keys)
        elif isinstance(item, list) and len(item) > 1:
            result[name] = sort_list(item, sort_keys)
    return unflatten(result) if unflat else result
|
||
|
||
def sort_list(lst: List, sort_keys: List[str]) -> List:
    """Sort a list of dictionaries or of scalars.

    The type of the first element decides how the list is handled:

    * dict: a new list is returned, ordered by the first entry of
      *sort_keys* that is present in every element. If no such key
      exists the list is returned unsorted and a warning is logged.
    * str / int / float: the list is sorted in place and returned.
    * anything else: the list is returned unchanged.

    Empty lists and lists whose values cannot be compared with each other
    are returned unchanged instead of raising.

    NOTE: containers nested inside the list elements are not sorted here.
    """
    if not lst:
        return lst
    if isinstance(lst[0], dict):
        # Require the key in every element, not only the first one, so the
        # sort cannot fail with KeyError part-way through.
        sort_key = next(
            (
                key
                for key in sort_keys
                if all(isinstance(item, dict) and key in item for item in lst)
            ),
            None,
        )
        if sort_key is None:
            logging.warning("No key(s) specified for sorting. Cannot sort list of dictionaries.")
            return lst
        try:
            return sorted(lst, key=lambda item: item[sort_key])
        except TypeError:
            logging.warning("Cannot sort list of dictionaries: values of '%s' are not comparable.", sort_key)
            return lst
    if isinstance(lst[0], (str, int, float)):
        try:
            # sorted() raises before the slice assignment happens, so a
            # failed sort never leaves the list partially reordered.
            lst[:] = sorted(lst)
        except TypeError:
            logging.warning("Cannot sort list: values are not comparable.")
    return lst
Oops, something went wrong.