diff --git a/.gitignore b/.gitignore index 79384561..e15801ac 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,4 @@ .ve *.swp -*~ -tests/__pycache__ +.vscode +tests/__pycache__ \ No newline at end of file diff --git a/developer_docs.md b/developer_docs.md index 8f13ee35..da189a72 100644 --- a/developer_docs.md +++ b/developer_docs.md @@ -1,6 +1,10 @@ # Developer documentation -This page provides [how-to guides](#how-to-guides) and [reference documentation](#reference) for developers of the Risk Data Library Standard. +This page provides the following documentation for developers of the Risk Data Library Standard: + +* [How-to guides](#how-to-guides) +* [Style guides](#style-guides) +* [Reference documentation](#reference) ## How-to guides @@ -19,7 +23,8 @@ The preferred approach for making changes to the standard is to use a [local dev 1. Agree on a proposal in a [GitHub issue](https://github.com/GFDRR/rdl-standard/issues). 1. Create a branch from the `dev` branch. -1. Make your changes. Do not use normative keywords in non-normative content. For more information, see [normative and non-normative content in RDLS](https://docs.google.com/document/d/13g1SZO3ZSHbkymtc69lQOu9vB9vlZVZnodAcxC50l1M/edit#). +1. Make your changes. Do not use normative keywords in non-normative content. For more information, see [normative and non-normative content in RDLS](https://docs.google.com/document/d/13g1SZO3ZSHbkymtc69lQOu9vB9vlZVZnodAcxC50l1M/edit#). +1. Run `./manage.py pre-commit`. 1. [Build the documentation](#build-the-documentation), resolve any errors and preview your changes locally. 1. Commit your changes to your branch and push it to GitHub. Your changes are available for anyone to preview at [https://rdl-standard.readthedocs.io/en/{branch name}](https://rdl-standard.readthedocs.io/en/{branch name}). 1. 
[Create a pull request](https://github.com/GFDRR/rdl-standard/compare): @@ -136,7 +141,6 @@ If this check fails, run the following command to fix markdown formatting: ```bash mdformat docs ``` - #### tests If this check fails, review the output to identify which test failed: @@ -161,6 +165,28 @@ Review the warnings to identify the invalid JSON files and correct the errors. Review the warnings to identify and correct the errors. For more information on each test, see https://jscc.readthedocs.io/en/latest/api/testing/checks.html#module-jscc.testing.checks. +## Style guides + +### Changelog style guide + +* Use the [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) format. +* Begin each entry with a link to the pull request for the change. + +#### Normative content + +Changelog entries should be descriptive: + +* Bad entry: Update schema. +* Good entry: Make `name` required. + +If changes are made to files under the `schema` directory, it is assumed that corresponding changes were made to files under the `docs` directory. Do not add an entry under the "Documentation" heading if the changes directly correspond to entries under the "Codelists" and/or "Schema" headings. + +For changes to schema and codelists, preserve schema/codelist ordering when adding new changelog entries. Otherwise, to reduce merge conflicts, add new changelog entries to the end of the relevant bullet list. + +#### Non-normative content + +Changelog entries should be descriptive. Do not add an entry like "Improve primer." Instead, simply add the PR number to the "Primer" list item. 
+ ## Reference This section contains the following reference documentation: @@ -168,6 +194,7 @@ This section contains the following reference documentation: * [GitHub repository](#github-repository) * [Sphinx](#sphinx) * [Read the Docs](#read-the-docs) +* [manage.py](#managepy) ### GitHub repository @@ -229,3 +256,7 @@ Other than the `main` branch, all branches are hidden from the [flyout menu](htt #### Credentials You can find credentials for Read the Docs in the Open Data Services password database. + +### manage.py + +The standard repository includes a command-line utility for administrative tasks. For information on the available commands, run `./manage.py --help`. diff --git a/docs/data_model/codelists.md b/docs/data_model/codelists.md new file mode 100644 index 00000000..a5dbba3e --- /dev/null +++ b/docs/data_model/codelists.md @@ -0,0 +1,22 @@ +# Codelists + +Some schema fields refer to codelists, to limit and standardise the possible values of the fields, in order to promote data interoperability. + +Codelists can either be open or closed. **Closed codelists** are intended to be comprehensive; for example, the currency codelist covers all currencies in the world. **Open codelists** are intended to be representative, but not comprehensive. + +Publishers must use the codes in the codelists, unless no code is appropriate. If no code is appropriate and the codelist is **open**, then a publisher may use a new code outside those in the codelist. If no code is appropriate and the codelist is **closed**, then a publisher should instead create an issue in the [RDLS GitHub repository](https://github.com/GFDRR/rdl-standard/issues). + +```{admonition} Extending open codelists +--- +class: Tip +--- +If you use new codes outside those in an open codelist, please create an issue in the [RDLS GitHub repository](https://github.com/GFDRR/rdl-standard/issues), so that the codes can be considered for inclusion in the codelist. 
#!/usr/bin/env python3
"""Command-line utility for administrative tasks in the standard repository.

Run ``./manage.py --help`` to list the available commands.
"""
import click
import csv
import glob
import json
import os
import requests
import shutil
import subprocess

from collections import OrderedDict
from contextlib import contextmanager
from io import StringIO
from pathlib import Path

basedir = Path(__file__).resolve().parent
codelistdir = basedir / 'codelists'
referencedir = basedir / 'docs' / 'data_model'
schemadir = basedir / 'schema'

# First line of the generated part of each codelist section. Everything between
# a "### <codelist>" heading and this line is hand-written and is preserved.
_REFERENCES_INTRO = "This codelist is referenced by the following properties:\n"


def read_lines(filename):
    """Read a file and return a list of lines (line endings included)."""
    # Explicit encoding: the documentation is UTF-8 regardless of platform locale.
    with open(filename, 'r', encoding='utf-8') as f:
        return f.readlines()


def write_lines(filename, lines):
    """Write a list of lines to a file, replacing its previous content."""
    with open(filename, 'w', encoding='utf-8') as f:
        f.writelines(lines)


def csv_load(url, delimiter=','):
    """
    Loads CSV data into a ``csv.DictReader`` from the given URL.

    :param url: The URL to download
    :param delimiter: The field delimiter of the CSV data
    """
    return csv.DictReader(StringIO(get(url).text), delimiter=delimiter)


@contextmanager
def csv_dump(path, fieldnames):
    """
    Writes CSV headers to the given path, and yields a ``csv.writer``.

    The file is closed when the ``with`` block exits, including when writing the
    header row or the body of the block raises.

    :param path: The path of the CSV file to (over)write
    :param fieldnames: The header row
    """
    # newline='' is what the csv module expects; the writer emits '\n' itself.
    with Path(path).open('w', encoding='utf-8', newline='') as f:
        writer = csv.writer(f, lineterminator='\n')
        writer.writerow(fieldnames)
        yield writer


def get(url, timeout=30):
    """
    GETs a URL and returns the response. Raises an exception if the status code is not successful.

    :param url: The URL to request
    :param timeout: Seconds to wait for the server before giving up
    """
    # Without a timeout, requests waits indefinitely on an unresponsive server.
    response = requests.get(url, timeout=timeout)
    response.raise_for_status()
    response.encoding = response.apparent_encoding
    return response


def json_dump(filename, data):
    """
    Writes JSON data to the given filename, relative to the schema directory.
    """
    with (schemadir / filename).open('w', encoding='utf-8') as f:
        json.dump(data, f, indent=2)
        f.write('\n')


def delete_directory_contents(directory_path):
    """
    Deletes the contents of a directory on disk.

    Deletion is best-effort: a failure is reported on standard output and the
    remaining entries are still processed.
    """
    for filename in os.listdir(directory_path):
        file_path = os.path.join(directory_path, filename)
        try:
            if os.path.isfile(file_path) or os.path.islink(file_path):
                os.unlink(file_path)
            elif os.path.isdir(file_path):
                shutil.rmtree(file_path)
        except OSError as e:
            print('Failed to delete %s. Reason: %s' % (file_path, e))


def json_load(filename, library=json):
    """
    Loads JSON data from the given filename, relative to the schema directory.

    :param filename: The name of the file in the schema directory
    :param library: A module or object with a ``json``-compatible ``load`` function
    """
    with (schemadir / filename).open(encoding='utf-8') as f:
        return library.load(f)


def get_codelist_references(schema, codelist, parents=None, full_schema=None, defs_path='$defs'):
    """
    Recursively generate a list of JSON pointers that reference a codelist in JSON schema.

    Each pointer is a list of path components; ``'0'`` stands for "any item" of an array.

    :param schema: The JSON schema (or subschema) to search
    :param codelist: The name of the codelist, without the ``.csv`` extension
    :param parents: A list of the parents of schema
    :param full_schema: The full schema, used to resolve ``$ref``
    :param defs_path: The path under which definitions are located in the schema
    """
    references = []

    if parents is None:
        parents = []

    if full_schema is None:
        full_schema = schema

    def search(subschema, pointer):
        # defs_path is forwarded explicitly: relying on the default would look up
        # '$defs' even in schemas that keep their definitions under 'definitions'.
        return get_codelist_references(subschema, codelist, pointer, full_schema, defs_path)

    def resolve(ref):
        return full_schema[defs_path][ref.split('/')[-1]]

    for key, value in schema.get('properties', {}).items():
        items = value.get('items')

        if value.get('codelist') == f"{codelist}.csv":
            references.append(parents + [key])
        # An array may lack 'items', or declare it as something other than an object.
        elif value.get('type') == 'array' and isinstance(items, dict) and '$ref' in items:
            references.extend(search(resolve(items['$ref']), parents + [key, '0']))
        elif '$ref' in value:
            references.extend(search(resolve(value['$ref']), parents + [key]))
        elif 'properties' in value:
            references.extend(search(value, parents + [key]))

    # Definitions are searched as roots of their own, so pointers start at the definition name.
    for key, value in schema.get(defs_path, {}).items():
        references.extend(search(value, [key]))

    return references


def generate_codelist_markdown(codelist, type, references, definitions, defs_path):
    """
    Generate reference markdown for codelist.

    :param codelist: The name of the codelist, without the ``.csv`` extension
    :param type: The name of the directory that contains the codelist ("open" or otherwise)
    :param references: The JSON pointers returned by ``get_codelist_references``
    :param definitions: The schema's definitions, keyed by name
    :param defs_path: The path under which definitions are located in the schema
    :returns: A list of strings to append to the codelist's section
    """
    markdown = [_REFERENCES_INTRO + "\n"]

    for ref in references:
        # Remove array indices because they do not appear in the HTML anchors generated by the json schema directive
        ref = [part for part in ref if part != '0']

        url = 'rdl_schema_0.1.json,'

        # Omit nested references
        if ref[0] in definitions and len(ref) == 2:
            url += f'/{defs_path}/'
        elif len(ref) == 1:
            url += ','
        else:
            continue

        url += ','.join(ref)
        markdown.append(f"- [`{'/'.join(ref)}`]({url})\n")

    markdown.extend([
        "\nThis codelist has the following codes:\n\n",
        "```{csv-table-no-translate}\n",
        ":header-rows: 1\n",
        ":widths: auto\n",
        f":file: ../../codelists/{type}/{codelist}.csv\n",
        "```\n\n",
    ])

    return markdown


def update_codelist_docs(schema):
    """
    Update docs/data_model/codelists.md.

    Regenerates one section per CSV file found under ``codelists/<type>/``, keeping
    any hand-written content between a section heading and its generated part.

    :param schema: The full JSON schema
    :raises KeyError: if the schema contains neither ``$defs`` nor ``definitions``
    :raises ValueError: if codelists.md has no "## Open codelists" heading
    """
    if '$defs' in schema:
        defs_path = '$defs'
    elif 'definitions' in schema:
        defs_path = 'definitions'
    else:
        raise KeyError("Schema contains neither $defs nor definitions.")

    # Load codelist reference
    reference_path = referencedir / 'codelists.md'
    codelist_reference = read_lines(reference_path)

    # Get codelist names and types (open or closed) from the codelist directory and get a list of references for each codelist
    codelists = {}

    for path in glob.glob(f"{codelistdir}/*/*.csv"):
        # pathlib handles both path separators and keeps dots inside the file name.
        csv_path = Path(path)
        codelist = csv_path.stem
        codelists[codelist] = {
            "type": csv_path.parent.name,
            "content": [f"### {codelist}\n"],
            "references": get_codelist_references(schema, codelist, defs_path=defs_path),
        }

    # Sort codelists alphabetically
    codelists = OrderedDict(sorted(codelists.items()))

    # Preserve content that appears before the generated reference content for each codelist
    for i, line in enumerate(codelist_reference):
        if not line.startswith("### "):
            continue

        codelist = line[4:].rstrip("\n")

        # Drop codelists that don't appear in the codelists directory
        if codelist not in codelists:
            continue

        for following in codelist_reference[i + 1:]:
            # Also stop at the next heading, so that a section without generated
            # content does not swallow the sections that follow it.
            if following == _REFERENCES_INTRO or following.startswith(("## ", "### ")):
                break
            codelists[codelist]["content"].append(following)

    # Preserve introductory content up to and including the ## Open codelists heading
    try:
        heading_index = codelist_reference.index("## Open codelists\n")
    except ValueError:
        raise ValueError(f"{reference_path} has no '## Open codelists' heading") from None

    codelist_reference = codelist_reference[:heading_index + 1]
    codelist_reference.append("\n")

    # Update reference for open and closed codelists
    closed_codelist_reference = ["## Closed codelists\n\n"]

    for name, details in codelists.items():
        details['content'].extend(
            generate_codelist_markdown(name, details['type'], details['references'], schema[defs_path], defs_path)
        )
        if details['type'] == "open":
            codelist_reference.extend(details['content'])
        else:
            closed_codelist_reference.extend(details['content'])

    codelist_reference.extend(closed_codelist_reference)

    write_lines(reference_path, codelist_reference)


@click.group()
def cli():
    """Administrative tasks for the standard repository."""


@cli.command()
def pre_commit():
    """Update codelist reference documentation and run mdformat
    """

    # Load schema
    schema = json_load('rdl_schema_0.1.json')

    # Update codelists.md
    update_codelist_docs(schema)

    # Run mdformat from the repository root, and fail loudly if it reports an error
    subprocess.run(['mdformat', 'docs'], cwd=basedir, check=True)


if __name__ == '__main__':
    cli()