From 5a9b70dee6691750f21f0b31140abc9bd8497e46 Mon Sep 17 00:00:00 2001 From: boryanagoncharenko <3010723+boryanagoncharenko@users.noreply.github.com> Date: Tue, 29 Oct 2024 13:21:40 +0200 Subject: [PATCH] =?UTF-8?q?=F0=9F=9A=9A=20Stop=20copying=20yaml=20content?= =?UTF-8?q?=20when=20a=20new=20language=20is=20added=20(#5823)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is a draft PR meant to collect feedback for a solution of issue #5187. **Description of the problem** When a new language is added in Weblate, the prefill add-on copies the English content into the new language yaml files. This means that when the English content changes, we need to manually make the change in all languages that do not have the content translated. Instead of this, we would like to keep in the yaml files only the content that has actually been translated. **Yaml files** All newly added content/yaml files are not prefilled with the current English translation and only contain the translated value for the corresponding language. Currently, only the om language has partial content to illustrate the solution. The content of each yaml file is merged with the en.yaml counterpart when the yaml is loaded. - Schema validation. The schema definitions have explicit required fields and only the en.yaml files are checked against them. A Python script generates new schema definitions (*.generated.schema.json) that lift the required-field constraints. The generation is part of the doit backend task and the schemas are gitignored. The generated schemas are used for all non-en.yaml files. - Correctness check added in #4020. We have a script that checks if there are different array lengths between English and other languages. This check is amended to ensure the structure of the language yaml file is a subset of the English file. Now it checks that: - The nodes in the 2 yaml files are of the same type, specifically arrays and dictionaries. 
- An array in a language file should not have more elements than the corresponding array in the English file. - A dictionary in the language file should not have keys that are not present in the English file. - Note that the merging mechanism silently discards the content of the language file if any of the correctness rules is not met. The assumption is that the English file will have the correct structure. This is meant to be in line with the silent empty dictionary if an IOError occurs while loading the yaml file. However, we could have a different approach here: fail and just load the fallback? Or at least log that something went wrong? **Po files** Just like the yaml files, all newly added .po files will not be prefilled with English and, again, the om language is used as an example. The merging of every language with English happens through a custom function that defines a fallback. The regular gettext function is substituted with the custom one. **Deduplication of existing languages** The om language is added to this PR for illustrative purposes and is not an officially supported language. Since all current languages have a version of en.yaml files, the merging will not yield immediate results. It will take effect only when a new language is added. Perhaps it is wise to run a basic form of deduplication of the current files and test immediately. Note that complete deduplication of the yaml files is currently very hard to achieve because every yaml file copies a different version of the English content. However, we could simplify the problem by only removing the duplication that matches the current English version. 
Co-Authored-By: Languages add-on Co-Authored-By: weblate --- .gitignore | 5 +- app.py | 8 ++- build-tools/github/validate-yaml | 18 +++-- content/adventures/adventures.schema.json | 9 +-- content/slides/id.yaml | 40 +++++------ content/slides/slides.schema.json | 50 ++++++++++++++ content/tutorials/tutorials.schema.json | 1 + dodo.py | 19 ++++++ hedy.py | 2 +- hedy_error.py | 2 +- highlighting/generate-rules-highlighting.py | 41 ++++-------- tests/tests_z_yamlfile.py | 73 +++++++++++++++++++++ tools/check-yaml-structure.py | 63 ++++++++++++------ tools/generate-yaml-schemas.py | 26 ++++++++ utils.py | 3 +- website/admin.py | 2 +- website/auth.py | 3 +- website/auth_pages.py | 2 +- website/classes.py | 2 +- website/feedback.py | 2 +- website/flask_helpers.py | 13 ++++ website/for_teachers.py | 2 +- website/parsons.py | 2 +- website/profile.py | 2 +- website/programs.py | 2 +- website/public_adventures.py | 2 +- website/tags.py | 2 +- website/user_activity.py | 2 +- website/yaml_file.py | 46 ++++++++++--- 29 files changed, 335 insertions(+), 109 deletions(-) create mode 100644 content/slides/slides.schema.json create mode 100644 tools/generate-yaml-schemas.py diff --git a/.gitignore b/.gitignore index f5d75585028..24cbb7f153a 100644 --- a/.gitignore +++ b/.gitignore @@ -189,4 +189,7 @@ file_logger.json # We submitted a PR to the package, but it's still wating to be approved # Until then, we can include it in .gitignore # More info in #4540 -foo.txt \ No newline at end of file +foo.txt + +# Generated schema json files +*.generated.schema.json diff --git a/app.py b/app.py index 1fac2140da7..42b5e0dfcf8 100644 --- a/app.py +++ b/app.py @@ -24,7 +24,8 @@ from flask import (Flask, Response, abort, after_this_request, g, jsonify, make_response, redirect, request, send_file, url_for, send_from_directory, session) -from flask_babel import Babel, gettext +from flask_babel import Babel +from website.flask_helpers import gettext_with_fallback as gettext from 
website.flask_commonmark import Commonmark from flask_compress import Compress from urllib.parse import quote_plus @@ -443,6 +444,10 @@ def before_request_https(): Compress(app) Commonmark(app) +# Explicitly substitute the flask gettext function with our custom definition which uses fallback languages +app.jinja_env.globals.update(_=gettext) + + # We don't need to log in offline mode if utils.is_offline_mode(): parse_logger = s3_logger.NullLogger() @@ -2313,7 +2318,6 @@ def favicon(): @app.route('/index.html') def main_page(): sections = hedyweb.PageTranslations('start').get_page_translations(g.lang)['home-sections'] - sections = sections[:] # Sections have 'title', 'text' diff --git a/build-tools/github/validate-yaml b/build-tools/github/validate-yaml index fab8a3f1777..8abdc9d9e47 100755 --- a/build-tools/github/validate-yaml +++ b/build-tools/github/validate-yaml @@ -23,16 +23,26 @@ echo "------> Validating YAML" # 'npx pajv validate' just hangs. Running the 'pajv' binary directly without the use of # 'npx' does work... so we're just going to ¯\_(ツ)_/¯ and do that. -all_schemas=$(find content -name \*.schema.json) +schemas=$(find content -name \*.schema.json) failures=false -for schema in $all_schemas; do +for schema in $schemas; do dir=$(dirname $schema) - echo "------> Validating $(basename $dir)" + + # The non-generated schema files that have required fields. They should be + # used for 'en.yaml' files. The generated schema files allow all fields to be + # optional, so they should be used to validate the rest of the yaml files. + if [[ $schema == *".generated."* ]]; then + files="*"; + echo "------> Validating with optional fields $(basename $dir)/*.yaml" + else + files="en"; + echo "------> Validating with required fields $(basename $dir)/en.yaml" + fi # Run the validator. - if ! check-jsonschema -o text --schemafile $schema $dir/*.yaml > validate.txt; then + if ! 
check-jsonschema -o text --schemafile $schema $dir/$files.yaml > validate.txt; then cat validate.txt || true failures=true fi diff --git a/content/adventures/adventures.schema.json b/content/adventures/adventures.schema.json index 3092a897f44..fe184d0ef16 100644 --- a/content/adventures/adventures.schema.json +++ b/content/adventures/adventures.schema.json @@ -3,20 +3,13 @@ "type": "object", "additionalProperties": false, "properties": { - "title": { - "type": "string", - "description": "Short title of the adventure" - }, - "subtitle": { - "type": "string", - "description": "Slightly longer introductory description of the adventure" - }, "adventures": { "type": "object", "description": "Individual adventures, key/value map", "additionalProperties": { "$ref": "#/definitions/Adventure" } } }, + "required": ["adventures"], "definitions": { "Adventure": { "type": "object", diff --git a/content/slides/id.yaml b/content/slides/id.yaml index faaace1957f..95143cf8dd4 100644 --- a/content/slides/id.yaml +++ b/content/slides/id.yaml @@ -130,7 +130,7 @@ levels: {print} What song would you like to hear? {ask} I like that song too! {print} Next up... {echo} - debug: true + debug: 'True' 13: header: Let the programming fun begin! text: Enjoy the adventures in level 1! @@ -199,7 +199,7 @@ levels: {print} I'll go get your donut. {sleep} {print} Here you go! A filling donut with toping! {ask} Have a nice day! - debug: true + debug: 'True' 8: header: Biarkan kesenangan pemrograman dimulai! text: Nikmati petualangan di level 2! @@ -264,7 +264,7 @@ levels: {print} or do you prefer... second_choice {at} {random} {remove} second_choice {to} music_genres {print} I like music_genre {random} best! - debug: true + debug: 'True' 8: header: Ayo mulai bekerja! text: Nikmati petualangan di level 3! 
@@ -279,7 +279,7 @@ levels: code: |- name {is} Sophie {print} My name is name - debug: true + debug: 'True' 3: header: Memperbaikinya dengan tanda kutip text: |- @@ -317,7 +317,7 @@ levels: Silakan coba mencetak kontraksi seperti "Anda" atau "Saya" pada layar di bawah ini dan lihat apa yang terjadi.... code: '{print} ''This won''t work!''' - debug: true + debug: 'True' 9: header: Jelas text: |- @@ -346,7 +346,7 @@ levels: colors {is} 'orange, silver, white, brown' {print} 'I love the colors {at} {random} one!' choice {is} {ask} Which one do you like? - debug: true + debug: 'True' 11: header: Siap, Bersiap, Ayo! text: Nikmati petualangan di level 4! @@ -367,7 +367,7 @@ levels: header: Jangan lupa untuk mencetak text: Saat menggunakan perintah `{if}`, jangan lupa untuk menggunakan perintah `{print}`. code: '{if} name {is} Hedy ''nice''' - debug: true + debug: 'True' 4: header: pula text: |- @@ -413,7 +413,7 @@ levels: item_to_declare {is} {ask} 'What would you like to declare' {else} Alright {print} Thank you. Please head to gate A22.' - debug: true + debug: 'True' 8: header: Ayo pergi! text: Nikmati petualangan di level 5! @@ -478,7 +478,7 @@ levels: {if} day {is} monday total_price = total_price * 0.25 {print} 'That will be total_price please' - debug: true + debug: 'True' 10: header: Ayo mulai bekerja! text: Nikmati petualangan di level 6! @@ -501,7 +501,7 @@ levels: header: Jangan lupa print command text: Saat menggunakan perintah ulangi, jangan lupa perintah `{print}`. code: '{repeat} 5 {times} ''Help!''' - debug: true + debug: 'True' 4: header: Ulangi perintah tanya text: Anda juga dapat mengulangi perintah `{ask}`, `{if}`, atau `{else}` beberapa kali. @@ -523,7 +523,7 @@ levels: {if} yes {print} 'Hurray! {else} 'That's a shame... Oh well... time to build a shelter and find some food.' - debug: true + debug: 'True' 6: header: Siap Berangkat! text: Nikmati petualangan di level 7! @@ -538,7 +538,7 @@ levels: Anda hanya dapat mengulang satu baris kode. 
code: '{repeat} 5 {times} {print} ''Help!''' - debug: true + debug: 'True' 3: header: '{repeat} perintah sebelumnya' text: |- @@ -567,7 +567,7 @@ levels: code: |- {if} name {is} Hedy {print} 'nice' {else} {print} 'boo!' - debug: true + debug: 'True' 6: header: jika dan yang lain sekarang text: |- @@ -610,7 +610,7 @@ levels: {print} You chose a round trip ticket' price * 2 {print} 'That will be ' price ' euros please' - debug: true + debug: 'True' 10: header: Mari kita lihat petualangannya! text: Nikmati petualangan di level 8! @@ -702,7 +702,7 @@ levels: {else} {print} 'Fun!' {print} 'Thanks for filling in the safety questions everyone. Enjoy your jump!' - debug: true + debug: 'True' 9: header: Ayo pergi! text: Nikmati petualangan di level 9! @@ -739,7 +739,7 @@ levels: {add} chosen_person {from} people {print} 'Come and watch our show tonight!' {print} 'Tickets are only available at the counter - debug: true + debug: 'True' 5: header: Saatnya memprogram! text: Nikmati petualangan di level 10! @@ -776,7 +776,7 @@ levels: {repeat} {for} numbers {in} {range} 1 {to} 10 {times} {print} This is the table of multiplications for factor {print} number ' x ' factor ' = ' i * factor - debug: true + debug: 'True' 5: header: Mari kita mulai pemrograman! text: Nikmati petualangan di level 11! @@ -841,7 +841,7 @@ levels: {call} new member {else} password = {ask} 'Please enter password' - debug: true + debug: 'True' 8: header: Siap mencobanya? text: Nikmati petualangan di level 12! @@ -951,7 +951,7 @@ levels: {call} happiness {with} person {else} mood = sad {define} sadness {to} name - debug: true + debug: 'True' 9: header: Ayo! text: Nikmati petualangan di level 13! @@ -1033,7 +1033,7 @@ levels: {print} 'Shame.. I wont buy it' {else} {print} 'I will buy it! Thank you!' - debug: true + debug: 'True' 7: header: Ayo mulai bekerja! text: Nikmati petualangan di level 14! 
diff --git a/content/slides/slides.schema.json b/content/slides/slides.schema.json new file mode 100644 index 00000000000..5462ab35bee --- /dev/null +++ b/content/slides/slides.schema.json @@ -0,0 +1,50 @@ +{ + "title": "JSON Schema for Hedy Slides", + "type": "object", + "additionalProperties": false, + "properties": { + "levels": { + "type": "object", + "description": "Levels with exercise for the Hedy Slides", + "additionalProperties": { + "$ref": "#/definitions/Level" + } + } + }, + "required": [ + "levels" + ], + "definitions": { + "Level": { + "type": "object", + "additionalProperties": { + "$ref": "#/definitions/Exercise" + } + }, + "Exercise": { + "type": "object", + "properties": { + "header": { + "type": "string" + }, + "text": { + "type": "string" + }, + "editor": { + "type": "string" + }, + "code": { + "type": "string" + }, + "debug": { + "type": "string" + } + }, + "required": [ + "header", + "text" + ], + "additionalProperties": false + } + } +} diff --git a/content/tutorials/tutorials.schema.json b/content/tutorials/tutorials.schema.json index bcfba819a33..7205220e12b 100644 --- a/content/tutorials/tutorials.schema.json +++ b/content/tutorials/tutorials.schema.json @@ -6,6 +6,7 @@ "intro": { "$ref": "#/definitions/Tutorial" }, "teacher": { "$ref": "#/definitions/Tutorial" } }, + "required": ["intro", "teacher"], "definitions": { "Tutorial": { "type": "object", diff --git a/dodo.py b/dodo.py index 69818597d5f..74df9508793 100644 --- a/dodo.py +++ b/dodo.py @@ -318,6 +318,24 @@ def task_extract(): ) +def task_generate_optional_yaml_schemas(): + """ + Generate yaml schemas with all fields optional + """ + schemas = glob('content/*/*.schema.json') + + return dict( + title=lambda _: 'Generate optional yaml schemas', + file_dep=[ + 'tools/generate-yaml-schemas.py', + *schemas + ], + actions=[ + [python3, 'tools/generate-yaml-schemas.py'] + ] + ) + + def task_devserver(): """Run a copy of the development server. 
@@ -372,6 +390,7 @@ def task_backend(): return dict( actions=None, task_dep=[ + 'generate_optional_yaml_schemas', 'compile_babel', 'generate_static_babel_content', 'lark', diff --git a/hedy.py b/hedy.py index 797fbe49324..2301ff97edb 100644 --- a/hedy.py +++ b/hedy.py @@ -2,7 +2,7 @@ from functools import lru_cache import lark -from flask_babel import gettext +from website.flask_helpers import gettext_with_fallback as gettext from lark import Lark from lark.exceptions import UnexpectedEOF, UnexpectedCharacters, VisitError from lark import Tree, Transformer, visitors, v_args diff --git a/hedy_error.py b/hedy_error.py index dc504964be6..b1edc32b0e4 100644 --- a/hedy_error.py +++ b/hedy_error.py @@ -1,7 +1,7 @@ import hedy import hedy_translation import re -from flask_babel import gettext +from website.flask_helpers import gettext_with_fallback as gettext # TODO: we should not maintain a list like this. Translation of exception arguments should happen when the exception diff --git a/highlighting/generate-rules-highlighting.py b/highlighting/generate-rules-highlighting.py index 0ac6f3fa41d..cde1b7d0a91 100644 --- a/highlighting/generate-rules-highlighting.py +++ b/highlighting/generate-rules-highlighting.py @@ -1,10 +1,15 @@ import json -import os import regex as re -import yaml +from os import chdir, listdir, path from definition import TRANSLATE_WORDS +# Import packages from the website app (AutoPep8 will mess this up, so disable it) +import sys +sys.path.append(path.abspath(path.join(path.dirname(__file__), '..'))) # noqa +from website.yaml_file import YamlFile # noqa + + # destinations of files containing syntax highlighting rules OUTPUT_PATH_TRANSLATION = "highlighting/highlighting-trad.json" @@ -14,7 +19,7 @@ def main(): - os.chdir(os.path.dirname(__file__) + "/..") + chdir(path.dirname(__file__) + "/..") print("Generation of translations.....................", end="") language_keywords = get_translations(KEYWORDS_PATH, KEYWORDS_PATTERN) @@ -24,30 +29,6 @@ def 
main(): print(" Done !") -def get_yaml_content(file_name): - """Recover the content of YAML files - - For each yaml file, the function returns a dictionary - containing the contents of the file. - All keys and values are strings - - Arguments : - - file_name : str, The full path of the file - - Returns a dict. - """ - - try: - with open(file_name, newline="", encoding='utf-8') as keywords_file: - yaml_file = yaml.safe_load(keywords_file) - except Exception as e: - raise RuntimeError(f'Unable to read file {file_name}') from e - commandes = {} - for k in yaml_file: - commandes[str(k)] = str(yaml_file[k]) - return commandes - - def get_commands(language_code, keywords, keywords_ref, translate_words): """Create keyword translations @@ -55,7 +36,7 @@ def get_commands(language_code, keywords, keywords_ref, translate_words): with the translation of the keyword, usable by the regex Arguments : - - language_code : str, Language code (for execption creation) + - language_code : str, Language code (for exception creation) - keywords : str, The yaml content of the language you want to translate - keywords_ref : str, The content of the reference language yaml - translate_words : str, List of keywords to be translated @@ -111,14 +92,14 @@ def get_digits(keywords, keywords_ref): def get_translations(KEYWORDS_PATH, KEYWORDS_PATTERN): tmp = {} - list_language_file = os.listdir(KEYWORDS_PATH) + list_language_file = listdir(KEYWORDS_PATH) # get content for language_file in list_language_file: # Only check *.yaml files if m := re.search(KEYWORDS_PATTERN, language_file): language_code = m.group(1) - tmp[language_code] = get_yaml_content(os.path.join(KEYWORDS_PATH, language_file)) + tmp[language_code] = YamlFile.for_file(path.join(KEYWORDS_PATH, language_file)) # english is ref reference = tmp["en"] diff --git a/tests/tests_z_yamlfile.py b/tests/tests_z_yamlfile.py index f280bff2403..8aa96d4f93f 100644 --- a/tests/tests_z_yamlfile.py +++ b/tests/tests_z_yamlfile.py @@ -41,3 +41,76 @@ 
def test_load_yaml_equivalent(self): print( f'YAML loading takes {original_seconds / n} seconds, unpickling takes {cached_seconds / n}' f'({original_seconds / cached_seconds:.1f}x faster)') + + # Merging of YAML content + # Key of type dict + def test_merge_dicts_prefers_source(self): + result = YamlFile.merge_yaml({"key1": "source"}, {"key1": "fallback"}) + self.assertEqual({"key1": "source"}, result) + + def test_merge_dicts_uses_fallback_if_source_key_not_present(self): + result = YamlFile.merge_yaml({}, {"key1": "fallback"}) + self.assertEqual({"key1": "fallback"}, result) + + def test_merge_dicts_skips_key_if_not_present_in_fallback_empty(self): + result = YamlFile.merge_yaml({"key1": "source"}, {}) + self.assertEqual({}, result) + + def test_merge_dicts_skips_key_if_not_present_in_fallback(self): + result = YamlFile.merge_yaml({"key2": "source"}, {"key1": "fallback"}) + self.assertEqual({"key1": "fallback"}, result) + + # Key of type list + def test_merge_lists_prefers_source(self): + result = YamlFile.merge_yaml({"key1": ["a", "b"]}, {"key1": ["c", "d"]}) + self.assertEqual({"key1": ["a", "b"]}, result) + + def test_merge_lists_skips_key_if_not_present_in_fallback(self): + result = YamlFile.merge_yaml({"key1": ["a", "b"]}, {}) + self.assertEqual({}, result) + + def test_merge_lists_uses_fallback_if_source_key_not_present_empty(self): + result = YamlFile.merge_yaml({}, {"key1": ["c", "d"]}) + self.assertEqual({"key1": ["c", "d"]}, result) + + def test_merge_lists_uses_fallback_if_source_key_not_present(self): + result = YamlFile.merge_yaml({"key2": ["a", "b"]}, {"key1": ["c", "d"]}) + self.assertEqual({"key1": ["c", "d"]}, result) + + # Elements in list + def test_merge_lists_values_prefers_source(self): + result = YamlFile.merge_yaml({"key1": [None, "b"]}, {"key1": ["c", "d"]}) + self.assertEqual({"key1": ["c", "b"]}, result) + + def test_merge_lists_values_uses_fallback_if_value_is_empty(self): + result = YamlFile.merge_yaml({"key1": ["", "b"]}, {"key1": 
["c", "d"]}) + self.assertEqual({"key1": ["c", "b"]}, result) + + def test_merge_lists_values_prefers_len_of_fallback(self): + result = YamlFile.merge_yaml({"key1": ["a", "b", "e"]}, {"key1": ["c", "d"]}) + self.assertEqual({"key1": ["a", "b"]}, result) + + def test_merge_lists_values_prefers_source_values(self): + result = YamlFile.merge_yaml({"key1": ["a"]}, {"key1": ["c", "d"]}) + self.assertEqual({"key1": ["a", "d"]}, result) + + # Keys with mismatched types + def test_merge_dicts_prefers_fallback_type_dict(self): + result = YamlFile.merge_yaml({"key1": ["a", "b"]}, {"key1": {"a": "c", "b": "d"}}) + self.assertEqual({"key1": {"a": "c", "b": "d"}}, result) + + def test_merge_dicts_prefers_fallback_type_str(self): + result = YamlFile.merge_yaml({"key1": ["a", "b"]}, {"key1": "string value"}) + self.assertEqual({"key1": "string value"}, result) + + def test_merge_dicts_prefers_fallback_type_list(self): + result = YamlFile.merge_yaml({"key1": {"a": "c", "b": "d"}}, {"key1": ["a", "b"]}) + self.assertEqual({"key1": ["a", "b"]}, result) + + def test_merge_dicts_prefers_fallback_type_string(self): + result = YamlFile.merge_yaml({"key1": {"a": "c", "b": "d"}}, {"key1": "string value"}) + self.assertEqual({"key1": "string value"}, result) + + def test_merge_dicts_prefers_fallback_type_bool(self): + result = YamlFile.merge_yaml({"key1": True}, {"key1": "string value"}) + self.assertEqual({"key1": True}, result) diff --git a/tools/check-yaml-structure.py b/tools/check-yaml-structure.py index ff210e9bac5..64905c074f7 100644 --- a/tools/check-yaml-structure.py +++ b/tools/check-yaml-structure.py @@ -15,7 +15,7 @@ def main(): en = load_yaml(reference_file) structure_dir = path.basename(path.dirname(reference_file)) - mismatches = {comparison_file: find_mismatched_arrays(en, load_yaml(comparison_file)) + mismatches = {comparison_file: find_mismatched_types(en, load_yaml(comparison_file)) for comparison_file in glob.glob(f'content/{structure_dir}/*.yaml') if comparison_file != 
reference_file} mismatches = {file: mis for file, mis in mismatches.items() if mis} @@ -24,9 +24,11 @@ def main(): any_failure = True print(f'==================== {path.dirname(reference_file)} =======================') - print(' Different array lengths between English and other languages.') - print(' Please make the arrays the same by copying the new English content') - print(' to the right places in the other files.') + print('The script ensures that the structure of the language yamls is a subset of the English yaml.') + print('It checks that the following are true:') + print(' - properties are of the same type (e.g. list, dict)') + print(' - a list in the language yaml does not have more elements than the English yaml') + print(' - a dict in the language yaml does not have a key that is not present in the English yaml') print() # If there are many mismatches, the most natural way to present this information @@ -38,13 +40,15 @@ def main(): for lang_file, mis_by_path in mismatches.items() if mis_by_path.get(p)} first_mis = mis_by_file[next(iter(mis_by_file.keys()))] - print(f'---------------[ Path in YAML: {p} ]---------------------') - print(f'File: {reference_file} ({len(first_mis.left)} elements)') + print(f'---------------[ Path in YAML: {p}, Error: {first_mis.err} ]---------------------') + print(f'{first_mis.msg}') + print('') + print(f'Reference file: {reference_file}') print('') print(indent(4, yaml_to_string(shortened(first_mis.left)))) for file, mis in mis_by_file.items(): - print(f'File: {file} ({len(mis.right)} elements)') + print(f'Language file: {file}') print('') print(indent(4, yaml_to_string(shortened(mis.right)))) @@ -52,10 +56,10 @@ def main(): return 1 if any_failure else 0 -Mismatch = collections.namedtuple('Mismatch', ('left', 'right')) +Mismatch = collections.namedtuple('Mismatch', ('left', 'right', 'err', 'msg')) -def find_mismatched_arrays(reference, other): +def find_mismatched_types(reference, other): """Recurse through the given 
structure and find mismatched arrays. Disregard mismatched structure, if types aren't correct or not all @@ -65,23 +69,42 @@ def find_mismatched_arrays(reference, other): ret = {} def recurse(ref, oth, p): - if isinstance(ref, dict) and isinstance(oth, dict): - for key in set(ref.keys()) & set(oth.keys()): - recurse(ref[key], oth[key], p + [f'.{key}']) - return - - if isinstance(ref, list) and isinstance(oth, list): - if len(ref) != len(oth): - ret[''.join(p)] = Mismatch(ref, oth) + + if isinstance(ref, dict) and oth: + if not isinstance(oth, dict): + pth = ''.join(p) + ret[pth] = Mismatch(ref, oth, 'Type mismatch', (f'The path {pth} is of type dict in the reference file' + f'but not in the lang file.')) + else: + exk = set(oth.keys()) - set(ref.keys()) + for e in exk: + pth = ''.join(p + [e]) + ret[pth] = Mismatch(ref, oth, 'Extra keys in dict', (f'The path {pth} is a dict that contains more' + f'keys than the reference file.')) + for key in set(ref.keys()) & set(oth.keys()): + recurse(ref[key], oth[key], p + [f'.{key}']) + elif isinstance(ref, list) and oth: + if not isinstance(oth, list): + pth = ''.join(p) + ret[pth] = Mismatch(ref, oth, 'Type mismatch', (f'The path {pth} is of type list in the reference file' + f'but not in the lang file.')) else: - for i in range(min(len(ref), len(oth))): - recurse(ref[i], oth[i], p + [f'[{i}]']) - return + if len(ref) < len(oth): + pth = ''.join(p) + ret[pth] = Mismatch(ref, oth, 'Array length mismatch', (f'The path {pth} is a list with more' + f'elements than in the reference file.')) + else: + for i in range(min(len(ref), len(oth))): + recurse(ref[i], oth[i], p + [f'[{i}]']) recurse(reference, other, []) return ret +def diff_type(ref, oth, cl): + return (isinstance(ref, cl) and not isinstance(oth, cl)) or (not isinstance(ref, cl) and isinstance(oth, cl)) + + def shortened(obj, depth=2): """Recurse through the given structure and make strings shorter for printing, as well as stopping recursion after a certain limit. 
diff --git a/tools/generate-yaml-schemas.py b/tools/generate-yaml-schemas.py new file mode 100644 index 00000000000..438ae8e5960 --- /dev/null +++ b/tools/generate-yaml-schemas.py @@ -0,0 +1,26 @@ +from glob import glob +import json + + +def main(): + schemas = glob('../content/*/*.schema.json') + for schema_filename in schemas: + with open(schema_filename, 'r', encoding='utf-8') as file: + schema = json.load(file) + + schema_with_optional_fields = remove_required_fields(schema) + + output_filename = schema_filename.replace('.schema.json', '.generated.schema.json') + with open(output_filename, 'w', encoding='utf-8') as output_file: + json.dump(schema_with_optional_fields, output_file) + + +def remove_required_fields(schema): + if not isinstance(schema, dict): + return schema + else: + return {key: remove_required_fields(value) for key, value in schema.items() if key != 'required'} + + +if __name__ == "__main__": + main() diff --git a/utils.py b/utils.py index 4bd5c7c1f05..661b7ed6505 100644 --- a/utils.py +++ b/utils.py @@ -18,7 +18,8 @@ import collections from email_validator import EmailNotValidError, validate_email -from flask_babel import gettext, format_date, format_datetime, format_timedelta +from flask_babel import format_date, format_datetime, format_timedelta +from website.flask_helpers import gettext_with_fallback as gettext from ruamel import yaml import commonmark diff --git a/website/admin.py b/website/admin.py index 704552a90d4..22a7822e4cd 100644 --- a/website/admin.py +++ b/website/admin.py @@ -1,5 +1,5 @@ from flask import make_response, request -from flask_babel import gettext +from website.flask_helpers import gettext_with_fallback as gettext import utils from website.flask_helpers import render_template diff --git a/website/auth.py b/website/auth.py index 14803f70028..96d6793211e 100644 --- a/website/auth.py +++ b/website/auth.py @@ -11,7 +11,8 @@ from botocore.exceptions import ClientError as email_error from botocore.exceptions import 
NoCredentialsError from flask import g, request, session, redirect -from flask_babel import force_locale, gettext +from flask_babel import force_locale +from website.flask_helpers import gettext_with_fallback as gettext import utils from config import config diff --git a/website/auth_pages.py b/website/auth_pages.py index 2e22abd40b1..d708fdad6d7 100644 --- a/website/auth_pages.py +++ b/website/auth_pages.py @@ -1,7 +1,7 @@ import datetime from flask import make_response, redirect, request, session -from flask_babel import gettext +from website.flask_helpers import gettext_with_fallback as gettext from config import config from safe_format import safe_format diff --git a/website/classes.py b/website/classes.py index 159cf7de954..59487aa3a27 100644 --- a/website/classes.py +++ b/website/classes.py @@ -2,7 +2,7 @@ from flask import make_response, redirect, request, session from jinja_partials import render_partial -from flask_babel import gettext +from website.flask_helpers import gettext_with_fallback as gettext import utils from config import config diff --git a/website/feedback.py b/website/feedback.py index 4a73c979202..42474d3a3a5 100644 --- a/website/feedback.py +++ b/website/feedback.py @@ -1,6 +1,6 @@ from flask import request, make_response, render_template -from flask_babel import gettext +from website.flask_helpers import gettext_with_fallback as gettext import json import uuid from collections import defaultdict diff --git a/website/flask_helpers.py b/website/flask_helpers.py index 585e75d157e..cca77e0fe7a 100644 --- a/website/flask_helpers.py +++ b/website/flask_helpers.py @@ -3,6 +3,7 @@ from . 
def gettext_with_fallback(x):
    """Translate ``x``, falling back to the English catalogue when needed.

    ``x`` is first looked up with ``gettext`` under the currently active
    locale. If the session language is not English and the lookup hands the
    key back unchanged (treated here as "no translation available"), the
    lookup is repeated with the locale forced to ``'en'``.

    :param x: the message key to translate.
    :return: the result of the first lookup, or of the English lookup when
        the first one returned ``x`` itself for a non-English session.
    """
    # A session can hold other data without a 'lang' entry; indexing it
    # directly would raise KeyError, so default to English in that case, just
    # like for an empty session.
    locale = flask.session.get('lang', 'en')

    res = gettext(x)
    if locale != 'en' and res == x:
        with force_locale('en'):
            res = gettext(x)
    return res
request -from flask_babel import gettext +from website.flask_helpers import gettext_with_fallback as gettext import jinja_partials import hedy_content diff --git a/website/public_adventures.py b/website/public_adventures.py index b7abf235627..9e19c3d9191 100644 --- a/website/public_adventures.py +++ b/website/public_adventures.py @@ -1,6 +1,6 @@ import uuid from flask import g, request, make_response -from flask_babel import gettext +from website.flask_helpers import gettext_with_fallback as gettext import json import hedy diff --git a/website/tags.py b/website/tags.py index 8a8fd8ba455..5c9c9590878 100644 --- a/website/tags.py +++ b/website/tags.py @@ -1,5 +1,5 @@ from flask import make_response, request, g -from flask_babel import gettext +from website.flask_helpers import gettext_with_fallback as gettext import jinja_partials import uuid diff --git a/website/user_activity.py b/website/user_activity.py index ea1560e93a0..5b5cbd68a03 100644 --- a/website/user_activity.py +++ b/website/user_activity.py @@ -1,5 +1,5 @@ -from flask_babel import gettext +from website.flask_helpers import gettext_with_fallback as gettext import os from flask import make_response, request, session diff --git a/website/yaml_file.py b/website/yaml_file.py index bb111833d1f..d5e5bf9be25 100644 --- a/website/yaml_file.py +++ b/website/yaml_file.py @@ -59,14 +59,7 @@ def for_file(filename): return YAML_FILES_CACHE[filename] def __init__(self, filename): - """Create a new YamlFile for the given filename. - - try_pickle controls on whether we pickle or not. Can be - `True`, `False` or `None` -- in case of `None` pickling is - determined automatically based on whether or not we appear - to be running on Heroku. We don't pickle on dev workstations - because it creates a mess of files. 
@querylog.timed_as('load_yaml_uncached')
def load_uncached(self):
    """Load the source YAML file, completed with the English content.

    The file at ``self.filename`` is parsed first. Unless that file is the
    ``en.yaml`` of its own directory, the sibling ``en.yaml`` is parsed as
    well and used as the fallback for everything the language file lacks.
    """
    file = self._load_yaml(self.filename)
    base_filename = path.join(path.dirname(self.filename), 'en.yaml')
    if self.filename == base_filename:
        # The English file is the fallback itself; nothing to merge.
        return file
    return self.merge_yaml(file, self._load_yaml(base_filename))


def _load_yaml(self, filename):
    """Parse ``filename`` with ``yaml_loader``; return ``{}`` if it cannot be read."""
    try:
        with open(filename, 'r', encoding="utf-8") as f:
            return yaml_loader.load(f)
    except IOError:
        return {}


@staticmethod
def merge_yaml(source, fallback):
    """Merge the language file with the fallback file.

    The result has the structure of ``fallback``; values present in
    ``source`` replace their counterparts, values missing from (or empty
    in) ``source`` are taken from ``fallback``.

    Whenever the two sides disagree on structure, the fallback wins and the
    source content is discarded:

    * the node types differ (dict vs. list, or a scalar in the source where
      the fallback has a dict or a list);
    * a dict in the source has a key the fallback does not have;
    * a list in the source has more elements than the fallback list (the
      surplus elements are ignored).

    Neither argument is modified; merged dicts and lists are shallow copies
    of the fallback containers, so untouched sub-trees are shared with
    ``fallback``.
    """
    if source and isinstance(source, dict):
        if not isinstance(fallback, dict):
            return fallback
        merged = fallback.copy()
        for key, value in source.items():
            if key in fallback:
                merged[key] = YamlFile.merge_yaml(value, fallback[key])
        return merged

    if source and isinstance(source, list):
        if not isinstance(fallback, list):
            return fallback
        merged = fallback.copy()
        # zip stops at the shorter side, so elements beyond the fallback's
        # length are dropped.
        for i, (value, base) in enumerate(zip(source, fallback)):
            merged[i] = YamlFile.merge_yaml(value, base)
        return merged

    # Empty source value: nothing was translated, use the fallback.
    # Scalar source where the fallback expects a container: type mismatch,
    # keep the fallback so consumers still get the expected structure.
    if not source or isinstance(fallback, (dict, list)):
        return fallback
    return source