From 3a21a78a195faad150eaf35abe43319ec4ad1b99 Mon Sep 17 00:00:00 2001 From: axif Date: Thu, 17 Oct 2024 00:26:17 +0600 Subject: [PATCH 1/4] added check for sparql and for json in workflow --- .../check/check_project_structure.py | 77 +++++++++++++++---- ...ition.sparql => query_prepositions.sparql} | 0 ...jective.sparql => query_adjectives.sparql} | 0 ...ery_adverb.sparql => query_adverbs.sparql} | 0 ...ition.sparql => query_prepositions.sparql} | 0 ...ery_adverb.sparql => query_adverbs.sparql} | 0 ...jective.sparql => query_adjectives.sparql} | 0 .../{query_verb.sparql => query_verbs.sparql} | 0 8 files changed, 60 insertions(+), 17 deletions(-) rename src/scribe_data/language_data_extraction/Finnish/prepositions/{query_preposition.sparql => query_prepositions.sparql} (100%) rename src/scribe_data/language_data_extraction/Kurmanji/adjectives/{query_adjective.sparql => query_adjectives.sparql} (100%) rename src/scribe_data/language_data_extraction/Kurmanji/adverbs/{query_adverb.sparql => query_adverbs.sparql} (100%) rename src/scribe_data/language_data_extraction/Kurmanji/prepositions/{query_preposition.sparql => query_prepositions.sparql} (100%) rename src/scribe_data/language_data_extraction/Swahili/adverbs/{query_adverb.sparql => query_adverbs.sparql} (100%) rename src/scribe_data/language_data_extraction/Yoruba/adjectives/{query_adjective.sparql => query_adjectives.sparql} (100%) rename src/scribe_data/language_data_extraction/Yoruba/verbs/{query_verb.sparql => query_verbs.sparql} (100%) diff --git a/src/scribe_data/check/check_project_structure.py b/src/scribe_data/check/check_project_structure.py index 4dcb21e32..0e1e8cd26 100644 --- a/src/scribe_data/check/check_project_structure.py +++ b/src/scribe_data/check/check_project_structure.py @@ -72,9 +72,64 @@ BASE_DIR = "../language_data_extraction" +def check_data_type_folders(path, language, subdir, errors): + """ + Validate the contents of data type folders within a language directory. 
+ + This function checks each data type folder for the presence of expected files + and reports any unexpected files. It allows for multiple SPARQL query files, + a format Python file, and a queried JSON file for each data type. + + Args: + path (str): The path to the directory containing data type folders. + language (str): The name of the language being processed. + subdir (str or None): The name of the sub-directory (for languages with sub-dialects), or None. + errors (list): A list to which error messages will be appended. + + The function checks for the following valid files in each data type folder: + - Files starting with 'query_' and ending with '.sparql' + - A 'format_{data_type}.py' file + - A '{data_type}_queried.json' file + + It skips validation for the 'emoji_keywords' data type folder. + + Any files not matching these patterns (except '__init__.py') are reported as unexpected. + """ + for item in os.listdir(path): + item_path = os.path.join(path, item) + if os.path.isfile(item_path) and item != "__init__.py": + errors.append(f"Unexpected file found in {language}/{subdir or ''}: {item}") + elif os.path.isdir(item_path): + if item not in DATA_TYPES: + errors.append( + f"Unexpected directory found in {language}/{subdir or ''}: {item}" + ) + else: + # Skip validation for emoji_keywords + if item == "emoji_keywords": + continue + + # Check for correctly formatted files + valid_files = [ + f + for f in os.listdir(item_path) + if (f.startswith(f"query_{item}") and f.endswith(".sparql")) + or f == f"format_{item}.py" + or f == f"{item}_queried.json" + ] + + for file in os.listdir(item_path): + if file not in valid_files and file != "__init__.py": + errors.append( + f"Unexpected file in {language}/{subdir or ''}/{item}: {file}" + ) + + def validate_project_structure(): """ - Validate that all directories follow the expected project structure and check for unexpected files and directories.""" + Validate that all directories follow the expected project 
structure and check for unexpected files and directories. + Also validate SPARQL query file names in data_type folders and SUBDIRECTORIES. + """ errors = [] if not os.path.exists(BASE_DIR): @@ -129,22 +184,10 @@ def validate_project_structure(): for subdir in expected_subdirs: subdir_path = os.path.join(language_path, subdir) if os.path.exists(subdir_path): - for item in os.listdir(subdir_path): - item_path = os.path.join(subdir_path, item) - if os.path.isfile(item_path) and item != "__init__.py": - errors.append( - f"Unexpected file found in {language}/{subdir}: {item}" - ) - - elif os.path.isdir(item_path) and item not in DATA_TYPES: - errors.append( - f"Unexpected directory found in {language}/{subdir}: {item}" - ) - - elif unexpected_data_types := found_subdirs - DATA_TYPES: - errors.append( - f"Unexpected subdirectories in '{language}': {unexpected_data_types}" - ) + check_data_type_folders(subdir_path, language, subdir, errors) + + else: + check_data_type_folders(language_path, language, None, errors) if errors: print("Errors found:") diff --git a/src/scribe_data/language_data_extraction/Finnish/prepositions/query_preposition.sparql b/src/scribe_data/language_data_extraction/Finnish/prepositions/query_prepositions.sparql similarity index 100% rename from src/scribe_data/language_data_extraction/Finnish/prepositions/query_preposition.sparql rename to src/scribe_data/language_data_extraction/Finnish/prepositions/query_prepositions.sparql diff --git a/src/scribe_data/language_data_extraction/Kurmanji/adjectives/query_adjective.sparql b/src/scribe_data/language_data_extraction/Kurmanji/adjectives/query_adjectives.sparql similarity index 100% rename from src/scribe_data/language_data_extraction/Kurmanji/adjectives/query_adjective.sparql rename to src/scribe_data/language_data_extraction/Kurmanji/adjectives/query_adjectives.sparql diff --git a/src/scribe_data/language_data_extraction/Kurmanji/adverbs/query_adverb.sparql 
b/src/scribe_data/language_data_extraction/Kurmanji/adverbs/query_adverbs.sparql similarity index 100% rename from src/scribe_data/language_data_extraction/Kurmanji/adverbs/query_adverb.sparql rename to src/scribe_data/language_data_extraction/Kurmanji/adverbs/query_adverbs.sparql diff --git a/src/scribe_data/language_data_extraction/Kurmanji/prepositions/query_preposition.sparql b/src/scribe_data/language_data_extraction/Kurmanji/prepositions/query_prepositions.sparql similarity index 100% rename from src/scribe_data/language_data_extraction/Kurmanji/prepositions/query_preposition.sparql rename to src/scribe_data/language_data_extraction/Kurmanji/prepositions/query_prepositions.sparql diff --git a/src/scribe_data/language_data_extraction/Swahili/adverbs/query_adverb.sparql b/src/scribe_data/language_data_extraction/Swahili/adverbs/query_adverbs.sparql similarity index 100% rename from src/scribe_data/language_data_extraction/Swahili/adverbs/query_adverb.sparql rename to src/scribe_data/language_data_extraction/Swahili/adverbs/query_adverbs.sparql diff --git a/src/scribe_data/language_data_extraction/Yoruba/adjectives/query_adjective.sparql b/src/scribe_data/language_data_extraction/Yoruba/adjectives/query_adjectives.sparql similarity index 100% rename from src/scribe_data/language_data_extraction/Yoruba/adjectives/query_adjective.sparql rename to src/scribe_data/language_data_extraction/Yoruba/adjectives/query_adjectives.sparql diff --git a/src/scribe_data/language_data_extraction/Yoruba/verbs/query_verb.sparql b/src/scribe_data/language_data_extraction/Yoruba/verbs/query_verbs.sparql similarity index 100% rename from src/scribe_data/language_data_extraction/Yoruba/verbs/query_verb.sparql rename to src/scribe_data/language_data_extraction/Yoruba/verbs/query_verbs.sparql From 81e040086e3c315c82e17c0ad6c47dc4a5a4cec3 Mon Sep 17 00:00:00 2001 From: Andrew Tavis McAllister Date: Thu, 17 Oct 2024 00:25:21 +0200 Subject: [PATCH 2/4] Switch name back to then merge in 
main --- .../{query_prepositions.sparql => query_preposition.sparql} | 0 .../{query_adjectives.sparql => query_adjective.sparql} | 0 .../adverbs/{query_adverbs.sparql => query_adverb.sparql} | 0 .../{query_prepositions.sparql => query_preposition.sparql} | 0 .../Swahili/adverbs/{query_adverbs.sparql => query_adverb.sparql} | 0 .../{query_adjectives.sparql => query_adjective.sparql} | 0 .../Yoruba/verbs/{query_verbs.sparql => query_verb.sparql} | 0 7 files changed, 0 insertions(+), 0 deletions(-) rename src/scribe_data/language_data_extraction/Finnish/prepositions/{query_prepositions.sparql => query_preposition.sparql} (100%) rename src/scribe_data/language_data_extraction/Kurmanji/adjectives/{query_adjectives.sparql => query_adjective.sparql} (100%) rename src/scribe_data/language_data_extraction/Kurmanji/adverbs/{query_adverbs.sparql => query_adverb.sparql} (100%) rename src/scribe_data/language_data_extraction/Kurmanji/prepositions/{query_prepositions.sparql => query_preposition.sparql} (100%) rename src/scribe_data/language_data_extraction/Swahili/adverbs/{query_adverbs.sparql => query_adverb.sparql} (100%) rename src/scribe_data/language_data_extraction/Yoruba/adjectives/{query_adjectives.sparql => query_adjective.sparql} (100%) rename src/scribe_data/language_data_extraction/Yoruba/verbs/{query_verbs.sparql => query_verb.sparql} (100%) diff --git a/src/scribe_data/language_data_extraction/Finnish/prepositions/query_prepositions.sparql b/src/scribe_data/language_data_extraction/Finnish/prepositions/query_preposition.sparql similarity index 100% rename from src/scribe_data/language_data_extraction/Finnish/prepositions/query_prepositions.sparql rename to src/scribe_data/language_data_extraction/Finnish/prepositions/query_preposition.sparql diff --git a/src/scribe_data/language_data_extraction/Kurmanji/adjectives/query_adjectives.sparql b/src/scribe_data/language_data_extraction/Kurmanji/adjectives/query_adjective.sparql similarity index 100% rename from 
src/scribe_data/language_data_extraction/Kurmanji/adjectives/query_adjectives.sparql rename to src/scribe_data/language_data_extraction/Kurmanji/adjectives/query_adjective.sparql diff --git a/src/scribe_data/language_data_extraction/Kurmanji/adverbs/query_adverbs.sparql b/src/scribe_data/language_data_extraction/Kurmanji/adverbs/query_adverb.sparql similarity index 100% rename from src/scribe_data/language_data_extraction/Kurmanji/adverbs/query_adverbs.sparql rename to src/scribe_data/language_data_extraction/Kurmanji/adverbs/query_adverb.sparql diff --git a/src/scribe_data/language_data_extraction/Kurmanji/prepositions/query_prepositions.sparql b/src/scribe_data/language_data_extraction/Kurmanji/prepositions/query_preposition.sparql similarity index 100% rename from src/scribe_data/language_data_extraction/Kurmanji/prepositions/query_prepositions.sparql rename to src/scribe_data/language_data_extraction/Kurmanji/prepositions/query_preposition.sparql diff --git a/src/scribe_data/language_data_extraction/Swahili/adverbs/query_adverbs.sparql b/src/scribe_data/language_data_extraction/Swahili/adverbs/query_adverb.sparql similarity index 100% rename from src/scribe_data/language_data_extraction/Swahili/adverbs/query_adverbs.sparql rename to src/scribe_data/language_data_extraction/Swahili/adverbs/query_adverb.sparql diff --git a/src/scribe_data/language_data_extraction/Yoruba/adjectives/query_adjectives.sparql b/src/scribe_data/language_data_extraction/Yoruba/adjectives/query_adjective.sparql similarity index 100% rename from src/scribe_data/language_data_extraction/Yoruba/adjectives/query_adjectives.sparql rename to src/scribe_data/language_data_extraction/Yoruba/adjectives/query_adjective.sparql diff --git a/src/scribe_data/language_data_extraction/Yoruba/verbs/query_verbs.sparql b/src/scribe_data/language_data_extraction/Yoruba/verbs/query_verb.sparql similarity index 100% rename from src/scribe_data/language_data_extraction/Yoruba/verbs/query_verbs.sparql rename to 
src/scribe_data/language_data_extraction/Yoruba/verbs/query_verb.sparql From 6cc1b79366c5f6f715d2880fd0e264f6d4d566c8 Mon Sep 17 00:00:00 2001 From: Andrew Tavis McAllister Date: Thu, 17 Oct 2024 00:26:47 +0200 Subject: [PATCH 3/4] Correct file names --- .../{query_preposition.sparql => query_prepositions.sparql} | 0 .../{query_adjective.sparql => query_adjectives.sparql} | 0 .../adverbs/{query_adverb.sparql => query_adverbs.sparql} | 0 .../{query_preposition.sparql => query_prepositions.sparql} | 0 .../Swahili/adverbs/{query_adverb.sparql => query_adverbs.sparql} | 0 .../{query_adjective.sparql => query_adjectives.sparql} | 0 .../Yoruba/verbs/{query_verb.sparql => query_verbs.sparql} | 0 7 files changed, 0 insertions(+), 0 deletions(-) rename src/scribe_data/language_data_extraction/Finnish/prepositions/{query_preposition.sparql => query_prepositions.sparql} (100%) rename src/scribe_data/language_data_extraction/Kurmanji/adjectives/{query_adjective.sparql => query_adjectives.sparql} (100%) rename src/scribe_data/language_data_extraction/Kurmanji/adverbs/{query_adverb.sparql => query_adverbs.sparql} (100%) rename src/scribe_data/language_data_extraction/Kurmanji/prepositions/{query_preposition.sparql => query_prepositions.sparql} (100%) rename src/scribe_data/language_data_extraction/Swahili/adverbs/{query_adverb.sparql => query_adverbs.sparql} (100%) rename src/scribe_data/language_data_extraction/Yoruba/adjectives/{query_adjective.sparql => query_adjectives.sparql} (100%) rename src/scribe_data/language_data_extraction/Yoruba/verbs/{query_verb.sparql => query_verbs.sparql} (100%) diff --git a/src/scribe_data/language_data_extraction/Finnish/prepositions/query_preposition.sparql b/src/scribe_data/language_data_extraction/Finnish/prepositions/query_prepositions.sparql similarity index 100% rename from src/scribe_data/language_data_extraction/Finnish/prepositions/query_preposition.sparql rename to 
src/scribe_data/language_data_extraction/Finnish/prepositions/query_prepositions.sparql diff --git a/src/scribe_data/language_data_extraction/Kurmanji/adjectives/query_adjective.sparql b/src/scribe_data/language_data_extraction/Kurmanji/adjectives/query_adjectives.sparql similarity index 100% rename from src/scribe_data/language_data_extraction/Kurmanji/adjectives/query_adjective.sparql rename to src/scribe_data/language_data_extraction/Kurmanji/adjectives/query_adjectives.sparql diff --git a/src/scribe_data/language_data_extraction/Kurmanji/adverbs/query_adverb.sparql b/src/scribe_data/language_data_extraction/Kurmanji/adverbs/query_adverbs.sparql similarity index 100% rename from src/scribe_data/language_data_extraction/Kurmanji/adverbs/query_adverb.sparql rename to src/scribe_data/language_data_extraction/Kurmanji/adverbs/query_adverbs.sparql diff --git a/src/scribe_data/language_data_extraction/Kurmanji/prepositions/query_preposition.sparql b/src/scribe_data/language_data_extraction/Kurmanji/prepositions/query_prepositions.sparql similarity index 100% rename from src/scribe_data/language_data_extraction/Kurmanji/prepositions/query_preposition.sparql rename to src/scribe_data/language_data_extraction/Kurmanji/prepositions/query_prepositions.sparql diff --git a/src/scribe_data/language_data_extraction/Swahili/adverbs/query_adverb.sparql b/src/scribe_data/language_data_extraction/Swahili/adverbs/query_adverbs.sparql similarity index 100% rename from src/scribe_data/language_data_extraction/Swahili/adverbs/query_adverb.sparql rename to src/scribe_data/language_data_extraction/Swahili/adverbs/query_adverbs.sparql diff --git a/src/scribe_data/language_data_extraction/Yoruba/adjectives/query_adjective.sparql b/src/scribe_data/language_data_extraction/Yoruba/adjectives/query_adjectives.sparql similarity index 100% rename from src/scribe_data/language_data_extraction/Yoruba/adjectives/query_adjective.sparql rename to 
src/scribe_data/language_data_extraction/Yoruba/adjectives/query_adjectives.sparql diff --git a/src/scribe_data/language_data_extraction/Yoruba/verbs/query_verb.sparql b/src/scribe_data/language_data_extraction/Yoruba/verbs/query_verbs.sparql similarity index 100% rename from src/scribe_data/language_data_extraction/Yoruba/verbs/query_verb.sparql rename to src/scribe_data/language_data_extraction/Yoruba/verbs/query_verbs.sparql From 6f534115f06922b62992a243f03eb05b1fe55e11 Mon Sep 17 00:00:00 2001 From: Andrew Tavis McAllister Date: Thu, 17 Oct 2024 00:30:26 +0200 Subject: [PATCH 4/4] Fix file name and edit error output --- src/scribe_data/check/check_project_structure.py | 3 ++- .../{query_nouns.sparql => query_proper_nouns.sparql} | 0 2 files changed, 2 insertions(+), 1 deletion(-) rename src/scribe_data/language_data_extraction/Esperanto/proper_nouns/{query_nouns.sparql => query_proper_nouns.sparql} (100%) diff --git a/src/scribe_data/check/check_project_structure.py b/src/scribe_data/check/check_project_structure.py index 0e1e8cd26..832ed4419 100644 --- a/src/scribe_data/check/check_project_structure.py +++ b/src/scribe_data/check/check_project_structure.py @@ -120,8 +120,9 @@ def check_data_type_folders(path, language, subdir, errors): for file in os.listdir(item_path): if file not in valid_files and file != "__init__.py": + error_subdir = f"{subdir}/" if subdir else "" errors.append( - f"Unexpected file in {language}/{subdir or ''}/{item}: {file}" + f"Unexpected file in {language}/{error_subdir}{item}: {file}" ) diff --git a/src/scribe_data/language_data_extraction/Esperanto/proper_nouns/query_nouns.sparql b/src/scribe_data/language_data_extraction/Esperanto/proper_nouns/query_proper_nouns.sparql similarity index 100% rename from src/scribe_data/language_data_extraction/Esperanto/proper_nouns/query_nouns.sparql rename to src/scribe_data/language_data_extraction/Esperanto/proper_nouns/query_proper_nouns.sparql