Merge pull request #204 from mhmohona/Interactive

Fix more bugs in CLI - Interactive Learning module
scribe-org · Sep 26, 2024 · baab052 · baab052
2 parents 53688c2 + c1cbd0c
commit baab052
Show file tree

Hide file tree

Showing 7 changed files with 60 additions and 68 deletions.
diff --git a/src/scribe_data/cli/convert.py b/src/scribe_data/cli/convert.py
@@ -29,27 +29,26 @@
 from scribe_data.cli.cli_utils import language_map
 from scribe_data.load.data_to_sqlite import data_to_sqlite
 from scribe_data.utils import (
-    DEFAULT_JSON_EXPORT_DIR,
     DEFAULT_SQLITE_EXPORT_DIR,
     get_language_iso,
 )
 
-DATA_DIR = Path(DEFAULT_JSON_EXPORT_DIR)
-
 
 def export_json(
     language: str, data_type: str, output_dir: Path, overwrite: bool
 ) -> None:
     normalized_language = language_map.get(language.lower())
-    language_capitalized = language.capitalize()
 
     if not normalized_language:
-        raise ValueError(f"Language '{language_capitalized}' is not recognized.")
+        raise ValueError(f"Language '{language.capitalize()}' is not recognized.")
 
+    data_type = data_type[0] if isinstance(data_type, list) else data_type
     data_file = (
-        DATA_DIR / normalized_language["language"].capitalize() / f"{data_type}.json"
+        output_dir / normalized_language["language"].capitalize() / f"{data_type}.json"
     )
 
+    print(data_file)
+
     if not data_file.exists():
         print(
             f"No data found for language '{normalized_language['language']}' and data type '{data_type}'."
@@ -64,11 +63,7 @@ def export_json(
         print(f"Error reading '{data_file}': {e}")
         return
 
-    json_output_dir = (
-        output_dir
-        / DEFAULT_JSON_EXPORT_DIR
-        / normalized_language["language"].capitalize()
-    )
+    json_output_dir = output_dir / normalized_language["language"].capitalize()
     json_output_dir.mkdir(parents=True, exist_ok=True)
 
     output_file = json_output_dir / f"{data_type}.json"
@@ -80,12 +75,13 @@ def export_json(
 
     try:
         with output_file.open("w") as file:
-            json.dump(data, file, indent=2)
+            json.dump(data, file, indent=0)
+
     except IOError as e:
         raise IOError(f"Error writing to '{output_file}': {e}") from e
 
     print(
-        f"Data for language '{normalized_language['language']}' and data type '{data_type}' written to '{output_file}'"
+        f"Data for {normalized_language['language'].capitalize()} {data_type} written to {output_file}"
     )
 
 
@@ -98,12 +94,20 @@ def convert_to_csv_or_tsv(
         return
 
     for dtype in data_type:
+        # Map the default CSV/TSV export directory back to the JSON export directory, which is where the exported data to convert is read from.
         file_path = (
-            DATA_DIR / normalized_language["language"].capitalize() / f"{dtype}.json"
+            Path(
+                str(output_dir)
+                .replace("scribe_data_csv_export", "scribe_data_json_export")
+                .replace("scribe_data_tsv_export", "scribe_data_json_export")
+            )
+            / normalized_language["language"].capitalize()
+            / f"{dtype}.json"
         )
         if not file_path.exists():
-            print(f"No data found for {dtype} conversion at '{file_path}'.")
-            continue
+            raise FileNotFoundError(
+                f"No data found for {dtype} conversion at '{file_path}'."
+            )
 
         try:
             with file_path.open("r") as f:

diff --git a/src/scribe_data/cli/get.py b/src/scribe_data/cli/get.py
@@ -43,17 +43,11 @@ def get_data(
     """
     Function for controlling the data get process for the CLI.
     """
-    if not outputs_per_entry and (data_type in ["emoji-keywords", "emoji_keywords"]):
-        print(
-            "\nNo value set for 'outputs-per-entry'. Setting a default value of 3 outputs per entry.\n"
-        )
-        outputs_per_entry = 3
-
     languages = [language] if language else None
 
     if all:
         print("Updating all languages and data types ...")
-        query_data()
+        query_data(None, None, overwrite)
 
     elif data_type in ["emoji-keywords", "emoji_keywords"]:
         for lang in languages:
@@ -80,17 +74,19 @@ def get_data(
             os.system(f"python3 {translation_generation_script}")
 
     elif language or data_type:
+        data_type = data_type[0] if isinstance(data_type, list) else data_type
+
         data_type = [data_type] if data_type else None
         print(f"Updating data for language: {language}, data type: {data_type}")
-        query_data(languages, data_type)
+        query_data(languages, data_type, overwrite)
 
     else:
         raise ValueError(
             "You must provide either at least one of the --language (-l) or --data-type (-dt) options, or use --all (-a)."
         )
 
     if output_dir:
-        output_dir = Path(output_dir)
+        output_dir = Path(output_dir).resolve()
         if not output_dir.exists():
             output_dir.mkdir(parents=True, exist_ok=True)
 

diff --git a/src/scribe_data/cli/interactive.py b/src/scribe_data/cli/interactive.py
@@ -164,16 +164,6 @@ def run_interactive_mode():
     selected_data_types = select_data_types()
     output_options = get_output_options()
 
-    if len(selected_languages) == 1:
-        print(
-            f"\nGetting {', '.join(selected_data_types)} for {', '.join(selected_languages)}."
-        )
-
-    else:
-        print(
-            f"\nQuerying {', '.join(selected_data_types)} for {', '.join(selected_languages)} languages."
-        )
-
     print(
         f"Data will be exported as {output_options['type'].upper()} files to '{output_options['dir']}'."
     )

diff --git a/src/scribe_data/translation/translation_utils.py b/src/scribe_data/translation/translation_utils.py
@@ -144,7 +144,7 @@ def translation_interrupt_handler(source_language, translations):
         "w",
         encoding="utf-8",
     ) as file:
-        json.dump(translations, file, ensure_ascii=False, indent=4)
+        json.dump(translations, file, ensure_ascii=False, indent=0)
 
     print("The current progress is saved to the translations.json file.")
     exit()
@@ -238,7 +238,7 @@ def translate_to_other_languages(
             "w",
             encoding="utf-8",
         ) as file:
-            file.write(json.dumps(translations, ensure_ascii=False, indent=2))
+            file.write(json.dumps(translations, ensure_ascii=False, indent=0))
             file.write("\n")
 
     print(

diff --git a/src/scribe_data/utils.py b/src/scribe_data/utils.py
@@ -264,7 +264,9 @@ def load_queried_data(language: str, data_type: str) -> tuple[Any, bool, str]:
         return json.load(f), data_path
 
 
-def export_formatted_data(formatted_data: dict, language: str, data_type: str) -> None:
+def export_formatted_data(
+    formatted_data: dict, language: str, data_type: str, query_data_in_use: bool = False
+) -> None:
     """
     Exports formatted data to a JSON file for a specific language and data type.
 

diff --git a/src/scribe_data/wikidata/query_data.py b/src/scribe_data/wikidata/query_data.py
@@ -31,7 +31,7 @@
 from scribe_data.wikidata.wikidata_utils import sparql
 
 
-def query_data(languages=None, word_types=None):
+def query_data(languages=None, word_types=None, overwrite=None):
     SCRIBE_DATA_SRC_PATH = Path(__file__).parent.parent
     PATH_TO_LANGUAGE_EXTRACTION_FILES = (
         SCRIBE_DATA_SRC_PATH / "language_data_extraction"
@@ -69,8 +69,6 @@ def query_data(languages=None, word_types=None):
     ]
     queries_to_run = sorted(queries_to_run)
 
-    print(queries_to_run)
-
     # Run queries and format data.
     for q in tqdm(
         queries_to_run,
@@ -87,33 +85,35 @@ def query_data(languages=None, word_types=None):
         file_name = f"{target_type}.json"
 
         if existing_files := list(export_dir.glob(f"{target_type}*.json")):
-            print(
-                f"Existing file(s) found for {lang} {target_type} in the outputs directory:\n"
-            )
-            for i, file in enumerate(existing_files, 1):
-                print(f"{i}. {file.name}")
-
-            # choice = input(
-            #     "\nChoose an option:\n1. Overwrite existing (press 'o')\n2. Keep all (press 'k')\n3. Skip process (press anything else)\nEnter your choice: "
-            # )
-            choice = input(
-                "\nChoose an option:\n1. Overwrite existing data (press 'o')\n2. Skip process (press anything else)\nEnter your choice: "
-            )
-
-            print(f"You entered: {choice}")
-
-            if choice in ["o", "O"]:
-                print("Removing existing files...")
+            if overwrite:
+                print("Overwrite is enabled. Removing existing files...")
                 for file in existing_files:
                     file.unlink()
-
-            # elif choice in ["k", "K"]:
-            #     timestamp = datetime.now().strftime("%Y_%m_%d_%H_%M_%S")
-            #     file_name = f"{target_type}_{timestamp}.json"
-
             else:
-                print(f"Skipping update for {lang} {target_type}.")
-                continue
+                print(
+                    f"\nExisting file(s) found for {lang} {target_type} in the outputs directory:\n"
+                )
+                for i, file in enumerate(existing_files, 1):
+                    print(f"{i}. {file.name}")
+                # choice = input(
+                #     "\nChoose an option:\n1. Overwrite existing (press 'o')\n2. Keep all (press 'k')\n3. Skip process (press anything else)\nEnter your choice: "
+                # )
+                choice = input(
+                    "\nChoose an option:\n1. Overwrite existing data (press 'o')\n2. Skip process (press anything else)\nEnter your choice: "
+                )
+
+                print(f"You entered: {choice}")
+
+                if choice.lower() == "o":
+                    print("Removing existing files...")
+                    for file in existing_files:
+                        file.unlink()
+                # elif choice in ["k", "K"]:
+                #     timestamp = datetime.now().strftime("%Y_%m_%d_%H_%M_%S")
+                #     file_name = f"{target_type}_{timestamp}.json"
+                else:
+                    print(f"Skipping update for {lang} {target_type}.")
+                    continue
 
         file_path = export_dir / file_name
         print(f"Querying and formatting {lang} {target_type}")

diff --git a/tests/cli/test_get.py b/tests/cli/test_get.py
@@ -35,9 +35,9 @@ def test_get_command(
         self, mock_system, mock_convert, mock_export_json, mock_query_data
     ):
         expected_calls = [
-            call(["English"], ["nouns"]),
-            call(["English"], ["nouns"]),
-            call(),
+            call(["English"], ["nouns"], False),
+            call(["English"], ["nouns"], False),
+            call(None, None, False),
         ]
 
         # Execute the test