Feature/prompt to generate tests #16

Draft · wants to merge 22 commits into base: main
Changes from 21 commits
71 changes: 49 additions & 22 deletions README.md
@@ -43,6 +43,7 @@ It works with any SQL file within the `models/` directory of the dbt project. Yo

You'll notice for the sake of MVP, I am running nested git clones to get this working. I'll release to pypi soon.


```bash
# my command to run this tool in an infinite loop in a split terminal
git clone https://github.com/sungchun12/fst.git
@@ -55,38 +56,64 @@ python3 -m pip install --upgrade pip setuptools wheel
source venv/bin/activate
pip3 install -r requirements.txt
pip3 install -e ../ # installing the fst package locally
# fst start --file-path <file path>
dbt build # Optional, so that upstream dependencies are created
code .
fst start --file-path models/customers.sql
fst start
```

```shell
# example of running this tool on each modification to the sql file
# example of running this tool on each modification to any SQL file within models/
# pro tip: open the compiled query in a split IDE window for hot reloading as you develop
~/De/fst/jaffle_shop_duckdb python ../fst_query.py /Users/sung/Desktop/fst/jaffle_shop_duckdb/models/new_file.sql
2023-03-18 18:39:15 - INFO - Watching directory: /Users/sung/Desktop/fst/jaffle_shop_duckdb
2023-03-18 18:39:34 - INFO - Detected modification: /Users/sung/Desktop/fst/jaffle_shop_duckdb/models/new_file.sql
2023-03-18 18:39:34 - INFO - Running `dbt build` with the modified SQL file (new_file)...
2023-03-18 18:39:37 - INFO - `dbt build` was successful.
2023-03-18 18:39:37 - INFO - project_name: jaffle_shop
2023-03-18 18:39:37 - WARNING - Warning: No tests were run with the `dbt build` command. Consider adding tests to your project.
2023-03-18 18:39:37 - WARNING - Generated test YAML file: /Users/sung/Desktop/fst/jaffle_shop_duckdb/models/new_file.yml
2023-03-18 18:39:37 - INFO - Executing compiled query from: /Users/sung/Desktop/fst/jaffle_shop_duckdb/target/compiled/jaffle_shop/models/new_file.sql
2023-03-18 18:39:37 - INFO - Using DuckDB file: jaffle_shop.duckdb
2023-03-18 18:39:37 - INFO - `dbt build` time: 3.38 seconds
2023-03-18 18:39:37 - INFO - Query time: 0.00 seconds
2023-03-18 18:39:37 - INFO - Result Preview
2023-03-22 11:05:29 - INFO - Running `dbt build` with the modified SQL file (/Users/sung/fst/jaffle_shop_duckdb/models/new_file.sql)...
2023-03-22 11:05:33 - INFO - `dbt build` was successful.
2023-03-22 11:05:33 - INFO - 18:05:32 Running with dbt=1.4.5
18:05:32 Found 6 models, 20 tests, 0 snapshots, 0 analyses, 297 macros, 0 operations, 3 seed files, 0 sources, 0 exposures, 0 metrics
18:05:32
18:05:32 Concurrency: 24 threads (target='dev')
18:05:32
18:05:32 1 of 1 START sql table model main.new_file ..................................... [RUN]
18:05:33 1 of 1 OK created sql table model main.new_file ................................ [OK in 0.12s]
18:05:33
18:05:33 Finished running 1 table model in 0 hours 0 minutes and 0.25 seconds (0.25s).
18:05:33
18:05:33 Completed successfully
18:05:33
18:05:33 Done. PASS=1 WARN=0 ERROR=0 SKIP=0 TOTAL=1

2023-03-22 11:05:33 - WARNING - Warning: No tests were run with the `dbt build` command. Consider adding tests to your project.
2023-03-22 11:05:33 - WARNING - Generated test YAML file: /Users/sung/fst/jaffle_shop_duckdb/models/new_file.yml
2023-03-22 11:05:33 - WARNING - Running `dbt test` with the generated test YAML file...
2023-03-22 11:05:37 - INFO - `dbt test` with generated tests was successful.
2023-03-22 11:05:37 - INFO - 18:05:36 Running with dbt=1.4.5
18:05:36 Found 6 models, 22 tests, 0 snapshots, 0 analyses, 297 macros, 0 operations, 3 seed files, 0 sources, 0 exposures, 0 metrics
18:05:36
18:05:36 Concurrency: 24 threads (target='dev')
18:05:36
18:05:36 1 of 2 START test not_null_new_file_customer_id ................................ [RUN]
18:05:36 2 of 2 START test unique_new_file_customer_id .................................. [RUN]
18:05:36 1 of 2 PASS not_null_new_file_customer_id ...................................... [PASS in 0.07s]
18:05:36 2 of 2 PASS unique_new_file_customer_id ........................................ [PASS in 0.07s]
18:05:36
18:05:36 Finished running 2 tests in 0 hours 0 minutes and 0.17 seconds (0.17s).
18:05:36
18:05:36 Completed successfully
18:05:36
18:05:36 Done. PASS=2 WARN=0 ERROR=0 SKIP=0 TOTAL=2

2023-03-22 11:05:37 - INFO - Executing compiled query from: /Users/sung/fst/jaffle_shop_duckdb/target/compiled/jaffle_shop/models/new_file.sql
2023-03-22 11:05:37 - INFO - Using DuckDB file: jaffle_shop.duckdb
2023-03-22 11:05:37 - INFO - `dbt build` time: 4.28 seconds
2023-03-22 11:05:37 - INFO - Query time: 0.00 seconds
2023-03-22 11:05:37 - INFO - Result Preview
+---------------+--------------+-------------+---------------+---------------------+--------------------+---------------------------+
| customer_id | first_name | last_name | first_order | most_recent_order | number_of_orders | customer_lifetime_value |
+===============+==============+=============+===============+=====================+====================+===========================+
| 51 | Howard | R. | 2018-01-28 | 2018-02-23 | 3 | 99 |
+---------------+--------------+-------------+---------------+---------------------+--------------------+---------------------------+
| 3 | Kathleen | P. | 2018-01-02 | 2018-03-11 | 3 | 65 |
| 1 | Michael | P. | 2018-01-01 | 2018-02-10 | 2 | 33 |
+---------------+--------------+-------------+---------------+---------------------+--------------------+---------------------------+
| 46 | Norma | C. | 2018-03-24 | 2018-03-27 | 2 | 64 |
| 2 | Shawn | M. | 2018-01-11 | 2018-01-11 | 1 | 23 |
+---------------+--------------+-------------+---------------+---------------------+--------------------+---------------------------+
| 30 | Christina | W. | 2018-03-02 | 2018-03-14 | 2 | 57 |
| 3 | Kathleen | P. | 2018-01-02 | 2018-03-11 | 3 | 65 |
+---------------+--------------+-------------+---------------+---------------------+--------------------+---------------------------+
| 54 | Rose | M. | 2018-01-07 | 2018-03-24 | 5 | 57 |
| 6 | Sarah | R. | 2018-02-19 | 2018-02-19 | 1 | 8 |
+---------------+--------------+-------------+---------------+---------------------+--------------------+---------------------------+
```
10 changes: 10 additions & 0 deletions fst/config_defaults.py
@@ -0,0 +1,10 @@
import os
import yaml

CURRENT_WORKING_DIR = os.getcwd()
DISABLE_TESTS = False

# Load profiles.yml only once
profiles_path = os.path.join(CURRENT_WORKING_DIR, "profiles.yml")
with open(profiles_path, "r") as file:
PROFILES = yaml.safe_load(file)
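
For context, this loader (and `db_utils` below) assumes a `profiles.yml` shaped roughly like the following minimal sketch. The values are illustrative — only the project key, `target`, and `outputs.<target>.path` entries are actually read:

```yaml
jaffle_shop:
  target: dev
  outputs:
    dev:
      type: duckdb
      path: jaffle_shop.duckdb
      threads: 24
```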
28 changes: 28 additions & 0 deletions fst/db_utils.py
@@ -0,0 +1,28 @@
import duckdb
import os
from functools import lru_cache
from fst.config_defaults import PROFILES
import logging

logger = logging.getLogger(__name__)

@lru_cache
def execute_query(query: str, db_file: str):
    connection = duckdb.connect(database=db_file, read_only=False)
    result = connection.execute(query).fetchmany(5)
    column_names = [desc[0] for desc in connection.description]
    connection.close()
    return result, column_names

@lru_cache
def get_duckdb_file_path():
    target = PROFILES["jaffle_shop"]["target"]
    db_path = PROFILES["jaffle_shop"]["outputs"][target]["path"]
    return db_path


@lru_cache
def get_project_name():
    project_name = list(PROFILES.keys())[0]
    logger.info(f"project_name: {project_name}")
    return project_name
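
`execute_query` follows the standard DB-API pattern: connect, execute, `fetchmany(5)` for a preview, and read column names from the cursor description. A self-contained sketch of that same pattern, substituting the stdlib `sqlite3` driver for duckdb (the function name is reused for illustration only, not fst's actual module):

```python
import sqlite3

def execute_query(query: str, db_file: str = ":memory:"):
    # Mirrors the shape of fst.db_utils.execute_query: preview up to
    # 5 rows plus the column names taken from the cursor description.
    connection = sqlite3.connect(db_file)
    cursor = connection.execute(query)
    result = cursor.fetchmany(5)
    column_names = [desc[0] for desc in cursor.description]
    connection.close()
    return result, column_names

rows, cols = execute_query("SELECT 1 AS customer_id, 'Howard' AS first_name")
print(cols)  # -> ['customer_id', 'first_name']
```

The `fetchmany(5)` cap is what keeps the "Result Preview" in the README output small regardless of model size.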
23 changes: 23 additions & 0 deletions fst/directory_watcher.py
@@ -0,0 +1,23 @@
import os
import time
from watchdog.observers.polling import PollingObserver
import logging

logger = logging.getLogger(__name__)

observer = None

def watch_directory(event_handler, file_path: str):
    global observer
    logger.info(f"Started watching directory: {file_path}")
    observer = PollingObserver()
    observer.schedule(event_handler, path=file_path, recursive=False)
    observer.start()

    try:
        while True:
            time.sleep(1)
    except KeyboardInterrupt:
        observer.stop()
    observer.join()
    logger.info(f"Stopped watching directory: {file_path}")
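
`PollingObserver` detects changes by periodically re-stat-ing watched paths rather than relying on OS file events. A dependency-free sketch of that core idea using only the standard library (`poll_once` is an illustrative name, not part of fst or watchdog):

```python
import os
import tempfile

def poll_once(path, last_mtime):
    # One polling tick: compare the file's current mtime to the last
    # value we saw, which is what a polling observer does internally.
    mtime = os.stat(path).st_mtime
    return mtime != last_mtime, mtime

# Usage: create a temp SQL file, then simulate an edit by bumping its mtime.
with tempfile.NamedTemporaryFile("w", suffix=".sql", delete=False) as f:
    f.write("select 1 as id")
    path = f.name
_, m0 = poll_once(path, None)        # baseline mtime
os.utime(path, (m0 + 1, m0 + 1))     # "modify" the file
changed, _ = poll_once(path, m0)     # changed is now True
os.remove(path)
```

Polling is slower than native notifications but works reliably across filesystems and editors that replace files on save.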
144 changes: 144 additions & 0 deletions fst/file_utils.py
@@ -0,0 +1,144 @@
import os
import yaml
import logging
import re
from fst.config_defaults import CURRENT_WORKING_DIR
from fst.db_utils import get_project_name

logger = logging.getLogger(__name__)

def get_active_file(file_path: str):
    if file_path and file_path.endswith(".sql"):
        return file_path
    else:
        logger.warning("No active SQL file found.")
        return None

def find_compiled_sql_file(file_path):
    active_file = get_active_file(file_path)
    if not active_file:
        return None
    project_directory = CURRENT_WORKING_DIR
    project_name = get_project_name()
    relative_file_path = os.path.relpath(active_file, project_directory)
    compiled_directory = os.path.join(
        project_directory, "target", "compiled", project_name
    )
    compiled_file_path = os.path.join(compiled_directory, relative_file_path)
    return compiled_file_path if os.path.exists(compiled_file_path) else None

def find_tests_for_model(model_name, directory='models'):
    """
    Check if tests are generated for a given model in a dbt project.

    Args:
        model_name (str): The name of the model to search for tests.
        directory (str, optional): The root directory to start the search. Defaults to 'models'.

    Returns:
        list: A list of dicts, each holding the schema file path, column name, and tests found.
    """
    tests_data = []

    for root, _, files in os.walk(directory):
        for file in files:
            if file.endswith(('.schema.yml', '.yml')):
                filepath = os.path.join(root, file)
                with open(filepath, 'r') as f:
                    schema_data = yaml.safe_load(f) or {}

                for model in schema_data.get('models', []):
                    if model['name'] == model_name:
                        columns = model.get('columns', [])
                        for column_data in columns:
                            column_name = column_data['name']
                            tests = column_data.get('tests', [])
                            if tests:
                                tests_data.append({'file': filepath, 'column': column_name, 'tests': tests})

    return tests_data
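
`find_tests_for_model` walks `models/` looking for the standard dbt column-test layout. A minimal sketch of the schema YAML it parses (the model and column names mirror the log output above; the file itself is illustrative):

```yaml
models:
  - name: new_file
    columns:
      - name: customer_id
        tests:
          - unique
          - not_null
```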

def get_model_name_from_file(file_path: str):
    project_directory = CURRENT_WORKING_DIR
    models_directory = os.path.join(project_directory, "models")
    relative_file_path = os.path.relpath(file_path, models_directory)
    model_name, _ = os.path.splitext(relative_file_path)
    return model_name.replace(os.sep, ".")


def generate_test_yaml(model_name, column_names, active_file_path, tests_data):
Owner Author:
There are more efficient ways to do this. Example code from GPT.

import os
import re
import yaml
from typing import List, Dict


def generate_test_yaml(model_name: str, column_names: List[str], active_file_path: str, tests_data: List[Dict[str, str]]) -> str:
    yaml_files = {}

    def add_tests_to_existing_columns(existing_column: Dict[str, List[str]], tests_to_add: List[str]) -> None:
        tests = existing_column.get('tests', [])
        for test in tests_to_add:
            if test not in tests:
                tests.append(test)
        existing_column['tests'] = tests

    def add_tests_to_new_columns(schema_yml_path: str, model_name: str, column: str, tests_to_add: List[str]) -> None:
        with open(schema_yml_path, "r") as f:
            schema_yml_data = yaml.safe_load(f)

        for model in schema_yml_data.get("models", []):
            if model["name"] == model_name:
                if "columns" not in model:
                    model["columns"] = []

                new_column = {
                    "name": column,
                    "description": f"A placeholder description for {column}",
                    "tests": tests_to_add,
                }
                model["columns"].append(new_column)
                break

        with open(schema_yml_path, "w") as f:
            yaml.dump(schema_yml_data, f)

    for column in column_names:
        tests_to_add = ["unique", "not_null"] if re.search(r"(_id|_ID)$", column) else []

        existing_tests = [data for data in tests_data if data['column'] == column]

        if existing_tests:
            for test_data in existing_tests:
                yaml_file = test_data['file']
                if yaml_file not in yaml_files:
                    with open(yaml_file, 'r') as f:
                        yaml_files[yaml_file] = yaml.safe_load(f)

                models = yaml_files[yaml_file].get('models', [])
                columns_data = [model.get('columns', []) for model in models if model['name'] == model_name]
                existing_columns = [column_data for columns in columns_data for column_data in columns if column_data['name'] == column]
                for existing_column in existing_columns:
                    add_tests_to_existing_columns(existing_column, tests_to_add)
        else:
            schema_yml_path = os.path.join(os.path.dirname(active_file_path), "schema.yml")
            if os.path.exists(schema_yml_path):
                add_tests_to_new_columns(schema_yml_path, model_name, column, tests_to_add)
                return schema_yml_path

    return next(iter(yaml_files))

Collaborator:
that's a good idea to separate the methods indeed!
I've been a bit wary of using GPT's code directly since it tends toward spaghetti complexity, but I think we'll always be able to handle any issues that come up

    yaml_files = {}

    for column in column_names:
        tests_to_add = []
        if re.search(r"(_id|_ID)$", column):
            tests_to_add = ["unique", "not_null"]

        # Check if tests for this column already exist
        existing_tests = [data for data in tests_data if data['column'] == column]

        if existing_tests:
            # Update the existing YAML file with new tests
            for test_data in existing_tests:
                yaml_file = test_data['file']
                if yaml_file not in yaml_files:
                    with open(yaml_file, 'r') as f:
                        yaml_files[yaml_file] = yaml.safe_load(f)

                models = yaml_files[yaml_file].get('models', [])
                for model in models:
                    if model['name'] == model_name:
                        columns = model.get('columns', [])
                        for existing_column in columns:
                            if existing_column['name'] == column:
                                tests = existing_column.get('tests', [])
                                for test in tests_to_add:
                                    if test not in tests:
                                        tests.append(test)
                                existing_column['tests'] = tests
        else:
            # If no tests exist, add the tests to the schema.yml file
            schema_yml_path = os.path.join(os.path.dirname(active_file_path), "schema.yml")
            if os.path.exists(schema_yml_path):
                with open(schema_yml_path, "r") as f:
                    schema_yml_data = yaml.safe_load(f)

                for model in schema_yml_data.get("models", []):
                    if model["name"] == model_name:
                        if "columns" not in model:
                            model["columns"] = []

                        new_column = {
                            "name": column,
                            "description": f"A placeholder description for {column}",
                            "tests": tests_to_add,
                        }
                        model["columns"].append(new_column)
                        break

                with open(schema_yml_path, "w") as f:
                    yaml.dump(schema_yml_data, f)

                return schema_yml_path

    # Persist any in-memory updates, then return the first file path
    # where tests were found (None when nothing was updated).
    for yaml_file, data in yaml_files.items():
        with open(yaml_file, "w") as f:
            yaml.dump(data, f)
    return next(iter(yaml_files), None)


def get_model_paths():
    with open("dbt_project.yml", "r") as file:
        dbt_project = yaml.safe_load(file)
        model_paths = dbt_project.get("model-paths", [])
        return [
            os.path.join(os.getcwd(), path) for path in model_paths
        ]

def get_models_directory(project_dir):
    dbt_project_file = os.path.join(project_dir, 'dbt_project.yml')
    with open(dbt_project_file, 'r') as file:
        dbt_project = yaml.safe_load(file)
    models_subdir = dbt_project.get('model-paths', ['models'])[0]
    return os.path.join(project_dir, models_subdir)
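
Both helpers read the standard `model-paths` key from `dbt_project.yml`. A minimal sketch of the fragment they depend on (project and profile names are illustrative):

```yaml
name: jaffle_shop
profile: jaffle_shop
model-paths: ["models"]
```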