From 3a69477b56b39ae85e4f4982a7e07be49782acaa Mon Sep 17 00:00:00 2001 From: Sung Won Chung Date: Tue, 21 Mar 2023 15:05:55 -0700 Subject: [PATCH 01/21] first cut --- fst/db_utils.py | 23 ++++++ fst/directory_watcher.py | 17 +++++ fst/file_utils.py | 58 +++++++++++++++ fst/logger.py | 26 +++++++ fst/main.py | 64 ++-------------- fst/query_handler.py | 121 +++++++++++++++++++++++++++++++ fst/fst_query.py => fst_query.py | 0 7 files changed, 252 insertions(+), 57 deletions(-) create mode 100644 fst/db_utils.py create mode 100644 fst/directory_watcher.py create mode 100644 fst/file_utils.py create mode 100644 fst/logger.py create mode 100644 fst/query_handler.py rename fst/fst_query.py => fst_query.py (100%) diff --git a/fst/db_utils.py b/fst/db_utils.py new file mode 100644 index 0000000..4421efc --- /dev/null +++ b/fst/db_utils.py @@ -0,0 +1,23 @@ +import duckdb +import os + +@lru_cache +def execute_query(query: str, db_file: str): + connection = duckdb.connect(database=db_file, read_only=False) + result = connection.execute(query).fetchmany(5) + column_names = [desc[0] for desc in connection.description] + connection.close() + return result, column_names + +@lru_cache +def get_duckdb_file_path(): + target = PROFILES["jaffle_shop"]["target"] + db_path = PROFILES["jaffle_shop"]["outputs"][target]["path"] + return db_path + + +@lru_cache +def get_project_name(): + project_name = list(PROFILES.keys())[0] + logging.info(f"project_name: {project_name}") + return project_name diff --git a/fst/directory_watcher.py b/fst/directory_watcher.py new file mode 100644 index 0000000..dbdf199 --- /dev/null +++ b/fst/directory_watcher.py @@ -0,0 +1,17 @@ +import os +from watchdog.observers import Observer +from fst.query_handler import QueryHandler + +def watch_directory(directory: str, callback, active_file_path: str): + event_handler = QueryHandler(callback, active_file_path) + observer = Observer() + observer.schedule(event_handler, directory, recursive=True) + observer.start() + + try: + while True: + time.sleep(1) + except KeyboardInterrupt: + observer.stop() + + observer.join() diff --git a/fst/file_utils.py b/fst/file_utils.py new file mode 100644 index 0000000..f5029d6 --- /dev/null +++ b/fst/file_utils.py @@ -0,0 +1,58 @@ +import os +import yaml +import logging +import re + +def get_active_file(file_path: str): + if file_path and file_path.endswith(".sql"): + return file_path + else: + logging.warning("No active SQL file found.") + return None + +def find_compiled_sql_file(file_path): + active_file = get_active_file(file_path) + if not active_file: + return None + project_directory = CURRENT_WORKING_DIR + project_name = get_project_name() + relative_file_path = os.path.relpath(active_file, project_directory) + compiled_directory = os.path.join( + project_directory, "target", "compiled", project_name + ) + compiled_file_path = os.path.join(compiled_directory, relative_file_path) + return compiled_file_path if os.path.exists(compiled_file_path) else None + +def get_model_name_from_file(file_path: str): + project_directory = CURRENT_WORKING_DIR + models_directory = os.path.join(project_directory, "models") + relative_file_path = os.path.relpath(file_path, models_directory) + model_name, _ = os.path.splitext(relative_file_path) + return model_name.replace(os.sep, ".") + +def generate_test_yaml(model_name, column_names, active_file_path): + test_yaml = f"version: 2\n\nmodels:\n - name: {model_name}\n columns:" + + for column in column_names: + test_yaml += f"\n - name: {column}\n description: 'A 
placeholder description for {column}'" + + if re.search(r"(_id|_ID)$", column): + test_yaml += "\n tests:\n - unique\n - not_null" + + active_file_directory = os.path.dirname(active_file_path) + active_file_name, _ = os.path.splitext(os.path.basename(active_file_path)) + new_yaml_file_name = f"{active_file_name}.yml" + new_yaml_file_path = os.path.join(active_file_directory, new_yaml_file_name) + + with open(new_yaml_file_path, "w") as file: + file.write(test_yaml) + + return new_yaml_file_path + +def get_model_paths(): + with open("dbt_project.yml", "r") as file: + dbt_project = yaml.safe_load(file) + model_paths = dbt_project.get("model-paths", []) + return [ + os.path.join(os.getcwd(), path) for path in model_paths + ] diff --git a/fst/logger.py b/fst/logger.py new file mode 100644 index 0000000..b0d0d32 --- /dev/null +++ b/fst/logger.py @@ -0,0 +1,26 @@ +import logging +from colorlog import ColoredFormatter + +def setup_logger(): + log_format = "%(asctime)s - %(levelname)s - %(log_color)s%(message)s%(reset)s" + + formatter = ColoredFormatter( + log_format, + datefmt="%Y-%m-%d %H:%M:%S", + reset=True, + log_colors={ + "DEBUG": "cyan", + "INFO": "light_blue", + "WARNING": "yellow", + "ERROR": "red", + "CRITICAL": "red,bg_white", + }, + ) + + logger = logging.getLogger() + logger.setLevel(logging.INFO) + + stream_handler = logging.StreamHandler() + stream_handler.setFormatter(formatter) + + logger.addHandler(stream_handler) diff --git a/fst/main.py b/fst/main.py index 7d6313d..288490f 100644 --- a/fst/main.py +++ b/fst/main.py @@ -1,23 +1,14 @@ import click import os -import yaml -from fst import fst_query - +from fst.logger import setup_logger +from fst.query_handler import handle_query +from fst.directory_watcher import watch_directory +from fst.file_utils import get_active_file, get_model_paths @click.group() def main(): pass - -def get_model_paths(): - with open("dbt_project.yml", "r") as file: - dbt_project = yaml.safe_load(file) - model_paths = dbt_project.get("model-paths", []) - return [ - os.path.join(fst_query.CURRENT_WORKING_DIR, path) for path in model_paths - ] - - @main.command() @click.option( "--file-path", @@ -30,56 +21,15 @@ def start(file_path): model_paths = get_model_paths() if file_path: click.echo(f"Started watching directory: {os.path.dirname(file_path)}") - fst_query.watch_directory( - os.path.dirname(file_path), fst_query.handle_query, file_path - ) - elif model_paths: - for path in model_paths: - click.echo(f"Started watching directory: {path}") - fst_query.watch_directory(path, fst_query.handle_query, None) - else: - click.echo("Please provide a file path using the --file-path option.") - - -@main.command() -def stop(): - if fst_query.observer: - fst_query.observer.stop() - fst_query.observer.join() - click.echo("Stopped watching the directory.") - else: - click.echo("No observer is currently running.") - - -@main.command() -@click.option( - "--file-path", - type=click.Path( - exists=True, file_okay=True, dir_okay=False, readable=True, resolve_path=True - ), - help="Path to the SQL file you want to watch.", -) -def restart(file_path): - if fst_query.observer: - fst_query.observer.stop() - fst_query.observer.join() - click.echo("Stopped watching the directory.") - else: - click.echo("No observer was running. 
Starting a new one.") - - model_paths = get_model_paths() - if file_path: - click.echo(f"Started watching directory: {os.path.dirname(file_path)}") - fst_query.watch_directory( - os.path.dirname(file_path), fst_query.handle_query, file_path + watch_directory( + os.path.dirname(file_path), handle_query, file_path ) elif model_paths: for path in model_paths: click.echo(f"Started watching directory: {path}") - fst_query.watch_directory(path, fst_query.handle_query, None) + watch_directory(path, handle_query, None) else: click.echo("Please provide a file path using the --file-path option.") - if __name__ == "__main__": main() diff --git a/fst/query_handler.py b/fst/query_handler.py new file mode 100644 index 0000000..6cc19e9 --- /dev/null +++ b/fst/query_handler.py @@ -0,0 +1,121 @@ +from watchdog.events import FileSystemEventHandler +from threading import Timer +import logging + +class QueryHandler(FileSystemEventHandler): + def __init__(self, callback, active_file_path: str): + self.callback = callback + self.active_file_path = active_file_path + self.debounce_timer = None + + def on_modified(self, event): + if event.src_path.endswith(".sql"): + active_file = get_active_file(self.active_file_path) + if active_file and active_file == event.src_path: + if self.debounce_timer is None: + self.debounce_timer = Timer(1.5, self.debounce_query) + self.debounce_timer.start() + else: + self.debounce_timer.cancel() + self.debounce_timer = Timer(1.5, self.debounce_query) + self.debounce_timer.start() + + def debounce_query(self): + if self.debounce_timer is not None: + self.debounce_timer.cancel() + self.debounce_timer = None + query = None + with open(self.active_file_path, "r") as file: + query = file.read() + if query is not None and query.strip(): + logging.info(f"Detected modification: {self.active_file_path}") + self.callback(query, self.active_file_path) + +def handle_query(query, file_path): + if query.strip(): + try: + start_time = time.time() + + active_file = get_active_file(file_path) + if not active_file: + return + model_name = get_model_name_from_file(active_file) + logging.info( + f"Running `dbt build` with the modified SQL file ({model_name})..." + ) + result = subprocess.run( + ["dbt", "build", "--select", model_name], + capture_output=True, + text=True, + ) + compile_time = time.time() - start_time + + stdout_without_finished = result.stdout.split("Finished running")[0] + + if result.returncode == 0: + logging.info("`dbt build` was successful.") + logging.info(result.stdout) + else: + logging.error("Error running `dbt build`:") + logging.error(result.stdout) + + if ( + "PASS" not in stdout_without_finished + and "FAIL" not in stdout_without_finished + and "ERROR" not in stdout_without_finished + ): + compiled_sql_file = find_compiled_sql_file(file_path) + if compiled_sql_file: + with open(compiled_sql_file, "r") as file: + compiled_query = file.read() + duckdb_file_path = get_duckdb_file_path() + _, column_names = execute_query(compiled_query, duckdb_file_path) + + warning_message = colored( + "Warning: No tests were run with the `dbt build` command. 
Consider adding tests to your project.", + "yellow", + attrs=["bold"], + ) + logging.warning(warning_message) + + test_yaml_path = generate_test_yaml( + model_name, column_names, active_file + ) + test_yaml_path_warning_message = colored( + f"Generated test YAML file: {test_yaml_path}", + "yellow", + attrs=["bold"], + ) + logging.warning(test_yaml_path_warning_message) + + compiled_sql_file = find_compiled_sql_file(file_path) + if compiled_sql_file: + with open(compiled_sql_file, "r") as file: + compiled_query = file.read() + colored_compiled_query = highlight( + compiled_query, SqlLexer(), TerminalFormatter() + ) + logging.info(f"Executing compiled query from: {compiled_sql_file}") + duckdb_file_path = get_duckdb_file_path() + logging.info(f"Using DuckDB file: {duckdb_file_path}") + + start_time = time.time() + result, column_names = execute_query( + compiled_query, duckdb_file_path + ) + query_time = time.time() - start_time + + logging.info(f"`dbt build` time: {compile_time:.2f} seconds") + logging.info(f"Query time: {query_time:.2f} seconds") + + logging.info( + "Result Preview" + + "\n" + + tabulate(result, headers=column_names, tablefmt="grid") + ) + else: + logging.error("Couldn't find the compiled SQL file.") + except Exception as e: + logging.error(f"Error: {e}") + else: + logging.error("Empty query.") diff --git a/fst/fst_query.py b/fst_query.py similarity index 100% rename from fst/fst_query.py rename to fst_query.py From 3112cea565fa9f2d04d7f2bfa6513480a43193b7 Mon Sep 17 00:00:00 2001 From: Sung Won Chung Date: Tue, 21 Mar 2023 15:41:29 -0700 Subject: [PATCH 02/21] full parity --- fst/constants.py | 9 +++++++ fst/db_utils.py | 7 ++++- fst/directory_watcher.py | 14 +++++++--- fst/file_utils.py | 6 ++++- fst/main.py | 1 + fst/query_handler.py | 57 ++++++++++++++++++++-------------------- 6 files changed, 60 insertions(+), 34 deletions(-) create mode 100644 fst/constants.py diff --git a/fst/constants.py b/fst/constants.py new file mode 100644 index 0000000..752b7a0 --- /dev/null +++ b/fst/constants.py @@ -0,0 +1,9 @@ +import os +import yaml + +CURRENT_WORKING_DIR = os.getcwd() + +# Load profiles.yml only once +profiles_path = os.path.join(CURRENT_WORKING_DIR, "profiles.yml") +with open(profiles_path, "r") as file: + PROFILES = yaml.safe_load(file) \ No newline at end of file diff --git a/fst/db_utils.py b/fst/db_utils.py index 4421efc..5bfd34e 100644 --- a/fst/db_utils.py +++ b/fst/db_utils.py @@ -1,5 +1,10 @@ import duckdb import os +from functools import lru_cache +from fst.constants import PROFILES +import logging + +logger = logging.getLogger(__name__) @lru_cache def execute_query(query: str, db_file: str): @@ -19,5 +24,5 @@ def get_duckdb_file_path(): @lru_cache def get_project_name(): project_name = list(PROFILES.keys())[0] - logging.info(f"project_name: {project_name}") + logger.info(f"project_name: {project_name}") return project_name diff --git a/fst/directory_watcher.py b/fst/directory_watcher.py index dbdf199..6d105db 100644 --- a/fst/directory_watcher.py +++ b/fst/directory_watcher.py @@ -1,11 +1,19 @@ import os +import time from watchdog.observers import Observer from fst.query_handler import QueryHandler +import logging + +logger = logging.getLogger(__name__) + +observer = None def watch_directory(directory: str, callback, active_file_path: str): + global observer + logger.info("Started watching directory...") event_handler = QueryHandler(callback, active_file_path) observer = Observer() - observer.schedule(event_handler, directory, recursive=True) + 
observer.schedule(event_handler, path=directory, recursive=True) observer.start() try: @@ -13,5 +21,5 @@ def watch_directory(directory: str, callback, active_file_path: str): time.sleep(1) except KeyboardInterrupt: observer.stop() - - observer.join() + observer.join() + logger.info("Stopped watching directory.") \ No newline at end of file diff --git a/fst/file_utils.py b/fst/file_utils.py index f5029d6..953f289 100644 --- a/fst/file_utils.py +++ b/fst/file_utils.py @@ -2,12 +2,16 @@ import yaml import logging import re +from fst.constants import CURRENT_WORKING_DIR +from fst.db_utils import get_project_name + +logger = logging.getLogger(__name__) def get_active_file(file_path: str): if file_path and file_path.endswith(".sql"): return file_path else: - logging.warning("No active SQL file found.") + logger.warning("No active SQL file found.") return None def find_compiled_sql_file(file_path): diff --git a/fst/main.py b/fst/main.py index 288490f..0d73376 100644 --- a/fst/main.py +++ b/fst/main.py @@ -7,6 +7,7 @@ @click.group() def main(): + setup_logger() pass @main.command() diff --git a/fst/query_handler.py b/fst/query_handler.py index 6cc19e9..c761eaf 100644 --- a/fst/query_handler.py +++ b/fst/query_handler.py @@ -1,6 +1,14 @@ from watchdog.events import FileSystemEventHandler from threading import Timer import logging +import time +import subprocess +from tabulate import tabulate + +from fst.file_utils import get_active_file, get_model_name_from_file, find_compiled_sql_file, generate_test_yaml +from fst.db_utils import get_duckdb_file_path, execute_query + +logger = logging.getLogger(__name__) class QueryHandler(FileSystemEventHandler): def __init__(self, callback, active_file_path: str): @@ -28,7 +36,7 @@ def debounce_query(self): with open(self.active_file_path, "r") as file: query = file.read() if query is not None and query.strip(): - logging.info(f"Detected modification: {self.active_file_path}") + logger.info(f"Detected modification: {self.active_file_path}") self.callback(query, self.active_file_path) def handle_query(query, file_path): @@ -40,7 +48,7 @@ def handle_query(query, file_path): if not active_file: return model_name = get_model_name_from_file(active_file) - logging.info( + logger.info( f"Running `dbt build` with the modified SQL file ({model_name})..." ) result = subprocess.run( @@ -53,11 +61,11 @@ def handle_query(query, file_path): stdout_without_finished = result.stdout.split("Finished running")[0] if result.returncode == 0: - logging.info("`dbt build` was successful.") - logging.info(result.stdout) + logger.info("`dbt build` was successful.") + logger.info(result.stdout) else: - logging.error("Error running `dbt build`:") - logging.error(result.stdout) + logger.error("Error running `dbt build`:") + logger.error(result.stdout) if ( "PASS" not in stdout_without_finished @@ -71,33 +79,24 @@ def handle_query(query, file_path): duckdb_file_path = get_duckdb_file_path() _, column_names = execute_query(compiled_query, duckdb_file_path) - warning_message = colored( - "Warning: No tests were run with the `dbt build` command. Consider adding tests to your project.", - "yellow", - attrs=["bold"], - ) - logging.warning(warning_message) + warning_message = "Warning: No tests were run with the `dbt build` command. Consider adding tests to your project." 
+ + logger.warning(warning_message) test_yaml_path = generate_test_yaml( model_name, column_names, active_file ) - test_yaml_path_warning_message = colored( - f"Generated test YAML file: {test_yaml_path}", - "yellow", - attrs=["bold"], - ) - logging.warning(test_yaml_path_warning_message) + test_yaml_path_warning_message = f"Generated test YAML file: {test_yaml_path}" + + logger.warning(test_yaml_path_warning_message) compiled_sql_file = find_compiled_sql_file(file_path) if compiled_sql_file: with open(compiled_sql_file, "r") as file: compiled_query = file.read() - colored_compiled_query = highlight( - compiled_query, SqlLexer(), TerminalFormatter() - ) - logging.info(f"Executing compiled query from: {compiled_sql_file}") + logger.info(f"Executing compiled query from: {compiled_sql_file}") duckdb_file_path = get_duckdb_file_path() - logging.info(f"Using DuckDB file: {duckdb_file_path}") + logger.info(f"Using DuckDB file: {duckdb_file_path}") start_time = time.time() result, column_names = execute_query( @@ -105,17 +104,17 @@ def handle_query(query, file_path): ) query_time = time.time() - start_time - logging.info(f"`dbt build` time: {compile_time:.2f} seconds") - logging.info(f"Query time: {query_time:.2f} seconds") + logger.info(f"`dbt build` time: {compile_time:.2f} seconds") + logger.info(f"Query time: {query_time:.2f} seconds") - logging.info( + logger.info( "Result Preview" + "\n" + tabulate(result, headers=column_names, tablefmt="grid") ) else: - logging.error("Couldn't find the compiled SQL file.") + logger.error("Couldn't find the compiled SQL file.") except Exception as e: - logging.error(f"Error: {e}") + logger.error(f"Error: {e}") else: - logging.error("Empty query.") + logger.error("Empty query.") From d79883c0d73d9cd583a0b91ed17139869c1c6e83 Mon Sep 17 00:00:00 2001 From: Sung Won Chung Date: Tue, 21 Mar 2023 15:44:33 -0700 Subject: [PATCH 03/21] remove old file --- fst_query.py | 276 --------------------------------------------------- 1 file changed, 276 deletions(-) delete mode 100644 fst_query.py diff --git a/fst_query.py b/fst_query.py deleted file mode 100644 index f282cf5..0000000 --- a/fst_query.py +++ /dev/null @@ -1,276 +0,0 @@ -import duckdb -import os -import re -import time -from pathlib import Path -from watchdog.observers import Observer -from watchdog.events import FileSystemEventHandler -import subprocess -import yaml -import sys -from tabulate import tabulate -from pygments import highlight -from pygments.lexers import SqlLexer -from pygments.formatters import TerminalFormatter -from functools import lru_cache -from threading import Timer -from termcolor import colored -import logging -from colorlog import ColoredFormatter - -observer = None - - -def setup_logger(): - log_format = "%(asctime)s - %(levelname)s - %(log_color)s%(message)s%(reset)s" - - formatter = ColoredFormatter( - log_format, - datefmt="%Y-%m-%d %H:%M:%S", - reset=True, - log_colors={ - "DEBUG": "cyan", - "INFO": "light_blue", - "WARNING": "yellow", - "ERROR": "red", - "CRITICAL": "red,bg_white", - }, - ) - - logger = logging.getLogger() - logger.setLevel(logging.INFO) - - stream_handler = logging.StreamHandler() - stream_handler.setFormatter(formatter) - - logger.addHandler(stream_handler) - - -CURRENT_WORKING_DIR = os.getcwd() - -# Load profiles.yml only once -with open("profiles.yml", "r") as file: - PROFILES = yaml.safe_load(file) - - -class QueryHandler(FileSystemEventHandler): - def __init__(self, callback, active_file_path: str): - self.callback = callback - self.active_file_path = 
active_file_path - self.debounce_timer = None - - def on_modified(self, event): - if event.src_path.endswith(".sql"): - active_file = get_active_file(self.active_file_path) - if active_file and active_file == event.src_path: - if self.debounce_timer is None: - self.debounce_timer = Timer(1.5, self.debounce_query) - self.debounce_timer.start() - else: - self.debounce_timer.cancel() - self.debounce_timer = Timer(1.5, self.debounce_query) - self.debounce_timer.start() - - def debounce_query(self): - if self.debounce_timer is not None: - self.debounce_timer.cancel() - self.debounce_timer = None - query = None - with open(self.active_file_path, "r") as file: - query = file.read() - if query is not None and query.strip(): - logging.info(f"Detected modification: {self.active_file_path}") - self.callback(query, self.active_file_path) - - -@lru_cache -def execute_query(query: str, db_file: str): - connection = duckdb.connect(database=db_file, read_only=False) - result = connection.execute(query).fetchmany(5) - column_names = [desc[0] for desc in connection.description] - connection.close() - return result, column_names - - -def watch_directory(directory: str, callback, active_file_path: str): - global observer - setup_logger() - logging.info("Started watching directory...") - event_handler = QueryHandler(callback, active_file_path) - observer = Observer() - observer.schedule(event_handler, path=directory, recursive=True) - observer.start() - - try: - while True: - time.sleep(1) - except KeyboardInterrupt: - observer.stop() - observer.join() - logging.info("Stopped watching directory.") - - -def get_active_file(file_path: str): - if file_path and file_path.endswith(".sql"): - return file_path - else: - logging.warning("No active SQL file found.") - return None - - -@lru_cache -def get_project_name(): - project_name = list(PROFILES.keys())[0] - logging.info(f"project_name: {project_name}") - return project_name - - -def find_compiled_sql_file(file_path): - active_file = get_active_file(file_path) - if not active_file: - return None - project_directory = CURRENT_WORKING_DIR - project_name = get_project_name() - relative_file_path = os.path.relpath(active_file, project_directory) - compiled_directory = os.path.join( - project_directory, "target", "compiled", project_name - ) - compiled_file_path = os.path.join(compiled_directory, relative_file_path) - return compiled_file_path if os.path.exists(compiled_file_path) else None - - -def get_model_name_from_file(file_path: str): - project_directory = CURRENT_WORKING_DIR - models_directory = os.path.join(project_directory, "models") - relative_file_path = os.path.relpath(file_path, models_directory) - model_name, _ = os.path.splitext(relative_file_path) - return model_name.replace(os.sep, ".") - - -@lru_cache -def get_duckdb_file_path(): - target = PROFILES["jaffle_shop"]["target"] - db_path = PROFILES["jaffle_shop"]["outputs"][target]["path"] - return db_path - - -def generate_test_yaml(model_name, column_names, active_file_path): - test_yaml = f"version: 2\n\nmodels:\n - name: {model_name}\n columns:" - - for column in column_names: - test_yaml += f"\n - name: {column}\n description: 'A placeholder description for {column}'" - - if re.search(r"(_id|_ID)$", column): - test_yaml += "\n tests:\n - unique\n - not_null" - - active_file_directory = os.path.dirname(active_file_path) - active_file_name, _ = os.path.splitext(os.path.basename(active_file_path)) - new_yaml_file_name = f"{active_file_name}.yml" - new_yaml_file_path = 
os.path.join(active_file_directory, new_yaml_file_name) - - with open(new_yaml_file_path, "w") as file: - file.write(test_yaml) - - return new_yaml_file_path - - -def handle_query(query, file_path): - if query.strip(): - try: - start_time = time.time() - - active_file = get_active_file(file_path) - if not active_file: - return - model_name = get_model_name_from_file(active_file) - logging.info( - f"Running `dbt build` with the modified SQL file ({model_name})..." - ) - result = subprocess.run( - ["dbt", "build", "--select", model_name], - capture_output=True, - text=True, - ) - compile_time = time.time() - start_time - - stdout_without_finished = result.stdout.split("Finished running")[0] - - if result.returncode == 0: - logging.info("`dbt build` was successful.") - logging.info(result.stdout) - else: - logging.error("Error running `dbt build`:") - logging.error(result.stdout) - - if ( - "PASS" not in stdout_without_finished - and "FAIL" not in stdout_without_finished - and "ERROR" not in stdout_without_finished - ): - compiled_sql_file = find_compiled_sql_file(file_path) - if compiled_sql_file: - with open(compiled_sql_file, "r") as file: - compiled_query = file.read() - duckdb_file_path = get_duckdb_file_path() - _, column_names = execute_query(compiled_query, duckdb_file_path) - - warning_message = colored( - "Warning: No tests were run with the `dbt build` command. Consider adding tests to your project.", - "yellow", - attrs=["bold"], - ) - logging.warning(warning_message) - - test_yaml_path = generate_test_yaml( - model_name, column_names, active_file - ) - test_yaml_path_warning_message = colored( - f"Generated test YAML file: {test_yaml_path}", - "yellow", - attrs=["bold"], - ) - logging.warning(test_yaml_path_warning_message) - - compiled_sql_file = find_compiled_sql_file(file_path) - if compiled_sql_file: - with open(compiled_sql_file, "r") as file: - compiled_query = file.read() - colored_compiled_query = highlight( - compiled_query, SqlLexer(), TerminalFormatter() - ) - logging.info(f"Executing compiled query from: {compiled_sql_file}") - duckdb_file_path = get_duckdb_file_path() - logging.info(f"Using DuckDB file: {duckdb_file_path}") - - start_time = time.time() - result, column_names = execute_query( - compiled_query, duckdb_file_path - ) - query_time = time.time() - start_time - - logging.info(f"`dbt build` time: {compile_time:.2f} seconds") - logging.info(f"Query time: {query_time:.2f} seconds") - - logging.info( - "Result Preview" - + "\n" - + tabulate(result, headers=column_names, tablefmt="grid") - ) - else: - logging.error("Couldn't find the compiled SQL file.") - except Exception as e: - logging.error(f"Error: {e}") - else: - logging.error("Empty query.") - - -if __name__ == "__main__": - setup_logger() - if len(sys.argv) > 1: - active_file_path = sys.argv[1] - else: - active_file_path = None - - project_directory = CURRENT_WORKING_DIR - logging.info(f"Watching directory: {project_directory}") - watch_directory(project_directory, handle_query, active_file_path) From 98409c90a264cc2461f763a84e9ddc0d762b1843 Mon Sep 17 00:00:00 2001 From: Sung Won Chung Date: Tue, 21 Mar 2023 16:44:32 -0700 Subject: [PATCH 04/21] fst start works --- fst/file_utils.py | 7 +++++++ fst/main.py | 49 +++++++++++++++++++++++++++----------------- fst/query_handler.py | 49 ++++++++++++++++++++++++++++++++++++++++---- 3 files changed, 82 insertions(+), 23 deletions(-) diff --git a/fst/file_utils.py b/fst/file_utils.py index 953f289..19848a2 100644 --- a/fst/file_utils.py +++ b/fst/file_utils.py 
@@ -60,3 +60,10 @@ def get_model_paths(): return [ os.path.join(os.getcwd(), path) for path in model_paths ] + +def get_models_directory(project_dir): + dbt_project_file = os.path.join(project_dir, 'dbt_project.yml') + with open(dbt_project_file, 'r') as file: + dbt_project = yaml.safe_load(file) + models_subdir = dbt_project.get('model-paths')[0] + return os.path.join(project_dir, models_subdir) \ No newline at end of file diff --git a/fst/main.py b/fst/main.py index 0d73376..87598e6 100644 --- a/fst/main.py +++ b/fst/main.py @@ -1,36 +1,47 @@ -import click import os +import time +import click +from watchdog.observers import Observer + +from fst.file_utils import get_models_directory +from fst.query_handler import handle_query, DynamicQueryHandler, QueryHandler from fst.logger import setup_logger -from fst.query_handler import handle_query -from fst.directory_watcher import watch_directory -from fst.file_utils import get_active_file, get_model_paths + @click.group() def main(): setup_logger() pass + @main.command() @click.option( "--file-path", - type=click.Path( - exists=True, file_okay=True, dir_okay=False, readable=True, resolve_path=True - ), - help="Path to the SQL file you want to watch.", + default=None, + help="Path to the SQL file to be watched.", ) def start(file_path): - model_paths = get_model_paths() - if file_path: - click.echo(f"Started watching directory: {os.path.dirname(file_path)}") - watch_directory( - os.path.dirname(file_path), handle_query, file_path - ) - elif model_paths: - for path in model_paths: - click.echo(f"Started watching directory: {path}") - watch_directory(path, handle_query, None) + project_dir = os.path.abspath(".") + models_dir = get_models_directory(project_dir) + + if file_path is None: + click.echo(f"Started watching directory dynamically: {models_dir}") + event_handler = DynamicQueryHandler(handle_query, models_dir) else: - click.echo("Please provide a file path using the --file-path option.") + click.echo(f"Started watching file: {file_path}") + event_handler = QueryHandler(handle_query, file_path) + + observer = Observer() + observer.schedule(event_handler, models_dir, recursive=False) + observer.start() + + try: + while True: + time.sleep(1) + except KeyboardInterrupt: + observer.stop() + observer.join() + if __name__ == "__main__": main() diff --git a/fst/query_handler.py b/fst/query_handler.py index c761eaf..d30c635 100644 --- a/fst/query_handler.py +++ b/fst/query_handler.py @@ -1,15 +1,46 @@ from watchdog.events import FileSystemEventHandler from threading import Timer import logging +import os import time import subprocess from tabulate import tabulate -from fst.file_utils import get_active_file, get_model_name_from_file, find_compiled_sql_file, generate_test_yaml +from fst.file_utils import ( + get_active_file, + get_model_name_from_file, + find_compiled_sql_file, + generate_test_yaml, +) from fst.db_utils import get_duckdb_file_path, execute_query logger = logging.getLogger(__name__) + +class DynamicQueryHandler(FileSystemEventHandler): + def __init__(self, callback, models_dir: str): + self.callback = callback + self.models_dir = models_dir + self.debounce_timer = None + + def on_modified(self, event): + if event.src_path.endswith(".sql"): + if os.path.dirname(event.src_path) == self.models_dir: + handle_query_for_file(event.src_path) + if self.debounce_timer is None: + self.debounce_timer = Timer(1.5, self.debounce_query) + self.debounce_timer.start() + else: + self.debounce_timer.cancel() + self.debounce_timer = Timer(1.5, 
self.debounce_query) + self.debounce_timer.start() + + def debounce_query(self): + if self.debounce_timer is not None: + self.debounce_timer.cancel() + self.debounce_timer = None + + class QueryHandler(FileSystemEventHandler): def __init__(self, callback, active_file_path: str): self.callback = callback @@ -39,6 +70,14 @@ def debounce_query(self): logger.info(f"Detected modification: {self.active_file_path}") self.callback(query, self.active_file_path) + +def handle_query_for_file(file_path): + with open(file_path, "r") as file: + query = file.read() + if query is not None and query.strip(): + handle_query(query, file_path) + + def handle_query(query, file_path): if query.strip(): try: @@ -80,14 +119,16 @@ def handle_query(query, file_path): _, column_names = execute_query(compiled_query, duckdb_file_path) warning_message = "Warning: No tests were run with the `dbt build` command. Consider adding tests to your project." - + logger.warning(warning_message) test_yaml_path = generate_test_yaml( model_name, column_names, active_file ) - test_yaml_path_warning_message = f"Generated test YAML file: {test_yaml_path}" - + test_yaml_path_warning_message = ( + f"Generated test YAML file: {test_yaml_path}" + ) + logger.warning(test_yaml_path_warning_message) compiled_sql_file = find_compiled_sql_file(file_path) From 1afb04c9e95f9e5014a45e27b3c5c479edcd815e Mon Sep 17 00:00:00 2001 From: Sung Won Chung Date: Wed, 22 Mar 2023 08:52:19 -0700 Subject: [PATCH 05/21] pollingobserver refactor --- fst/directory_watcher.py | 4 +- fst/fst_query.py | 276 --------------------------------------- fst/main.py | 4 +- 3 files changed, 4 insertions(+), 280 deletions(-) delete mode 100644 fst/fst_query.py diff --git a/fst/directory_watcher.py b/fst/directory_watcher.py index 6d105db..f70c7ed 100644 --- a/fst/directory_watcher.py +++ b/fst/directory_watcher.py @@ -1,6 +1,6 @@ import os import time -from watchdog.observers import Observer +from watchdog.observers.polling import PollingObserver from fst.query_handler import QueryHandler import logging @@ -12,7 +12,7 @@ def watch_directory(directory: str, callback, active_file_path: str): global observer logger.info("Started watching directory...") event_handler = QueryHandler(callback, active_file_path) - observer = Observer() + observer = PollingObserver() observer.schedule(event_handler, path=directory, recursive=True) observer.start() diff --git a/fst/fst_query.py b/fst/fst_query.py deleted file mode 100644 index 1534328..0000000 --- a/fst/fst_query.py +++ /dev/null @@ -1,276 +0,0 @@ -import duckdb -import os -import re -import time -from pathlib import Path -from watchdog.observers.polling import PollingObserver -from watchdog.events import FileSystemEventHandler -import subprocess -import yaml -import sys -from tabulate import tabulate -from pygments import highlight -from pygments.lexers import SqlLexer -from pygments.formatters import TerminalFormatter -from functools import lru_cache -from threading import Timer -from termcolor import colored -import logging -from colorlog import ColoredFormatter - -observer = None - - -def setup_logger(): - log_format = "%(asctime)s - %(levelname)s - %(log_color)s%(message)s%(reset)s" - - formatter = ColoredFormatter( - log_format, - datefmt="%Y-%m-%d %H:%M:%S", - reset=True, - log_colors={ - "DEBUG": "cyan", - "INFO": "light_blue", - "WARNING": "yellow", - "ERROR": "red", - "CRITICAL": "red,bg_white", - }, - ) - - logger = logging.getLogger() - logger.setLevel(logging.INFO) - - stream_handler = logging.StreamHandler() - 
stream_handler.setFormatter(formatter) - - logger.addHandler(stream_handler) - - -CURRENT_WORKING_DIR = os.getcwd() - -# Load profiles.yml only once -with open("profiles.yml", "r") as file: - PROFILES = yaml.safe_load(file) - - -class QueryHandler(FileSystemEventHandler): - def __init__(self, callback, active_file_path: str): - self.callback = callback - self.active_file_path = active_file_path - self.debounce_timer = None - - def on_modified(self, event): - if event.src_path.endswith(".sql"): - active_file = get_active_file(self.active_file_path) - if active_file and active_file == event.src_path: - if self.debounce_timer is None: - self.debounce_timer = Timer(1.5, self.debounce_query) - self.debounce_timer.start() - else: - self.debounce_timer.cancel() - self.debounce_timer = Timer(1.5, self.debounce_query) - self.debounce_timer.start() - - def debounce_query(self): - if self.debounce_timer is not None: - self.debounce_timer.cancel() - self.debounce_timer = None - query = None - with open(self.active_file_path, "r") as file: - query = file.read() - if query is not None and query.strip(): - logging.info(f"Detected modification: {self.active_file_path}") - self.callback(query, self.active_file_path) - - -@lru_cache -def execute_query(query: str, db_file: str): - connection = duckdb.connect(database=db_file, read_only=False) - result = connection.execute(query).fetchmany(5) - column_names = [desc[0] for desc in connection.description] - connection.close() - return result, column_names - - -def watch_directory(directory: str, callback, active_file_path: str): - global observer - setup_logger() - logging.info("Started watching directory...") - event_handler = QueryHandler(callback, active_file_path) - observer = PollingObserver() - observer.schedule(event_handler, path=directory, recursive=True) - observer.start() - - try: - while True: - time.sleep(1) - except KeyboardInterrupt: - observer.stop() - observer.join() - logging.info("Stopped watching directory.") - - -def get_active_file(file_path: str): - if file_path and file_path.endswith(".sql"): - return file_path - else: - logging.warning("No active SQL file found.") - return None - - -@lru_cache -def get_project_name(): - project_name = list(PROFILES.keys())[0] - logging.info(f"project_name: {project_name}") - return project_name - - -def find_compiled_sql_file(file_path): - active_file = get_active_file(file_path) - if not active_file: - return None - project_directory = CURRENT_WORKING_DIR - project_name = get_project_name() - relative_file_path = os.path.relpath(active_file, project_directory) - compiled_directory = os.path.join( - project_directory, "target", "compiled", project_name - ) - compiled_file_path = os.path.join(compiled_directory, relative_file_path) - return compiled_file_path if os.path.exists(compiled_file_path) else None - - -def get_model_name_from_file(file_path: str): - project_directory = CURRENT_WORKING_DIR - models_directory = os.path.join(project_directory, "models") - relative_file_path = os.path.relpath(file_path, models_directory) - model_name, _ = os.path.splitext(relative_file_path) - return model_name.replace(os.sep, ".") - - -@lru_cache -def get_duckdb_file_path(): - target = PROFILES["jaffle_shop"]["target"] - db_path = PROFILES["jaffle_shop"]["outputs"][target]["path"] - return db_path - - -def generate_test_yaml(model_name, column_names, active_file_path): - test_yaml = f"version: 2\n\nmodels:\n - name: {model_name}\n columns:" - - for column in column_names: - test_yaml += f"\n - name: {column}\n 
description: 'A placeholder description for {column}'" - - if re.search(r"(_id|_ID)$", column): - test_yaml += "\n tests:\n - unique\n - not_null" - - active_file_directory = os.path.dirname(active_file_path) - active_file_name, _ = os.path.splitext(os.path.basename(active_file_path)) - new_yaml_file_name = f"{active_file_name}.yml" - new_yaml_file_path = os.path.join(active_file_directory, new_yaml_file_name) - - with open(new_yaml_file_path, "w") as file: - file.write(test_yaml) - - return new_yaml_file_path - - -def handle_query(query, file_path): - if query.strip(): - try: - start_time = time.time() - - active_file = get_active_file(file_path) - if not active_file: - return - model_name = get_model_name_from_file(active_file) - logging.info( - f"Running `dbt build` with the modified SQL file ({model_name})..." - ) - result = subprocess.run( - ["dbt", "build", "--select", model_name], - capture_output=True, - text=True, - ) - compile_time = time.time() - start_time - - stdout_without_finished = result.stdout.split("Finished running")[0] - - if result.returncode == 0: - logging.info("`dbt build` was successful.") - logging.info(result.stdout) - else: - logging.error("Error running `dbt build`:") - logging.error(result.stdout) - - if ( - "PASS" not in stdout_without_finished - and "FAIL" not in stdout_without_finished - and "ERROR" not in stdout_without_finished - ): - compiled_sql_file = find_compiled_sql_file(file_path) - if compiled_sql_file: - with open(compiled_sql_file, "r") as file: - compiled_query = file.read() - duckdb_file_path = get_duckdb_file_path() - _, column_names = execute_query(compiled_query, duckdb_file_path) - - warning_message = colored( - "Warning: No tests were run with the `dbt build` command. Consider adding tests to your project.", - "yellow", - attrs=["bold"], - ) - logging.warning(warning_message) - - test_yaml_path = generate_test_yaml( - model_name, column_names, active_file - ) - test_yaml_path_warning_message = colored( - f"Generated test YAML file: {test_yaml_path}", - "yellow", - attrs=["bold"], - ) - logging.warning(test_yaml_path_warning_message) - - compiled_sql_file = find_compiled_sql_file(file_path) - if compiled_sql_file: - with open(compiled_sql_file, "r") as file: - compiled_query = file.read() - colored_compiled_query = highlight( - compiled_query, SqlLexer(), TerminalFormatter() - ) - logging.info(f"Executing compiled query from: {compiled_sql_file}") - duckdb_file_path = get_duckdb_file_path() - logging.info(f"Using DuckDB file: {duckdb_file_path}") - - start_time = time.time() - result, column_names = execute_query( - compiled_query, duckdb_file_path - ) - query_time = time.time() - start_time - - logging.info(f"`dbt build` time: {compile_time:.2f} seconds") - logging.info(f"Query time: {query_time:.2f} seconds") - - logging.info( - "Result Preview" - + "\n" - + tabulate(result, headers=column_names, tablefmt="grid") - ) - else: - logging.error("Couldn't find the compiled SQL file.") - except Exception as e: - logging.error(f"Error: {e}") - else: - logging.error("Empty query.") - - -if __name__ == "__main__": - setup_logger() - if len(sys.argv) > 1: - active_file_path = sys.argv[1] - else: - active_file_path = None - - project_directory = CURRENT_WORKING_DIR - logging.info(f"Watching directory: {project_directory}") - watch_directory(project_directory, handle_query, active_file_path) diff --git a/fst/main.py b/fst/main.py index 87598e6..e1352c3 100644 --- a/fst/main.py +++ b/fst/main.py @@ -1,7 +1,7 @@ import os import time import click 
-from watchdog.observers import Observer +from watchdog.observers.polling import PollingObserver from fst.file_utils import get_models_directory from fst.query_handler import handle_query, DynamicQueryHandler, QueryHandler @@ -31,7 +31,7 @@ def start(file_path): click.echo(f"Started watching file: {file_path}") event_handler = QueryHandler(handle_query, file_path) - observer = Observer() + observer = PollingObserver() observer.schedule(event_handler, models_dir, recursive=False) observer.start() From 9b6344bada405063f88a7f5ab915bfc2f82353fe Mon Sep 17 00:00:00 2001 From: Sung Won Chung Date: Wed, 22 Mar 2023 09:02:56 -0700 Subject: [PATCH 06/21] better debounce --- fst/query_handler.py | 39 ++++++++++++++++++++++++--------------- 1 file changed, 24 insertions(+), 15 deletions(-) diff --git a/fst/query_handler.py b/fst/query_handler.py index d30c635..ee21dcc 100644 --- a/fst/query_handler.py +++ b/fst/query_handler.py @@ -26,20 +26,26 @@ def __init__(self, callback, models_dir: str): def on_modified(self, event): if event.src_path.endswith(".sql"): if os.path.dirname(event.src_path) == self.models_dir: - handle_query_for_file(event.src_path) - if self.debounce_timer is None: - self.debounce_timer = Timer(1.5, self.debounce_query) - self.debounce_timer.start() - else: - self.debounce_timer.cancel() - self.debounce_timer = Timer(1.5, self.debounce_query) - self.debounce_timer.start() + self.debounce() + self.handle_query_for_file(event.src_path) + + def debounce(self): + if self.debounce_timer is not None: + self.debounce_timer.cancel() + self.debounce_timer = Timer(1.5, self.debounce_query) + self.debounce_timer.start() def debounce_query(self): if self.debounce_timer is not None: self.debounce_timer.cancel() self.debounce_timer = None + def handle_query_for_file(self, file_path): + with open(file_path, "r") as file: + query = file.read() + if query is not None and query.strip(): + handle_query(query, file_path) + class QueryHandler(FileSystemEventHandler): def __init__(self, callback, active_file_path: str): @@ -71,13 +77,6 @@ def debounce_query(self): self.callback(query, self.active_file_path) -def handle_query_for_file(file_path): - with open(file_path, "r") as file: - query = file.read() - if query is not None and query.strip(): - handle_query(query, file_path) - - def handle_query(query, file_path): if query.strip(): try: @@ -130,6 +129,16 @@ def handle_query(query, file_path): ) logger.warning(test_yaml_path_warning_message) + logger.warning("Rerunning `dbt build` with the generated test YAML file...") + time.sleep(0.5) + result_rerun = subprocess.run( + ["dbt", "build", "--select", model_name], + capture_output=True, + text=True, + ) + if result_rerun.returncode == 0: + logger.info("`dbt build` with generated tests was successful.") + logger.info(result.stdout) compiled_sql_file = find_compiled_sql_file(file_path) if compiled_sql_file: From aa5fee65d7ec50750a62d39c56791641bcad3f44 Mon Sep 17 00:00:00 2001 From: Sung Won Chung Date: Wed, 22 Mar 2023 09:18:32 -0700 Subject: [PATCH 07/21] simple fst start only --- fst/main.py | 15 +++------------ 1 file changed, 3 insertions(+), 12 deletions(-) diff --git a/fst/main.py b/fst/main.py index e1352c3..cb83269 100644 --- a/fst/main.py +++ b/fst/main.py @@ -15,21 +15,12 @@ def main(): @main.command() -@click.option( - "--file-path", - default=None, - help="Path to the SQL file to be watched.", -) -def start(file_path): +def start(): project_dir = os.path.abspath(".") models_dir = get_models_directory(project_dir) - if file_path is None: - 
click.echo(f"Started watching directory dynamically: {models_dir}") - event_handler = DynamicQueryHandler(handle_query, models_dir) - else: - click.echo(f"Started watching file: {file_path}") - event_handler = QueryHandler(handle_query, file_path) + click.echo(f"Started watching directory dynamically: {models_dir}") + event_handler = DynamicQueryHandler(handle_query, models_dir) observer = PollingObserver() observer.schedule(event_handler, models_dir, recursive=False) From d8f3db0e24f46093ccffcec1bf7fd95e742cfd1e Mon Sep 17 00:00:00 2001 From: Sung Won Chung Date: Wed, 22 Mar 2023 10:04:01 -0700 Subject: [PATCH 08/21] cleaner imports --- fst/directory_watcher.py | 9 ++++----- fst/main.py | 16 +++------------- fst/query_handler.py | 2 +- 3 files changed, 8 insertions(+), 19 deletions(-) diff --git a/fst/directory_watcher.py b/fst/directory_watcher.py index f70c7ed..38157da 100644 --- a/fst/directory_watcher.py +++ b/fst/directory_watcher.py @@ -8,12 +8,11 @@ observer = None -def watch_directory(directory: str, callback, active_file_path: str): +def watch_directory(event_handler, file_path: str, ): global observer - logger.info("Started watching directory...") - event_handler = QueryHandler(callback, active_file_path) + logger.info(f"Started watching directory dynamically: {file_path}") observer = PollingObserver() - observer.schedule(event_handler, path=directory, recursive=True) + observer.schedule(event_handler, path=file_path, recursive=False) observer.start() try: @@ -22,4 +21,4 @@ def watch_directory(directory: str, callback, active_file_path: str): except KeyboardInterrupt: observer.stop() observer.join() - logger.info("Stopped watching directory.") \ No newline at end of file + logger.info(f"Stopped watching directory dynamically: {file_path}") \ No newline at end of file diff --git a/fst/main.py b/fst/main.py index cb83269..5e8470e 100644 --- a/fst/main.py +++ b/fst/main.py @@ -4,7 +4,8 @@ from watchdog.observers.polling import PollingObserver from fst.file_utils import get_models_directory -from fst.query_handler import handle_query, DynamicQueryHandler, QueryHandler +from fst.query_handler import handle_query, DynamicQueryHandler +from fst.directory_watcher import watch_directory from fst.logger import setup_logger @@ -19,19 +20,8 @@ def start(): project_dir = os.path.abspath(".") models_dir = get_models_directory(project_dir) - click.echo(f"Started watching directory dynamically: {models_dir}") event_handler = DynamicQueryHandler(handle_query, models_dir) - - observer = PollingObserver() - observer.schedule(event_handler, models_dir, recursive=False) - observer.start() - - try: - while True: - time.sleep(1) - except KeyboardInterrupt: - observer.stop() - observer.join() + watch_directory(event_handler, models_dir) if __name__ == "__main__": diff --git a/fst/query_handler.py b/fst/query_handler.py index ee21dcc..ccc0842 100644 --- a/fst/query_handler.py +++ b/fst/query_handler.py @@ -87,7 +87,7 @@ def handle_query(query, file_path): return model_name = get_model_name_from_file(active_file) logger.info( - f"Running `dbt build` with the modified SQL file ({model_name})..." + f"Running `dbt build` with the modified SQL file ({active_file})..." 
) result = subprocess.run( ["dbt", "build", "--select", model_name], From bec6391e97965aa8363f61c53c28b9e37c9588b1 Mon Sep 17 00:00:00 2001 From: Sung Won Chung Date: Wed, 22 Mar 2023 10:12:34 -0700 Subject: [PATCH 09/21] add path option --- fst/main.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/fst/main.py b/fst/main.py index 5e8470e..2517d96 100644 --- a/fst/main.py +++ b/fst/main.py @@ -1,12 +1,11 @@ import os -import time import click -from watchdog.observers.polling import PollingObserver from fst.file_utils import get_models_directory from fst.query_handler import handle_query, DynamicQueryHandler from fst.directory_watcher import watch_directory from fst.logger import setup_logger +from fst.constants import CURRENT_WORKING_DIR @click.group() @@ -15,9 +14,10 @@ def main(): pass -@main.command() -def start(): - project_dir = os.path.abspath(".") +@main.command() #TODO: add file path flag +@click.option("--path", "-p", default=CURRENT_WORKING_DIR, help="dbt project root directory") +def start(path): + project_dir = path models_dir = get_models_directory(project_dir) event_handler = DynamicQueryHandler(handle_query, models_dir) From 00e05429fa1c083b9bc0e1eb307033c185c9c429 Mon Sep 17 00:00:00 2001 From: Sung Won Chung Date: Wed, 22 Mar 2023 10:14:37 -0700 Subject: [PATCH 10/21] rename file and imports --- fst/{constants.py => config_defaults.py} | 0 fst/db_utils.py | 2 +- fst/file_utils.py | 2 +- fst/main.py | 3 +-- 4 files changed, 3 insertions(+), 4 deletions(-) rename fst/{constants.py => config_defaults.py} (100%) diff --git a/fst/constants.py b/fst/config_defaults.py similarity index 100% rename from fst/constants.py rename to fst/config_defaults.py diff --git a/fst/db_utils.py b/fst/db_utils.py index 5bfd34e..1957933 100644 --- a/fst/db_utils.py +++ b/fst/db_utils.py @@ -1,7 +1,7 @@ import duckdb import os from functools import lru_cache -from fst.constants import PROFILES +from fst.config_defaults import PROFILES import logging logger = logging.getLogger(__name__) diff --git a/fst/file_utils.py b/fst/file_utils.py index 19848a2..6526bdc 100644 --- a/fst/file_utils.py +++ b/fst/file_utils.py @@ -2,7 +2,7 @@ import yaml import logging import re -from fst.constants import CURRENT_WORKING_DIR +from fst.config_defaults import CURRENT_WORKING_DIR from fst.db_utils import get_project_name logger = logging.getLogger(__name__) diff --git a/fst/main.py b/fst/main.py index 2517d96..5c8b487 100644 --- a/fst/main.py +++ b/fst/main.py @@ -1,11 +1,10 @@ -import os import click from fst.file_utils import get_models_directory from fst.query_handler import handle_query, DynamicQueryHandler from fst.directory_watcher import watch_directory from fst.logger import setup_logger -from fst.constants import CURRENT_WORKING_DIR +from fst.config_defaults import CURRENT_WORKING_DIR @click.group() From 1a809abf736899103b4805c481cfb87b9ec0c42d Mon Sep 17 00:00:00 2001 From: Sung Won Chung Date: Wed, 22 Mar 2023 10:16:11 -0700 Subject: [PATCH 11/21] less words --- fst/directory_watcher.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fst/directory_watcher.py b/fst/directory_watcher.py index 38157da..9a10ce2 100644 --- a/fst/directory_watcher.py +++ b/fst/directory_watcher.py @@ -10,7 +10,7 @@ def watch_directory(event_handler, file_path: str, ): global observer - logger.info(f"Started watching directory dynamically: {file_path}") + logger.info(f"Started watching directory: {file_path}") observer = PollingObserver() observer.schedule(event_handler, 
path=file_path, recursive=False) observer.start() @@ -21,4 +21,4 @@ def watch_directory(event_handler, file_path: str, ): except KeyboardInterrupt: observer.stop() observer.join() - logger.info(f"Stopped watching directory dynamically: {file_path}") \ No newline at end of file + logger.info(f"Stopped watching directory: {file_path}") \ No newline at end of file From a452ee83bf46ea97e58eda5844c5ad51d4cd84ab Mon Sep 17 00:00:00 2001 From: Sung Won Chung Date: Wed, 22 Mar 2023 10:21:05 -0700 Subject: [PATCH 12/21] remove unused stuff --- fst/directory_watcher.py | 1 - fst/main.py | 2 +- fst/query_handler.py | 34 +++------------------------------- 3 files changed, 4 insertions(+), 33 deletions(-) diff --git a/fst/directory_watcher.py b/fst/directory_watcher.py index 9a10ce2..182a64a 100644 --- a/fst/directory_watcher.py +++ b/fst/directory_watcher.py @@ -1,7 +1,6 @@ import os import time from watchdog.observers.polling import PollingObserver -from fst.query_handler import QueryHandler import logging logger = logging.getLogger(__name__) diff --git a/fst/main.py b/fst/main.py index 5c8b487..e2f319f 100644 --- a/fst/main.py +++ b/fst/main.py @@ -13,7 +13,7 @@ def main(): pass -@main.command() #TODO: add file path flag +@main.command() @click.option("--path", "-p", default=CURRENT_WORKING_DIR, help="dbt project root directory") def start(path): project_dir = path diff --git a/fst/query_handler.py b/fst/query_handler.py index ccc0842..3676acb 100644 --- a/fst/query_handler.py +++ b/fst/query_handler.py @@ -47,36 +47,6 @@ def handle_query_for_file(self, file_path): handle_query(query, file_path) -class QueryHandler(FileSystemEventHandler): - def __init__(self, callback, active_file_path: str): - self.callback = callback - self.active_file_path = active_file_path - self.debounce_timer = None - - def on_modified(self, event): - if event.src_path.endswith(".sql"): - active_file = get_active_file(self.active_file_path) - if active_file and active_file == event.src_path: - if self.debounce_timer is None: - self.debounce_timer = Timer(1.5, self.debounce_query) - self.debounce_timer.start() - else: - self.debounce_timer.cancel() - self.debounce_timer = Timer(1.5, self.debounce_query) - self.debounce_timer.start() - - def debounce_query(self): - if self.debounce_timer is not None: - self.debounce_timer.cancel() - self.debounce_timer = None - query = None - with open(self.active_file_path, "r") as file: - query = file.read() - if query is not None and query.strip(): - logger.info(f"Detected modification: {self.active_file_path}") - self.callback(query, self.active_file_path) - - def handle_query(query, file_path): if query.strip(): try: @@ -129,7 +99,9 @@ def handle_query(query, file_path): ) logger.warning(test_yaml_path_warning_message) - logger.warning("Rerunning `dbt build` with the generated test YAML file...") + logger.warning( + "Rerunning `dbt build` with the generated test YAML file..." 
+ ) time.sleep(0.5) result_rerun = subprocess.run( ["dbt", "build", "--select", model_name], From 9b3ad3e8855a17b8518937c0cc8c4cea71a9dd7a Mon Sep 17 00:00:00 2001 From: Sung Won Chung Date: Wed, 22 Mar 2023 10:56:26 -0700 Subject: [PATCH 13/21] robust test runs --- fst/query_handler.py | 31 ++++++++++++++++++------------- 1 file changed, 18 insertions(+), 13 deletions(-) diff --git a/fst/query_handler.py b/fst/query_handler.py index 3676acb..6ecbe7f 100644 --- a/fst/query_handler.py +++ b/fst/query_handler.py @@ -98,19 +98,24 @@ def handle_query(query, file_path): f"Generated test YAML file: {test_yaml_path}" ) - logger.warning(test_yaml_path_warning_message) - logger.warning( - "Rerunning `dbt build` with the generated test YAML file..." - ) - time.sleep(0.5) - result_rerun = subprocess.run( - ["dbt", "build", "--select", model_name], - capture_output=True, - text=True, - ) - if result_rerun.returncode == 0: - logger.info("`dbt build` with generated tests was successful.") - logger.info(result.stdout) + # Verify if the newly generated test YAML file exists + if os.path.isfile(test_yaml_path): + logger.warning(test_yaml_path_warning_message) + logger.warning( + "Running `dbt test` with the generated test YAML file..." + ) + time.sleep(0.5) + result_rerun = subprocess.run( + ["dbt", "test", "--select", model_name], + capture_output=True, + text=True, + ) + if result_rerun.returncode == 0: + logger.info("`dbt test` with generated tests was successful.") + logger.info(result_rerun.stdout) + else: + logger.error("Couldn't find the generated test YAML file.") + compiled_sql_file = find_compiled_sql_file(file_path) if compiled_sql_file: From 353b0a73e977807a685b1c88ad833fbc9e002c94 Mon Sep 17 00:00:00 2001 From: Sung Won Chung Date: Wed, 22 Mar 2023 11:09:42 -0700 Subject: [PATCH 14/21] robust path flagging --- fst/main.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/fst/main.py b/fst/main.py index e2f319f..ee91255 100644 --- a/fst/main.py +++ b/fst/main.py @@ -14,7 +14,15 @@ def main(): @main.command() -@click.option("--path", "-p", default=CURRENT_WORKING_DIR, help="dbt project root directory") +@click.option( + "--path", + "-p", + default=CURRENT_WORKING_DIR, + type=click.Path( + exists=True, dir_okay=True, readable=True, resolve_path=True + ), + help="dbt project root directory. Defaults to current working directory.", +) def start(path): project_dir = path models_dir = get_models_directory(project_dir) From ca1dddf88441863c350dfd66db300d3dbc23679a Mon Sep 17 00:00:00 2001 From: Sung Won Chung Date: Wed, 22 Mar 2023 11:09:51 -0700 Subject: [PATCH 15/21] remove buffer --- fst/query_handler.py | 1 - 1 file changed, 1 deletion(-) diff --git a/fst/query_handler.py b/fst/query_handler.py index 6ecbe7f..2dce5b4 100644 --- a/fst/query_handler.py +++ b/fst/query_handler.py @@ -104,7 +104,6 @@ def handle_query(query, file_path): logger.warning( "Running `dbt test` with the generated test YAML file..." 
                         )
-                        time.sleep(0.5)
                         result_rerun = subprocess.run(
                             ["dbt", "test", "--select", model_name],
                             capture_output=True,

From 21706c33f612d727fdc4f2b480bdee12c5d7bfc3 Mon Sep 17 00:00:00 2001
From: Sung Won Chung
Date: Wed, 22 Mar 2023 11:13:00 -0700
Subject: [PATCH 16/21] update logs

---
 README.md | 69 +++++++++++++++++++++++++++++++------------------
 1 file changed, 47 insertions(+), 22 deletions(-)

diff --git a/README.md b/README.md
index d0bc4b6..35007e4 100644
--- a/README.md
+++ b/README.md
@@ -55,38 +55,63 @@ python3 -m pip install --upgrade pip setuptools wheel
 source venv/bin/activate
 pip3 install -r requirements.txt
 pip3 install -e ../ # installing the fst package locally
-# fst start --file-path
 code .
-fst start --file-path models/customers.sql
+fst start
 ```
 
 ```shell
-# example of running this tool on each modification to the sql file
+# example of running this tool on each modification to any SQL file within models/
 # pro tip: open up the compiled query in a split IDE window for hot reloading as you develope
-  ~/De/fst/jaffle_shop_duckdb python ../fst_query.py /Users/sung/Desktop/fst/jaffle_shop_duckdb/models/new_file.sql
-2023-03-18 18:39:15 - INFO - Watching directory: /Users/sung/Desktop/fst/jaffle_shop_duckdb
-2023-03-18 18:39:34 - INFO - Detected modification: /Users/sung/Desktop/fst/jaffle_shop_duckdb/models/new_file.sql
-2023-03-18 18:39:34 - INFO - Running `dbt build` with the modified SQL file (new_file)...
-2023-03-18 18:39:37 - INFO - `dbt build` was successful.
-2023-03-18 18:39:37 - INFO - project_name: jaffle_shop
-2023-03-18 18:39:37 - WARNING - Warning: No tests were run with the `dbt build` command. Consider adding tests to your project.
-2023-03-18 18:39:37 - WARNING - Generated test YAML file: /Users/sung/Desktop/fst/jaffle_shop_duckdb/models/new_file.yml
-2023-03-18 18:39:37 - INFO - Executing compiled query from: /Users/sung/Desktop/fst/jaffle_shop_duckdb/target/compiled/jaffle_shop/models/new_file.sql
-2023-03-18 18:39:37 - INFO - Using DuckDB file: jaffle_shop.duckdb
-2023-03-18 18:39:37 - INFO - `dbt build` time: 3.38 seconds
-2023-03-18 18:39:37 - INFO - Query time: 0.00 seconds
-2023-03-18 18:39:37 - INFO - Result Preview
+2023-03-22 11:05:29 - INFO - Running `dbt build` with the modified SQL file (/Users/sung/fst/jaffle_shop_duckdb/models/new_file.sql)...
+2023-03-22 11:05:33 - INFO - `dbt build` was successful.
+2023-03-22 11:05:33 - INFO - 18:05:32 Running with dbt=1.4.5
+18:05:32 Found 6 models, 20 tests, 0 snapshots, 0 analyses, 297 macros, 0 operations, 3 seed files, 0 sources, 0 exposures, 0 metrics
+18:05:32
+18:05:32 Concurrency: 24 threads (target='dev')
+18:05:32
+18:05:32 1 of 1 START sql table model main.new_file ..................................... [RUN]
+18:05:33 1 of 1 OK created sql table model main.new_file ................................ [OK in 0.12s]
+18:05:33
+18:05:33 Finished running 1 table model in 0 hours 0 minutes and 0.25 seconds (0.25s).
+18:05:33
+18:05:33 Completed successfully
+18:05:33
+18:05:33 Done. PASS=1 WARN=0 ERROR=0 SKIP=0 TOTAL=1
+
+2023-03-22 11:05:33 - WARNING - Warning: No tests were run with the `dbt build` command. Consider adding tests to your project.
+2023-03-22 11:05:33 - WARNING - Generated test YAML file: /Users/sung/fst/jaffle_shop_duckdb/models/new_file.yml
+2023-03-22 11:05:33 - WARNING - Running `dbt test` with the generated test YAML file...
+2023-03-22 11:05:37 - INFO - `dbt test` with generated tests was successful.
+2023-03-22 11:05:37 - INFO - 18:05:36 Running with dbt=1.4.5
+18:05:36 Found 6 models, 22 tests, 0 snapshots, 0 analyses, 297 macros, 0 operations, 3 seed files, 0 sources, 0 exposures, 0 metrics
+18:05:36
+18:05:36 Concurrency: 24 threads (target='dev')
+18:05:36
+18:05:36 1 of 2 START test not_null_new_file_customer_id ................................ [RUN]
+18:05:36 2 of 2 START test unique_new_file_customer_id .................................. [RUN]
+18:05:36 1 of 2 PASS not_null_new_file_customer_id ...................................... [PASS in 0.07s]
+18:05:36 2 of 2 PASS unique_new_file_customer_id ........................................ [PASS in 0.07s]
+18:05:36
+18:05:36 Finished running 2 tests in 0 hours 0 minutes and 0.17 seconds (0.17s).
+18:05:36
+18:05:36 Completed successfully
+18:05:36
+18:05:36 Done. PASS=2 WARN=0 ERROR=0 SKIP=0 TOTAL=2
+
+2023-03-22 11:05:37 - INFO - Executing compiled query from: /Users/sung/fst/jaffle_shop_duckdb/target/compiled/jaffle_shop/models/new_file.sql
+2023-03-22 11:05:37 - INFO - Using DuckDB file: jaffle_shop.duckdb
+2023-03-22 11:05:37 - INFO - `dbt build` time: 4.28 seconds
+2023-03-22 11:05:37 - INFO - Query time: 0.00 seconds
+2023-03-22 11:05:37 - INFO - Result Preview
 +---------------+--------------+-------------+---------------+---------------------+--------------------+---------------------------+
 | customer_id   | first_name   | last_name   | first_order   | most_recent_order   | number_of_orders   | customer_lifetime_value   |
 +===============+==============+=============+===============+=====================+====================+===========================+
-| 51            | Howard       | R.          | 2018-01-28    | 2018-02-23          | 3                  | 99                        |
+| 1             | Michael      | P.          | 2018-01-01    | 2018-02-10          | 2                  | 33                        |
 +---------------+--------------+-------------+---------------+---------------------+--------------------+---------------------------+
-| 3             | Kathleen     | P.          | 2018-01-02    | 2018-03-11          | 3                  | 65                        |
-+---------------+--------------+-------------+---------------+---------------------+--------------------+---------------------------+
-| 46            | Norma        | C.          | 2018-03-24    | 2018-03-27          | 2                  | 64                        |
+| 2             | Shawn        | M.          | 2018-01-11    | 2018-01-11          | 1                  | 23                        |
 +---------------+--------------+-------------+---------------+---------------------+--------------------+---------------------------+
-| 30            | Christina    | W.          | 2018-03-02    | 2018-03-14          | 2                  | 57                        |
+| 3             | Kathleen     | P.          | 2018-01-02    | 2018-03-11          | 3                  | 65                        |
 +---------------+--------------+-------------+---------------+---------------------+--------------------+---------------------------+
-| 54            | Rose         | M.          | 2018-01-07    | 2018-03-24          | 5                  | 57                        |
+| 6             | Sarah        | R.          | 2018-02-19    | 2018-02-19          | 1                  | 8                         |
 +---------------+--------------+-------------+---------------+---------------------+--------------------+---------------------------+
 ```

From bafdcd6d63e118721d2cc408fd68f65644cae818 Mon Sep 17 00:00:00 2001
From: Tomek F
Date: Wed, 22 Mar 2023 21:05:17 +0100
Subject: [PATCH 17/21] Add note on running dbt build to README

---
 README.md | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/README.md b/README.md
index 35007e4..40d6583 100644
--- a/README.md
+++ b/README.md
@@ -43,6 +43,7 @@ It works with any SQL file within the `models/` directory of the dbt project. Yo
 
 You'll notice for the sake of MVP, I am running nested git clones to get this working. I'll release to pypi soon.
+
 
 ```bash
 # my command to run this tool in an infinite loop in a split terminal
 git clone https://github.com/sungchun12/fst.git
@@ -55,6 +56,7 @@ python3 -m pip install --upgrade pip setuptools wheel
 source venv/bin/activate
 pip3 install -r requirements.txt
 pip3 install -e ../ # installing the fst package locally
+dbt build # Optional, so that upstream dependencies are created
 code .
 fst start
 ```

From a0e39336b8ace9b3a7c40913e818893d04cdb360 Mon Sep 17 00:00:00 2001
From: tomflo
Date: Thu, 23 Mar 2023 18:10:05 +0100
Subject: [PATCH 18/21] Add test finding method and add prompt to ask if tests should be generated

---
 fst/file_utils.py    |  33 ++++++++
 fst/query_handler.py | 194 ++++++++++++++++++++++---------------------
 2 files changed, 132 insertions(+), 95 deletions(-)

diff --git a/fst/file_utils.py b/fst/file_utils.py
index 6526bdc..7229f1e 100644
--- a/fst/file_utils.py
+++ b/fst/file_utils.py
@@ -27,6 +27,39 @@ def find_compiled_sql_file(file_path):
     compiled_file_path = os.path.join(compiled_directory, relative_file_path)
     return compiled_file_path if os.path.exists(compiled_file_path) else None
 
+def find_tests_for_model(model_name, directory='models'):
+    """
+    Check if tests are generated for a given model in a dbt project.
+
+    Args:
+        model_name (str): The name of the model to search for tests.
+        directory (str, optional): The root directory to start the search. Defaults to 'models'.
+
+    Returns:
+        tests_found: True if tests are found for the model, False otherwise.
+    """
+    tests_found = False
+    for root, _, files in os.walk(directory):
+        for file in files:
+            if file.endswith('schema.yml'):
+                filepath = os.path.join(root, file)
+                with open(filepath, 'r') as f:
+                    schema_data = yaml.safe_load(f)
+
+                for model in schema_data.get('models', []):
+                    if model['name'] == model_name:
+                        columns = model.get('columns', {})
+                        for column_name, column_data in columns.items():
+                            tests = column_data.get('tests', [])
+                            if tests:
+                                tests_found = True
+                                logger.info(f"Tests found for '{model_name}' model in column '{column_name}': {tests}")
+
+    if not tests_found:
+        logger.info(f"No tests found for the '{model_name}' model.")
+
+    return tests_found
+
 def get_model_name_from_file(file_path: str):
     project_directory = CURRENT_WORKING_DIR
     models_directory = os.path.join(project_directory, "models")
diff --git a/fst/query_handler.py b/fst/query_handler.py
index 2dce5b4..533b2fd 100644
--- a/fst/query_handler.py
+++ b/fst/query_handler.py
@@ -8,11 +8,13 @@
 
 from fst.file_utils import (
     get_active_file,
+    find_tests_for_model,
     get_model_name_from_file,
     find_compiled_sql_file,
     generate_test_yaml,
 )
 from fst.db_utils import get_duckdb_file_path, execute_query
+from fst.config_defaults import DISABLE_TESTS
 
 logger = logging.getLogger(__name__)
@@ -47,100 +49,102 @@ def handle_query_for_file(self, file_path):
             handle_query(query, file_path)
 
 
+
+def generate_and_run_tests(model_name, column_names, active_file):
+    test_yaml_path = generate_test_yaml(
+        model_name, column_names, active_file
+    )
+    test_yaml_path_warning_message = (
+        f"Generated test YAML file: {test_yaml_path}"
+    )
+
+    # Verify if the newly generated test YAML file exists
+    if os.path.isfile(test_yaml_path):
+        logger.warning(test_yaml_path_warning_message)
+        logger.warning(
+            "Running `dbt test` with the generated test YAML file..."
+        )
+        result_rerun = subprocess.run(
+            ["dbt", "test", "--select", model_name],
+            capture_output=True,
+            text=True,
+        )
+        if result_rerun.returncode == 0:
+            logger.info("`dbt test` with generated tests was successful.")
+        else:
+            logger.info("`dbt test` with generated tests failed.")
+
+        logger.info(result_rerun.stdout)
+    else:
+        logger.error("Couldn't find the generated test YAML file.")
+
 def handle_query(query, file_path):
-    if query.strip():
-        try:
-            start_time = time.time()
-
-            active_file = get_active_file(file_path)
-            if not active_file:
-                return
-            model_name = get_model_name_from_file(active_file)
-            logger.info(
-                f"Running `dbt build` with the modified SQL file ({active_file})..."
-            )
-            result = subprocess.run(
-                ["dbt", "build", "--select", model_name],
-                capture_output=True,
-                text=True,
-            )
-            compile_time = time.time() - start_time
-
-            stdout_without_finished = result.stdout.split("Finished running")[0]
-
-            if result.returncode == 0:
-                logger.info("`dbt build` was successful.")
-                logger.info(result.stdout)
-            else:
-                logger.error("Error running `dbt build`:")
-                logger.error(result.stdout)
-
-            if (
-                "PASS" not in stdout_without_finished
-                and "FAIL" not in stdout_without_finished
-                and "ERROR" not in stdout_without_finished
-            ):
-                compiled_sql_file = find_compiled_sql_file(file_path)
-                if compiled_sql_file:
-                    with open(compiled_sql_file, "r") as file:
-                        compiled_query = file.read()
-                    duckdb_file_path = get_duckdb_file_path()
-                    _, column_names = execute_query(compiled_query, duckdb_file_path)
-
-                    warning_message = "Warning: No tests were run with the `dbt build` command. Consider adding tests to your project."
-
-                    logger.warning(warning_message)
-
-                    test_yaml_path = generate_test_yaml(
-                        model_name, column_names, active_file
-                    )
-                    test_yaml_path_warning_message = (
-                        f"Generated test YAML file: {test_yaml_path}"
-                    )
-
-                    # Verify if the newly generated test YAML file exists
-                    if os.path.isfile(test_yaml_path):
-                        logger.warning(test_yaml_path_warning_message)
-                        logger.warning(
-                            "Running `dbt test` with the generated test YAML file..."
-                        )
-                        result_rerun = subprocess.run(
-                            ["dbt", "test", "--select", model_name],
-                            capture_output=True,
-                            text=True,
-                        )
-                        if result_rerun.returncode == 0:
-                            logger.info("`dbt test` with generated tests was successful.")
-                            logger.info(result_rerun.stdout)
-                    else:
-                        logger.error("Couldn't find the generated test YAML file.")
-
-
-                compiled_sql_file = find_compiled_sql_file(file_path)
-                if compiled_sql_file:
-                    with open(compiled_sql_file, "r") as file:
-                        compiled_query = file.read()
-                    logger.info(f"Executing compiled query from: {compiled_sql_file}")
-                    duckdb_file_path = get_duckdb_file_path()
-                    logger.info(f"Using DuckDB file: {duckdb_file_path}")
-
-                    start_time = time.time()
-                    result, column_names = execute_query(
-                        compiled_query, duckdb_file_path
-                    )
-                    query_time = time.time() - start_time
-
-                    logger.info(f"`dbt build` time: {compile_time:.2f} seconds")
-                    logger.info(f"Query time: {query_time:.2f} seconds")
-
-                    logger.info(
-                        "Result Preview"
-                        + "\n"
-                        + tabulate(result, headers=column_names, tablefmt="grid")
-                    )
-                else:
-                    logger.error("Couldn't find the compiled SQL file.")
-        except Exception as e:
-            logger.error(f"Error: {e}")
-    else:
-        logger.error("Empty query.")
+def handle_query(query, file_path):
+    if not query.strip():
+        logger.info("Query is empty.")
+        return
+
+    try:
+        start_time = time.time()
+
+        active_file = get_active_file(file_path)
+        if not active_file:
+            return
+        model_name = get_model_name_from_file(active_file)
+
+        logger.info(
+            f"Running `dbt build` with the modified SQL file ({active_file})..."
+        )
+        result = subprocess.run(
+            ["dbt", "build", "--select", model_name],
+            capture_output=True,
+            text=True,
+        )
+
+        compile_time = time.time() - start_time
+
+        if result.returncode == 0:
+            logger.info("`dbt build` was successful.")
+            logger.info(result.stdout)
+        else:
+            logger.error("Error running `dbt build`:")
+            logger.error(result.stdout)
+
+
+        compiled_sql_file = find_compiled_sql_file(file_path)
+        if compiled_sql_file:
+            with open(compiled_sql_file, "r") as file:
+                compiled_query = file.read()
+            logger.info(f"Executing compiled query from: {compiled_sql_file}")
+            duckdb_file_path = get_duckdb_file_path()
+            logger.info(f"Using DuckDB file: {duckdb_file_path}")
+
+            start_time = time.time()
+            result, column_names = execute_query(
+                compiled_query, duckdb_file_path
+            )
+            query_time = time.time() - start_time
+
+            logger.info(f"`dbt build` time: {compile_time:.2f} seconds")
+            logger.info(f"Query time: {query_time:.2f} seconds")
+
+            logger.info(
+                "Result Preview"
+                + "\n"
+                + tabulate(result, headers=column_names, tablefmt="grid")
+            )
+            # Check if tests are generated for the model
+            tests_exist = find_tests_for_model(model_name)
+
+            if not tests_exist and not DISABLE_TESTS:
+                response = input(f"No tests found for the '{model_name}' model. Would you like to generate tests? (yes/no): ")
+
+                if response.lower() == 'yes':
+                    logger.info(f"Generating tests for the '{model_name}' model...")
+                    generate_and_run_tests(model_name, column_names, active_file)
+                else:
+                    logger.info(f"Skipping tests generation for the '{model_name}' model.")
+
+        else:
+            logger.error("Couldn't find the compiled SQL file.")
+    except Exception as e:
+        logger.error(f"Error: {e}")

From 8ad5666c6b272e82554c0f72be76301abd568a66 Mon Sep 17 00:00:00 2001
From: tomflo
Date: Thu, 23 Mar 2023 18:11:02 +0100
Subject: [PATCH 19/21] Add Flag setting for running tests

---
 fst/config_defaults.py |  1 +
 fst/main.py            | 12 ++++++++++--
 2 files changed, 11 insertions(+), 2 deletions(-)

diff --git a/fst/config_defaults.py b/fst/config_defaults.py
index 752b7a0..947e11d 100644
--- a/fst/config_defaults.py
+++ b/fst/config_defaults.py
@@ -2,6 +2,7 @@ import os
 import yaml
 
 CURRENT_WORKING_DIR = os.getcwd()
+DISABLE_TESTS = False
 
 # Load profiles.yml only once
 profiles_path = os.path.join(CURRENT_WORKING_DIR, "profiles.yml")
diff --git a/fst/main.py b/fst/main.py
index ee91255..10b68d7 100644
--- a/fst/main.py
+++ b/fst/main.py
@@ -4,8 +4,9 @@
 from fst.query_handler import handle_query, DynamicQueryHandler
 from fst.directory_watcher import watch_directory
 from fst.logger import setup_logger
-from fst.config_defaults import CURRENT_WORKING_DIR
+from fst.config_defaults import CURRENT_WORKING_DIR, DISABLE_TESTS
 
+DISABLE_TESTS = False
 
 @click.group()
 def main():
     pass
@@ -23,8 +24,15 @@ def main():
     ),
     help="dbt project root directory. Defaults to current working directory.",
 )
-def start(path):
+@click.option(
+    "--disable-tests",
+    is_flag=True,
+    default=False,
+    help="Disable checking for tests and test generation.",
+)
+def start(path, disable_tests):
     project_dir = path
+    DISABLE_TESTS = disable_tests
     models_dir = get_models_directory(project_dir)
 
     event_handler = DynamicQueryHandler(handle_query, models_dir)

From a141f18fb8d271fac10d2e84c454984aa709a103 Mon Sep 17 00:00:00 2001
From: tomflo
Date: Fri, 24 Mar 2023 15:29:46 +0100
Subject: [PATCH 20/21] Fix duplicating .yml definitions for models. When
 schema.yml has descriptions without tests.

---
 fst/file_utils.py    | 86 ++++++++++++++++++++++++++++++++------------
 fst/query_handler.py | 10 +++---
 2 files changed, 69 insertions(+), 27 deletions(-)

diff --git a/fst/file_utils.py b/fst/file_utils.py
index 7229f1e..3e429a5 100644
--- a/fst/file_utils.py
+++ b/fst/file_utils.py
@@ -36,12 +36,13 @@ def find_tests_for_model(model_name, directory='models'):
         directory (str, optional): The root directory to start the search. Defaults to 'models'.
 
     Returns:
-        tests_found: True if tests are found for the model, False otherwise.
+        dict: A dictionary containing information about the tests found, including the model name, column name, file type, and tests.
     """
-    tests_found = False
+    tests_data = {}
+
     for root, _, files in os.walk(directory):
         for file in files:
-            if file.endswith('schema.yml'):
+            if file.endswith(('.schema.yml', '.yml')):
                 filepath = os.path.join(root, file)
                 with open(filepath, 'r') as f:
                     schema_data = yaml.safe_load(f)
@@ -49,16 +50,13 @@ def find_tests_for_model(model_name, directory='models'):
 
                 for model in schema_data.get('models', []):
                     if model['name'] == model_name:
                         columns = model.get('columns', {})
-                        for column_name, column_data in columns.items():
+                        for column_data in columns:
+                            column_name = column_data['name']
                             tests = column_data.get('tests', [])
                             if tests:
-                                tests_found = True
-                                logger.info(f"Tests found for '{model_name}' model in column '{column_name}': {tests}")
-
-    if not tests_found:
-        logger.info(f"No tests found for the '{model_name}' model.")
+                                tests_data.append({'file': filepath, 'column': column_name, 'tests': tests})
 
-    return tests_found
+    return tests_data
 
 def get_model_name_from_file(file_path: str):
     project_directory = CURRENT_WORKING_DIR
@@ -67,24 +65,68 @@ def get_model_name_from_file(file_path: str):
     model_name, _ = os.path.splitext(relative_file_path)
     return model_name.replace(os.sep, ".")
 
+import yaml
+import re
+import os
 
-def generate_test_yaml(model_name, column_names, active_file_path):
-    test_yaml = f"version: 2\n\nmodels:\n  - name: {model_name}\n    columns:"
+def generate_test_yaml(model_name, column_names, active_file_path, tests_data):
+    yaml_files = {}
 
     for column in column_names:
+        tests_to_add = []
         if re.search(r"(_id|_ID)$", column):
-            test_yaml += f"\n      - name: {column}\n        description: 'A placeholder description for {column}'"
+            tests_to_add = ["unique", "not_null"]
 
+        # Check if tests for this column already exist
+        existing_tests = [data for data in tests_data if data['column'] == column]
+
+        if existing_tests:
+            # Update the existing YAML file with new tests
+            for test_data in existing_tests:
+                yaml_file = test_data['file']
+                if yaml_file not in yaml_files:
+                    with open(yaml_file, 'r') as f:
+                        yaml_files[yaml_file] = yaml.safe_load(f)
+
+                models = yaml_files[yaml_file].get('models', [])
+                for model in models:
+                    if model['name'] == model_name:
+                        columns = model.get('columns', [])
+                        for existing_column in columns:
+                            if existing_column['name'] == column:
+                                tests = existing_column.get('tests', [])
+                                for test in tests_to_add:
+                                    if test not in tests:
+                                        tests.append(test)
+                                existing_column['tests'] = tests
+        else:
+            # If no tests exist, add the tests to the schema.yml file
+            schema_yml_path = os.path.join(os.path.dirname(active_file_path), "schema.yml")
+            if os.path.exists(schema_yml_path):
+                with open(schema_yml_path, "r") as f:
+                    schema_yml_data = yaml.safe_load(f)
+
+                for model in schema_yml_data.get("models", []):
+                    if model["name"] == model_name:
+                        if "columns" not in model:
+                            model["columns"] = []
+
+                        new_column = {
+                            "name": column,
+                            "description": f"A placeholder description for {column}",
+                            "tests": tests_to_add,
+                        }
+                        model["columns"].append(new_column)
+                        break
+
+                with open(schema_yml_path, "w") as f:
+                    yaml.dump(schema_yml_data, f)
+
+                return schema_yml_path
+
+    # Return the first file path where tests were found
+    return next(iter(yaml_files))
 
-    active_file_directory = os.path.dirname(active_file_path)
-    active_file_name, _ = os.path.splitext(os.path.basename(active_file_path))
-    new_yaml_file_name = f"{active_file_name}.yml"
-    new_yaml_file_path = os.path.join(active_file_directory, new_yaml_file_name)
-
-    with open(new_yaml_file_path, "w") as file:
-        file.write(test_yaml)
-
-    return new_yaml_file_path
 
 def get_model_paths():
     with open("dbt_project.yml", "r") as file:
         dbt_project = yaml.safe_load(file)
diff --git a/fst/query_handler.py b/fst/query_handler.py
index 533b2fd..b77cc93 100644
--- a/fst/query_handler.py
+++ b/fst/query_handler.py
@@ -50,9 +50,9 @@ def handle_query_for_file(self, file_path):
 
 
-def generate_and_run_tests(model_name, column_names, active_file):
+def generate_and_run_tests(model_name, column_names, active_file, tests_data):
     test_yaml_path = generate_test_yaml(
-        model_name, column_names, active_file
+        model_name, column_names, active_file, tests_data
     )
     test_yaml_path_warning_message = (
         f"Generated test YAML file: {test_yaml_path}"
@@ -133,14 +133,14 @@ def handle_query(query, file_path):
                 + tabulate(result, headers=column_names, tablefmt="grid")
             )
             # Check if tests are generated for the model
-            tests_exist = find_tests_for_model(model_name)
+            tests_data = find_tests_for_model(model_name)
 
-            if not tests_exist and not DISABLE_TESTS:
+            if not tests_data and not DISABLE_TESTS:
                 response = input(f"No tests found for the '{model_name}' model. Would you like to generate tests? (yes/no): ")
 
                 if response.lower() == 'yes':
                     logger.info(f"Generating tests for the '{model_name}' model...")
-                    generate_and_run_tests(model_name, column_names, active_file)
+                    generate_and_run_tests(model_name, column_names, active_file, tests_data)
                 else:
                     logger.info(f"Skipping tests generation for the '{model_name}' model.")
 

From 0286c1be53ecea98cd6ef30e33185d0a7811326f Mon Sep 17 00:00:00 2001
From: tomflo
Date: Wed, 29 Mar 2023 16:09:04 +0200
Subject: [PATCH 21/21] Add model name getter

---
 fst/file_utils.py | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/fst/file_utils.py b/fst/file_utils.py
index 3e429a5..f108121 100644
--- a/fst/file_utils.py
+++ b/fst/file_utils.py
@@ -27,6 +27,13 @@ def find_compiled_sql_file(file_path):
     compiled_file_path = os.path.join(compiled_directory, relative_file_path)
     return compiled_file_path if os.path.exists(compiled_file_path) else None
 
+def get_model_name_from_file(file_path: str):
+    project_directory = CURRENT_WORKING_DIR
+    models_directory = os.path.join(project_directory, "models")
+    relative_file_path = os.path.relpath(file_path, models_directory)
+    model_name, _ = os.path.splitext(relative_file_path)
+    return model_name.replace(os.sep, ".")
+
 def find_tests_for_model(model_name, directory='models'):
     """
     Check if tests are generated for a given model in a dbt project.
@@ -58,12 +65,7 @@ def find_tests_for_model(model_name, directory='models'):
 
     return tests_data
 
-def get_model_name_from_file(file_path: str):
-    project_directory = CURRENT_WORKING_DIR
-    models_directory = os.path.join(project_directory, "models")
-    relative_file_path = os.path.relpath(file_path, models_directory)
-    model_name, _ = os.path.splitext(relative_file_path)
-    return model_name.replace(os.sep, ".")
+
 
 
 import yaml
 import re
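Editor's illustration (not part of the patch series): patches 18-20 describe a flow where fst looks for existing tests for the model that was just built and, if none are found, asks whether to generate them. The sketch below is a minimal, self-contained illustration of that flow under stated assumptions: it assumes a dbt-style `models/` directory containing `.yml` schema files, and the function name `discover_tests`, the model name `new_file`, and the printed messages are hypothetical stand-ins rather than the project's actual API. It deliberately treats the collected test metadata as a list of `{'file', 'column', 'tests'}` dicts, mirroring the shape that `find_tests_for_model` appends in patch 20.

```python
# Illustrative sketch only; helper names and messages are assumptions, not fst's API.
import os
import yaml


def discover_tests(model_name, directory="models"):
    """Collect {'file', 'column', 'tests'} entries for a model from YAML schema files."""
    tests_data = []  # a list, so append() works for every match found below
    for root, _, files in os.walk(directory):
        for file in files:
            if not file.endswith(".yml"):
                continue
            filepath = os.path.join(root, file)
            with open(filepath, "r") as f:
                schema = yaml.safe_load(f) or {}
            for model in schema.get("models", []):
                if model.get("name") != model_name:
                    continue
                for column in model.get("columns", []):
                    tests = column.get("tests", [])
                    if tests:
                        tests_data.append(
                            {"file": filepath, "column": column["name"], "tests": tests}
                        )
    return tests_data


if __name__ == "__main__":
    model = "new_file"  # hypothetical model name, as in the README example
    found = discover_tests(model)
    if not found:
        # Patch 18 prompts before generating tests; patch 19's --disable-tests flag skips this.
        answer = input(f"No tests found for '{model}'. Generate tests? (yes/no): ")
        print("Would generate schema.yml entries" if answer.lower() == "yes" else "Skipping")
    else:
        for entry in found:
            print(f"{entry['column']}: {entry['tests']} (from {entry['file']})")
```

A list-of-dicts return value keeps the "no tests" check as a simple truthiness test (`if not tests_data`), which is how patch 20's `handle_query` decides whether to prompt the user.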