-
Notifications
You must be signed in to change notification settings - Fork 1.2k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
3 changed files
with
149 additions
and
141 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,40 @@ | ||
# .github/workflows/run_python_script.yaml | ||
name: Drop Deprecated Spells | ||
|
||
on: | ||
schedule: | ||
- cron: '0 14 * * 1' # At 9 AM EST every Monday (14:00 UTC) | ||
workflow_dispatch: | ||
|
||
concurrency: | ||
group: ${{ github.workflow }}-${{ github.ref }} | ||
cancel-in-progress: true | ||
|
||
jobs: | ||
run-script: | ||
runs-on: [ self-hosted, linux, spellbook-trino-ci ] | ||
timeout-minutes: 10 # Timeout set to 10 minutes | ||
|
||
steps: | ||
- name: Check out repository code | ||
uses: actions/checkout@v3 | ||
with: | ||
ref: main # Specify the main branch | ||
|
||
- name: Setup Python environment | ||
uses: actions/setup-python@v3 | ||
with: | ||
python-version: '3.9' # or whichever version your script requires | ||
|
||
- name: Install dependencies | ||
run: | | ||
python -m pip install --upgrade pip | ||
pip install psycopg2-binary # Add other dependencies here | ||
- name: Set environment variables | ||
run: | | ||
echo "SH_METASTORE_PROD_PASS=${{ secrets.SH_METASTORE_PROD_PASS }}" >> $GITHUB_ENV | ||
# Set any other necessary environment variables here | ||
- name: Run Python script | ||
run: python drop_deprecated_spells.py |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,109 @@ | ||
import subprocess | ||
import json | ||
import os | ||
|
||
def generate_tables_query(): | ||
return """ | ||
SELECT DISTINCT d."NAME" || '.' || t."TBL_NAME" | ||
FROM "TBLS" t | ||
JOIN "DBS" d ON d."DB_ID" = t."DB_ID" | ||
JOIN "TABLE_PARAMS" tp ON tp."TBL_ID" = t."TBL_ID" | ||
WHERE tp."PARAM_KEY" = 'dune.data_explorer.category' | ||
AND tp."PARAM_VALUE" = 'abstraction' | ||
AND t."OWNER_TYPE" = 'USER' | ||
AND t."OWNER" = 'spellbook' | ||
AND t."TBL_TYPE" = 'EXTERNAL_TABLE'; | ||
""" | ||
|
||
def generate_views_query(): | ||
return """ | ||
SELECT DISTINCT d."NAME" || '.' || t."TBL_NAME" | ||
FROM "TBLS" t | ||
JOIN "DBS" d ON d."DB_ID" = t."DB_ID" | ||
JOIN "TABLE_PARAMS" tp ON tp."TBL_ID" = t."TBL_ID" | ||
WHERE tp."PARAM_KEY" = 'dune.data_explorer.category' | ||
AND tp."PARAM_VALUE" = 'abstraction' | ||
AND t."OWNER_TYPE" = 'USER' | ||
AND t."OWNER" = 'spellbook' | ||
AND t."TBL_TYPE" = 'VIRTUAL_VIEW'; | ||
""" | ||
|
||
def run_psql_command(sql_query): | ||
postgres_host = "prod-metastore-db" | ||
postgres_password = os.environ.get("SH_METASTORE_PROD_PASS") | ||
|
||
psql_command = [ | ||
'psql', | ||
'-h', postgres_host, | ||
'-p', '5432', | ||
'-U', 'hive', | ||
'-t', | ||
'-c', sql_query | ||
] | ||
|
||
psql_process = subprocess.run( | ||
psql_command, | ||
text=True, | ||
env=dict(os.environ, PGPASSWORD=postgres_password), | ||
capture_output=True | ||
) | ||
|
||
if psql_process.returncode != 0: | ||
print("Error executing psql command:") | ||
print("psql_process.stderr:", psql_process.stderr) | ||
return [] | ||
|
||
result_lines = psql_process.stdout.splitlines() | ||
|
||
if not result_lines[-1]: | ||
result_lines.pop() | ||
|
||
return result_lines | ||
|
||
# Main script | ||
|
||
# Step 1: List dbt models and output in JSON format | ||
dbt_command = ['dbt', 'ls', '--resource-type', 'model', '--output', 'json'] | ||
dbt_output_bytes = subprocess.check_output(dbt_command) | ||
dbt_output_str = dbt_output_bytes.decode('utf-8') | ||
dbt_lines = dbt_output_str.splitlines() | ||
dbt_json_objects = [line for line in dbt_lines if line.strip().startswith('{')] | ||
dbt_data_list = [json.loads(obj) for obj in dbt_json_objects] | ||
|
||
# Iterate through each JSON object and categorize based on 'materialized' | ||
view_models_dbt = [] | ||
table_models_dbt = [] | ||
|
||
for data in dbt_data_list: | ||
materialized = data.get('config', {}).get('materialized', '').lower() | ||
schema = data.get('config', {}).get('schema', 'schema_not_found') | ||
alias = data.get('config', {}).get('alias', 'alias_not_found') | ||
|
||
if materialized == 'view': | ||
view_models_dbt.append(f"{schema}.{alias}") | ||
elif materialized == 'table' or materialized == 'incremental': | ||
table_models_dbt.append(f"{schema}.{alias}") | ||
|
||
# Generate SQL queries | ||
tables_sql_query = generate_tables_query() | ||
views_sql_query = generate_views_query() | ||
|
||
# Run psql queries for tables and views | ||
psql_tables = run_psql_command(tables_sql_query) | ||
psql_views = run_psql_command(views_sql_query) | ||
|
||
# Trim whitespace from PostgreSQL tables and views | ||
psql_tables = [table.strip() for table in psql_tables] | ||
psql_views = [view.strip() for view in psql_views] | ||
|
||
# Compare psql_views vs. view_models_dbt | ||
print("\nViews in PostgreSQL but not in DBT:") | ||
for view in psql_views: | ||
if view not in view_models_dbt: | ||
print(f"DROP VIEW IF EXISTS {view};") | ||
|
||
# Compare psql_tables vs. table_models_dbt | ||
print("\nTables in PostgreSQL but not in DBT:") | ||
for table in psql_tables: | ||
if table not in table_models_dbt: | ||
print(f"DROP TABLE IF EXISTS {table};") |
This file was deleted.
Oops, something went wrong.