rename script, create gh action
jeff-dude committed Dec 5, 2023
1 parent ea379d1 commit 8756bc1
Showing 3 changed files with 149 additions and 141 deletions.
40 changes: 40 additions & 0 deletions .github/workflows/drop_deprecated_spells.yml
@@ -0,0 +1,40 @@
# .github/workflows/drop_deprecated_spells.yml
name: Drop Deprecated Spells

on:
  schedule:
    - cron: '0 14 * * 1' # At 9 AM EST every Monday (14:00 UTC)
  workflow_dispatch:

concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: true

jobs:
  run-script:
    runs-on: [ self-hosted, linux, spellbook-trino-ci ]
    timeout-minutes: 10 # Timeout set to 10 minutes

    steps:
      - name: Check out repository code
        uses: actions/checkout@v3
        with:
          ref: main # Specify the main branch

      - name: Setup Python environment
        uses: actions/setup-python@v3
        with:
          python-version: '3.9' # or whichever version your script requires

      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install psycopg2-binary # Add other dependencies here

      - name: Set environment variables
        run: |
          echo "SH_METASTORE_PROD_PASS=${{ secrets.SH_METASTORE_PROD_PASS }}" >> $GITHUB_ENV
          # Set any other necessary environment variables here

      - name: Run Python script
        run: python scripts/drop_deprecated_spells.py
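A minimal sketch of starting a run through GitHub's workflow-dispatch REST endpoint, in addition to the weekly cron schedule above; OWNER/REPO and the GITHUB_TOKEN variable are placeholders, and the token is assumed to have permission to trigger workflows:

import os
import requests

resp = requests.post(
    "https://api.github.com/repos/OWNER/REPO/actions/workflows/drop_deprecated_spells.yml/dispatches",
    headers={
        "Accept": "application/vnd.github+json",
        "Authorization": f"Bearer {os.environ['GITHUB_TOKEN']}",
    },
    json={"ref": "main"},  # branch that carries the workflow file
)
resp.raise_for_status()  # GitHub returns 204 No Content on success
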
109 changes: 109 additions & 0 deletions scripts/drop_deprecated_spells.py
@@ -0,0 +1,109 @@
import subprocess
import json
import os

def generate_tables_query():
    return """
    SELECT DISTINCT d."NAME" || '.' || t."TBL_NAME"
    FROM "TBLS" t
    JOIN "DBS" d ON d."DB_ID" = t."DB_ID"
    JOIN "TABLE_PARAMS" tp ON tp."TBL_ID" = t."TBL_ID"
    WHERE tp."PARAM_KEY" = 'dune.data_explorer.category'
      AND tp."PARAM_VALUE" = 'abstraction'
      AND t."OWNER_TYPE" = 'USER'
      AND t."OWNER" = 'spellbook'
      AND t."TBL_TYPE" = 'EXTERNAL_TABLE';
    """

def generate_views_query():
    return """
    SELECT DISTINCT d."NAME" || '.' || t."TBL_NAME"
    FROM "TBLS" t
    JOIN "DBS" d ON d."DB_ID" = t."DB_ID"
    JOIN "TABLE_PARAMS" tp ON tp."TBL_ID" = t."TBL_ID"
    WHERE tp."PARAM_KEY" = 'dune.data_explorer.category'
      AND tp."PARAM_VALUE" = 'abstraction'
      AND t."OWNER_TYPE" = 'USER'
      AND t."OWNER" = 'spellbook'
      AND t."TBL_TYPE" = 'VIRTUAL_VIEW';
    """

def run_psql_command(sql_query):
    postgres_host = "prod-metastore-db"
    postgres_password = os.environ.get("SH_METASTORE_PROD_PASS")

    psql_command = [
        'psql',
        '-h', postgres_host,
        '-p', '5432',
        '-U', 'hive',
        '-t',
        '-c', sql_query
    ]

    psql_process = subprocess.run(
        psql_command,
        text=True,
        env=dict(os.environ, PGPASSWORD=postgres_password),
        capture_output=True
    )

    if psql_process.returncode != 0:
        print("Error executing psql command:")
        print("psql_process.stderr:", psql_process.stderr)
        return []

    result_lines = psql_process.stdout.splitlines()

    # Drop the trailing empty line psql emits after the result set, if present
    if result_lines and not result_lines[-1]:
        result_lines.pop()

    return result_lines

# Main script

# Step 1: List dbt models and output in JSON format
dbt_command = ['dbt', 'ls', '--resource-type', 'model', '--output', 'json']
dbt_output_bytes = subprocess.check_output(dbt_command)
dbt_output_str = dbt_output_bytes.decode('utf-8')
dbt_lines = dbt_output_str.splitlines()
dbt_json_objects = [line for line in dbt_lines if line.strip().startswith('{')]
dbt_data_list = [json.loads(obj) for obj in dbt_json_objects]
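# Each parsed object is expected to carry a 'config' mapping; the categorization
# below relies on a shape roughly like this (values are hypothetical placeholders):
#   {"config": {"materialized": "view", "schema": "some_schema", "alias": "some_model"}}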

# Iterate through each JSON object and categorize based on 'materialized'
view_models_dbt = []
table_models_dbt = []

for data in dbt_data_list:
    materialized = data.get('config', {}).get('materialized', '').lower()
    schema = data.get('config', {}).get('schema', 'schema_not_found')
    alias = data.get('config', {}).get('alias', 'alias_not_found')

    if materialized == 'view':
        view_models_dbt.append(f"{schema}.{alias}")
    elif materialized in ('table', 'incremental'):
        table_models_dbt.append(f"{schema}.{alias}")

# Generate SQL queries
tables_sql_query = generate_tables_query()
views_sql_query = generate_views_query()

# Run psql queries for tables and views
psql_tables = run_psql_command(tables_sql_query)
psql_views = run_psql_command(views_sql_query)
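# Note: psql's -t flag suppresses headers and the row-count footer, but values are
# still left-padded for column alignment, hence the strip() below.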

# Trim whitespace from PostgreSQL tables and views
psql_tables = [table.strip() for table in psql_tables]
psql_views = [view.strip() for view in psql_views]

# Compare psql_views vs. view_models_dbt
print("\nViews in PostgreSQL but not in DBT:")
for view in psql_views:
    if view not in view_models_dbt:
        print(f"DROP VIEW IF EXISTS {view};")

# Compare psql_tables vs. table_models_dbt
print("\nTables in PostgreSQL but not in DBT:")
for table in psql_tables:
    if table not in table_models_dbt:
        print(f"DROP TABLE IF EXISTS {table};")
141 changes: 0 additions & 141 deletions scripts/drop_spells_removed_from_spellbook.py

This file was deleted.
