Skip to content

Commit

Permalink
feat: refactored adding support for RETL and Profiles
Browse files Browse the repository at this point in the history
  • Loading branch information
Ranjeet committed Oct 3, 2024
1 parent ff123fe commit 6041849
Show file tree
Hide file tree
Showing 15 changed files with 848 additions and 495 deletions.
9 changes: 8 additions & 1 deletion .github/workflows/test.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,13 @@ jobs:
- name: Install dependencies
run: |
pip3 install -r requirements.txt
- name: Test with unittest
- name: Test with pytest
run: |
pip3 install pytest-cov
make test
- name: Upload Coverage to Codecov
uses: codecov/codecov-action@v4
with:
fail_ci_if_error: true
files: ./coverage.xml
token: ${{ secrets.CODECOV_TOKEN }}
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
*.egg-info
**/__pycache__
.vscode
.venv/


# Distribution / packaging
Expand Down Expand Up @@ -36,4 +37,4 @@ coverage.xml
*.py,cover
.hypothesis/
.pytest_cache/
cover/
cover/
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
.PHONY: test
test:
python3 -m unittest discover -s rudder_airflow_provider/test
pytest --cov=rudder_airflow_provider rudder_airflow_provider/test --cov-report=xml
34 changes: 34 additions & 0 deletions examples/profiles_sample_dag.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
from datetime import datetime, timedelta

from airflow import DAG

from rudder_airflow_provider.operators.rudderstack import RudderstackProfilesOperator

# Task-level defaults handed to the DAG below via `default_args`.
default_args = {
    "owner": "airflow",
    "depends_on_past": False,
    "email": ["airflow@example.com"],
    "email_on_failure": False,
    "email_on_retry": False,
    "retries": 1,
    "retry_delay": timedelta(minutes=5),
}

# Sample DAG that triggers a RudderStack Profiles run once a day.
# NOTE(review): `schedule_interval` is deprecated since Airflow 2.4 in favour
# of `schedule` -- consider migrating once older Airflow versions no longer
# need to be supported.
with DAG(
    "rudderstack-profiles-sample",
    default_args=default_args,
    description="A simple tutorial DAG",
    schedule_interval=timedelta(days=1),
    start_date=datetime(2021, 1, 1),
    catchup=False,
    tags=["rs-profiles"],
) as dag:
    # profile_id is a template field, so the Jinja expression below is
    # rendered from the Airflow Variable `profile_id`.
    # Replace the task_id and connection_id placeholders before using this DAG.
    rs_operator = RudderstackProfilesOperator(
        profile_id="{{ var.value.profile_id }}",
        task_id="<replace task id>",
        connection_id="<rudderstack api connection id>",
    )

# Allows running the DAG directly with `python profiles_sample_dag.py`.
if __name__ == "__main__":
    dag.test()
29 changes: 15 additions & 14 deletions examples/retl_sample.dag.py → examples/retl_sample_dag.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,30 +5,31 @@
from rudder_airflow_provider.operators.rudderstack import RudderstackRETLOperator

default_args = {
'owner': 'airflow',
'depends_on_past': False,
'email': ['[email protected]'],
'email_on_failure': False,
'email_on_retry': False,
'retries': 1,
'retry_delay': timedelta(minutes=5)
"owner": "airflow",
"depends_on_past": False,
"email": ["[email protected]"],
"email_on_failure": False,
"email_on_retry": False,
"retries": 1,
"retry_delay": timedelta(minutes=5),
}

with DAG('rudderstack-sample',
with DAG(
"rudderstack-sample",
default_args=default_args,
description='A simple tutorial DAG',
description="A simple tutorial DAG",
schedule_interval=timedelta(days=1),
start_date=datetime(2021, 1, 1),
catchup=False,
tags=['rs']) as dag:
tags=["rs"],
) as dag:
# retl_connection_id, sync_type are template fields
rs_operator = RudderstackRETLOperator(
retl_connection_id="{{ var.value.retl_connection_id }}",
task_id='<replace task id>',
connection_id='<rudderstack api connection id>',
task_id="<replace task id>",
connection_id="<rudderstack api connection id>",
sync_type="{{ var.value.sync_type }}",
wait_for_completion=True
)

if __name__ == "__main__":
dag.test()
dag.test()
24 changes: 0 additions & 24 deletions examples/sample_dag.py

This file was deleted.

32 changes: 32 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,3 +4,35 @@ requires = [
"wheel"
]
build-backend = "setuptools.build_meta"

# NOTE: the [build-system] table is already declared at the top of this file,
# and TOML does not allow the same table to be declared twice. The duplicate
# declaration is therefore disabled here; carry its `setuptools >= 61.0`
# floor (needed for [project] metadata support) over to the first declaration.
# [build-system]
# requires = ["setuptools >= 61.0", "wheel"]
# build-backend = "setuptools.build_meta"

# Distribution metadata for the provider package.
[project]
name = "rudderstack-airflow-provider"
version = "2.0.0"
readme = "README.md"
license = {file = "LICENSE"}
description = "Apache airflow provider for managing Reverse ETL syncs and Profiles runs in RudderStack."
keywords = [ "airflow", "orchestration", "rudderstack"]
# Trove classifiers marking this distribution as an Airflow provider.
classifiers = [
    "Framework :: Apache Airflow",
    "Framework :: Apache Airflow :: Provider",
]
# Runtime requirements installed together with the package.
# NOTE(review): `pytest` and `responses` look like test-only tools -- confirm
# whether they belong under [project.optional-dependencies] instead.
dependencies = [
    "apache-airflow",
    "pytest",
    "requests",
    "responses",
    "setuptools"
]
# NOTE(review): requirements.txt pins apache-airflow 2.10.0, which presumably
# needs a newer interpreter than 3.6 -- confirm this floor.
requires-python = ">= 3.6"

# Package discovery: leave packages whose name matches *test* out of the
# built distribution. `exclude` must be a TOML array of glob strings; a bare
# *test* is not valid TOML.
[tool.setuptools.packages.find]
exclude = ["*test*"]

# Airflow discovers providers through the `apache_airflow_provider`
# entry-point group. The target must be this package's own
# get_provider_info(), defined in rudder_airflow_provider/__init__.py.
[project.entry-points.apache_airflow_provider]
provider_info = "rudder_airflow_provider.__init__:get_provider_info"
5 changes: 3 additions & 2 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
apache-airflow == 2.8.0
requests == 2.28.2
apache-airflow == 2.10.0
requests == 2.32.3
setuptools == 65.5.1 # not directly required, pinned by Snyk to avoid a vulnerability
pytest==7.3.1
6 changes: 3 additions & 3 deletions rudder_airflow_provider/__init__.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
def get_provider_info():
return {
'package-name': 'rudderstack-airflow-provider',
'name': 'rudderstack-airflow-provider',
'description': 'Apache airflow provider for Rudderstack'
"package-name": "rudderstack-airflow-provider",
"name": "rudderstack-airflow-provider",
"description": "Apache airflow provider for Rudderstack",
}
Loading

0 comments on commit 6041849

Please sign in to comment.