-
Notifications
You must be signed in to change notification settings - Fork 7
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
6 changed files
with
200 additions
and
2 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,76 @@ | ||
name: CrateDB Toolkit | ||
|
||
on: | ||
pull_request: | ||
branches: ~ | ||
paths: | ||
- '.github/workflows/application-cratedb-toolkit.yml' | ||
- 'application/cratedb-toolkit/**' | ||
- '/requirements.txt' | ||
push: | ||
branches: [ main ] | ||
paths: | ||
- '.github/workflows/application-cratedb-toolkit.yml' | ||
- 'application/cratedb-toolkit/**' | ||
- '/requirements.txt' | ||
|
||
# Allow job to be triggered manually. | ||
workflow_dispatch: | ||
|
||
# Run job each night after CrateDB nightly has been published. | ||
schedule: | ||
- cron: '0 3 * * *' | ||
|
||
# Cancel in-progress jobs when pushing to the same branch. | ||
concurrency: | ||
cancel-in-progress: true | ||
group: ${{ github.workflow }}-${{ github.ref }} | ||
|
||
jobs: | ||
|
||
test: | ||
name: " | ||
Python: ${{ matrix.python-version }} | ||
CrateDB: ${{ matrix.cratedb-version }} | ||
on ${{ matrix.os }}" | ||
runs-on: ${{ matrix.os }} | ||
|
||
strategy: | ||
fail-fast: false | ||
matrix: | ||
os: [ ubuntu-22.04 ] | ||
python-version: [ "3.9", "3.12" ] | ||
cratedb-version: [ 'nightly' ] | ||
|
||
services: | ||
cratedb: | ||
image: crate/crate:${{ matrix.cratedb-version }} | ||
ports: | ||
- 4200:4200 | ||
- 5432:5432 | ||
env: | ||
CRATE_HEAP_SIZE: 4g | ||
|
||
steps: | ||
|
||
- name: Acquire sources | ||
uses: actions/checkout@v4 | ||
|
||
- name: Setup Python | ||
uses: actions/setup-python@v5 | ||
with: | ||
python-version: ${{ matrix.python-version }} | ||
architecture: x64 | ||
cache: "pip" | ||
cache-dependency-path: | | ||
pyproject.toml | ||
requirements.txt | ||
requirements-test.txt | ||
- name: Install utilities | ||
run: | | ||
pip install -r requirements.txt | ||
- name: Validate application/cratedb-toolkit | ||
run: | | ||
ngr test --accept-no-venv application/cratedb-toolkit |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,12 @@ | ||
[tool.pytest.ini_options] | ||
minversion = "2.0" | ||
addopts = """ | ||
-rfEXs -p pytester --strict-markers --verbosity=3 | ||
--capture=no | ||
""" | ||
log_level = "DEBUG" | ||
log_cli_level = "DEBUG" | ||
testpaths = ["*.py"] | ||
xfail_strict = true | ||
markers = [ | ||
] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
gitpython<4 | ||
platformdirs<5 | ||
pytest<9 | ||
requests<3 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
cratedb-toolkit[mongodb]==0.0.23 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,98 @@ | ||
import shlex | ||
import sys | ||
|
||
import requests | ||
import logging | ||
import platformdirs | ||
from cratedb_toolkit.util import DatabaseAdapter | ||
from git import Repo, RemoteProgress | ||
import subprocess | ||
|
||
logger = logging.getLogger(__name__) | ||
|
||
|
||
class GitProgressPrinter(RemoteProgress):
    """
    Progress handler that echoes every Git progress update to stderr.

    An instance is handed to `Repo.clone_from(..., progress=...)` in this
    module, so that a long-running clone leaves a visible trace in the
    test output.
    """

    def update(self, op_code, cur_count, max_count=None, message=""):
        """
        Print one progress record to stderr.

        The record consists of the operation code, the current count, the
        maximum count, the ratio of both, and the message text.

        :param op_code: Operation code, printed verbatim.
        :param cur_count: Current item count.
        :param max_count: Maximum item count. When it is falsy (`None` or
            zero), 100.0 is used as the divisor instead.
        :param message: Optional message. An empty message is printed as
            "NO MESSAGE".
        """
        # Fall back to 100.0 so a missing or zero maximum never divides by zero.
        divisor = max_count or 100.0
        ratio = cur_count / divisor
        text = message or "NO MESSAGE"
        record = (op_code, cur_count, max_count, ratio, text)
        print(*record, file=sys.stderr)
|
||
|
||
def test_ctk_load_table_mongodb_json():
    """
    Probe importing data from MongoDB Extended JSON files.

    The test drops the target tables, makes sure the example datasets are
    available in the user cache directory (cloning them on first use),
    invokes `ctk load table` as a subprocess, and finally validates the
    list of tables and their record counts in the database.

    :raises subprocess.CalledProcessError: When the `ctk` command exits
        with a non-zero status.
    :raises AssertionError: When the tables or record counts found in the
        database do not match the expectations.
    """

    # Expected record count per table. This mapping is the single source of
    # truth; the table names are derived from it, so both cannot drift apart.
    table_cardinalities = {
        "books": 431,
        "city_inspections": 81047,
        "companies": 2537,
        "countries-big": 21640,
        "countries-small": 248,
        "covers": 5071,
        "grades": 280,
        "products": 11,
        "profiles": 1515,
        "restaurant": 2548,
        "students": 200,
    }

    # Table names used for testing, in the insertion order of the mapping.
    table_names = list(table_cardinalities)

    db = DatabaseAdapter("crate://localhost:4200/?schema=from-mongodb")

    # Drop tables for blank canvas.
    for table_name in table_names:
        db.drop_table(table_name)

    # Define path to source data.
    mongodb_json_files_path = platformdirs.user_cache_path("cratedb-examples") / "mongodb_json_files"
    datasets_path = mongodb_json_files_path / "datasets"

    # Acquire source data. The clone is skipped when the datasets directory
    # exists already, so repeated runs reuse the cached checkout.
    if not datasets_path.exists():
        repository_url = "https://github.com/ozlerhakan/mongodb-json-files"
        print(f"Downloading repository: {repository_url}", file=sys.stderr)
        Repo.clone_from(
            # Use the very URL that has just been logged.
            url=repository_url,
            to_path=mongodb_json_files_path,
            progress=GitProgressPrinter(),
        )

    # Invoke data transfer. The backslashes are line continuations inside
    # the string literal; `shlex.split` turns the text into an argument list.
    command = f"""
    ctk load table \
        "file+bson://{datasets_path}/*.json?batch-size=2500" \
        --cratedb-sqlalchemy-url="crate://localhost:4200/from-mongodb"
    """
    print(f"Invoking CTK: {command}", file=sys.stderr)
    subprocess.check_call(shlex.split(command))

    # Validate data in database.
    # NOTE(review): the comparison is order-sensitive, so it relies on
    # `SHOW TABLES` returning the names in the order listed above -- confirm.
    results = db.run_sql("SHOW TABLES", records=True)
    results = [item["table_name"] for item in results]
    assert results == table_names

    # Compare the record count of each table with its expected cardinality.
    cardinalities = {table_name: db.count_records(table_name) for table_name in table_cardinalities}
    assert cardinalities == table_cardinalities