Skip to content

Commit

Permalink
CTK: Improve transformations for MongoDB Table Loader
Browse files Browse the repository at this point in the history
  • Loading branch information
amotl committed Oct 9, 2024
1 parent 7f6bc85 commit 30accef
Show file tree
Hide file tree
Showing 2 changed files with 80 additions and 7 deletions.
6 changes: 6 additions & 0 deletions application/cratedb-toolkit/test_io.py
Original file line number Diff line number Diff line change
Expand Up @@ -134,6 +134,12 @@ def test_ctk_load_table_mongodb_json(drop_testing_tables):
progress=GitProgressPrinter(),
)

# The `countries-big.json` file contains stray U+FEFF (byte order mark) characters; strip them before invoking the data transfer.
countries_big_path = datasets_path / "countries-big.json"
payload = countries_big_path.read_text()
payload = payload.replace("\ufeff", "")
countries_big_path.write_text(payload)

# Invoke data transfer.
command = f"""
ctk load table \
Expand Down
81 changes: 74 additions & 7 deletions application/cratedb-toolkit/zyp-mongodb-json-files.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -38,11 +38,78 @@
# Zyp project file: a `meta` header (type and version), followed by the
# per-collection transformation rules listed under `collections`.
meta:
type: zyp-project
version: 1

collections:
# Collection `companies` (container `datasets`): a single jq rule under `pre`
# that deletes `.image.available_sizes` and each `.screenshots[].available_sizes`
# from every element selected by `.[]`.
- address:
container: datasets
name: companies
pre:
rules:
- expression: .[] |= del(.image.available_sizes, .screenshots[].available_sizes)
type: jq

# Collection `books` (container `datasets`): a single jq rule under `pre`
# that converts `._id` of every element selected by `.[]` with `tostring`.
- address:
container: datasets
name: books
pre:
rules:
- expression: .[] |= (._id |= tostring)
type: jq

# Collection `city_inspections` (container `datasets`): per-element jq pipeline.
# - `select(true)` passes each element through unchanged; apparently it is only
#   there so that every following stage can start with `|`.
# - `.address.number` and `.address.zip` are updated through `numbers`, which
#   passes through only number values.
#   NOTE(review): what happens to non-numeric values depends on how the jq
#   version in use treats an empty right-hand side of `|=` — confirm.
# - `.certificate_number` is converted with `tostring`.
- address:
container: datasets
name: city_inspections
pre:
rules:
- expression: |
.[] |= (
select(true)
| .address.number |= numbers
| .address.zip |= numbers
| .certificate_number |= tostring
)
type: jq
# Collection `companies` (container `datasets`): per-element jq pipeline.
# - `select(true)` passes each element through unchanged; every later stage is
#   chained with a plain pipe (`|`).
# - `del(...)` removes `.image.available_sizes` and each
#   `.screenshots[].available_sizes`.
# - `.created_at` and `.updated_at` are updated through `strings`, which passes
#   through only string values.
#   NOTE(review): what happens to non-string values depends on how the jq
#   version in use treats an empty right-hand side of `|=` — confirm.
- address:
container: datasets
name: companies
pre:
rules:
- expression: |
.[] |= (
select(true)
| del(.image.available_sizes, .screenshots[].available_sizes)
| .created_at |= strings
| .updated_at |= strings
)
type: jq
# Collection `countries-big` (container `datasets`): a single jq rule under
# `pre` that converts `.ISO` of every element selected by `.[]` with `tostring`.
- address:
container: datasets
name: countries-big
pre:
rules:
- expression: .[] |= (.ISO |= tostring)
type: jq

# Collection `products` (container `datasets`): per-element jq pipeline in which
# each conversion runs only when the guarded field is truthy.
# - `.for` and `.type` are passed through `to_array`.
#   NOTE(review): `to_array` is not defined in this file — presumably a helper
#   supplied by the transformation runtime; verify.
# - `.limits.data.n`, `.limits.sms.n` and `.limits.voice.n` are converted with
#   `tostring`.
# NOTE(review): the `if ... then ... end` form without `else` presumably needs
# a jq version that allows an optional `else` branch — confirm.
- address:
container: datasets
name: products
pre:
rules:
- expression: |
.[] |= (
select(true)
| if (.for) then .for |= to_array end
| if (.type) then .type |= to_array end
| if (.limits.data.n) then .limits.data.n |= tostring end
| if (.limits.sms.n) then .limits.sms.n |= tostring end
| if (.limits.voice.n) then .limits.voice.n |= tostring end
)
type: jq
# Collection `restaurant` (container `datasets`): per-element jq pipeline.
# - `.rating` is converted with `tostring`.
# - `.type` is passed through `to_array`.
#   NOTE(review): `to_array` is not defined in this file — presumably a helper
#   supplied by the transformation runtime; verify.
- address:
container: datasets
name: restaurant
pre:
rules:
- expression: |
.[] |= (
select(true)
| .rating |= tostring
| .type |= to_array
)
type: jq

0 comments on commit 30accef

Please sign in to comment.