diff --git a/application/cratedb-toolkit/test_io.py b/application/cratedb-toolkit/test_io.py index efb727dc..dbe57303 100644 --- a/application/cratedb-toolkit/test_io.py +++ b/application/cratedb-toolkit/test_io.py @@ -134,6 +134,12 @@ def test_ctk_load_table_mongodb_json(drop_testing_tables): progress=GitProgressPrinter(), ) + # Strip bogus U+FEFF (BOM) characters from `countries-big.json` before loading. + countries_big_path = datasets_path / "countries-big.json" + payload = countries_big_path.read_text(encoding="utf-8") + payload = payload.replace("\ufeff", "") + countries_big_path.write_text(payload, encoding="utf-8") + # Invoke data transfer. command = f""" ctk load table \ diff --git a/application/cratedb-toolkit/zyp-mongodb-json-files.yaml b/application/cratedb-toolkit/zyp-mongodb-json-files.yaml index 31605164..6cb5b459 100644 --- a/application/cratedb-toolkit/zyp-mongodb-json-files.yaml +++ b/application/cratedb-toolkit/zyp-mongodb-json-files.yaml @@ -38,11 +38,78 @@ meta: type: zyp-project version: 1 + collections: -- address: - container: datasets - name: companies - pre: - rules: - - expression: .[] |= del(.image.available_sizes, .screenshots[].available_sizes) - type: jq + + - address: + container: datasets + name: books + pre: + rules: + - expression: .[] |= (._id |= tostring) + type: jq + + - address: + container: datasets + name: city_inspections + pre: + rules: + - expression: | + .[] |= ( + select(true) + | .address.number |= numbers + | .address.zip |= numbers + | .certificate_number |= tostring + ) + type: jq + + - address: + container: datasets + name: companies + pre: + rules: + - expression: | + .[] |= ( + select(true) + | del(.image.available_sizes, .screenshots[].available_sizes) + | .created_at |= strings + | .updated_at |= strings + ) + type: jq + + - address: + container: datasets + name: countries-big + pre: + rules: + - expression: .[] |= (.ISO |= tostring) + type: jq + + - address: + container: datasets + name: products + pre: + rules: + - expression: | + .[] |= ( + select(true) + | if 
(.for) then .for |= to_array end + | if (.type) then .type |= to_array end + | if (.limits.data.n) then .limits.data.n |= tostring end + | if (.limits.sms.n) then .limits.sms.n |= tostring end + | if (.limits.voice.n) then .limits.voice.n |= tostring end + ) + type: jq + + - address: + container: datasets + name: restaurant + pre: + rules: + - expression: | + .[] |= ( + select(true) + | .rating |= tostring + | .type |= to_array + ) + type: jq