Skip to content

Commit

Permalink
MongoDB: Improve type mapper by discriminating between INTEGER and BIGINT
Browse files Browse the repository at this point in the history
  • Loading branch information
amotl committed Aug 21, 2024
1 parent ffd6bd1 commit d1c0740
Show file tree
Hide file tree
Showing 3 changed files with 31 additions and 2 deletions.
2 changes: 2 additions & 0 deletions CHANGES.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@


## Unreleased
- MongoDB: Improve type mapper by discriminating between
`INTEGER` and `BIGINT`

## 2024/08/19 v0.0.17
- Processor: Updated Kinesis Lambda processor to understand AWS DMS
Expand Down
16 changes: 14 additions & 2 deletions cratedb_toolkit/io/mongodb/extract.py
Original file line number Diff line number Diff line change
Expand Up @@ -182,5 +182,17 @@ def extract_schema_from_array(array: list, schema: dict):
}


def get_type(o):
    """
    Resolve the type name of `o` by looking up its exact class in `TYPES_MAP`.

    Returns "UNKNOWN" when the class has no entry in the map.
    """
    value_class = type(o)
    return TYPES_MAP.get(value_class, "UNKNOWN")
def get_type(value):
    """
    Resolve the type name of `value`, with special treatment for plain integers.

    Values whose class is exactly `int` are classified by magnitude
    instead of being looked up in `TYPES_MAP`:

    INTEGER: -2^31 to 2^31-1
    BIGINT:  any other `int` (intended range is -2^63 to 2^63-1, but that
             outer bound is not checked here)

    Every other value is resolved by its exact class via `TYPES_MAP`,
    falling back to "UNKNOWN" when the class has no entry.
    """
    value_class = type(value)
    if value_class is not int:
        # Identity check on the class: subclasses of `int` (e.g. `bool`)
        # are resolved through the type map, not by magnitude.
        return TYPES_MAP.get(value_class, "UNKNOWN")

    int32_min, int32_max = -(2**31), 2**31 - 1
    fits_int32 = int32_min <= value <= int32_max
    return "INTEGER" if fits_int32 else "BIGINT"
15 changes: 15 additions & 0 deletions tests/io/mongodb/test_extract.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,21 @@ def test_primitive_types(self):
schema = trim_schema(extract.extract_schema_from_document(data, {}))
self.assertDictEqual(schema, expected)

def test_integer_types(self):
    """
    Validate extraction of numeric types INTEGER vs. BIGINT.

    Exercises both edges of the signed 32-bit range, plus the first value
    beyond each edge, so an off-by-one in the range check is caught.
    """
    data = {
        # Largest and smallest values that still count as INTEGER.
        "integer": 2147483647,
        "integer_min": -2147483648,
        # A typical epoch-milliseconds value, well beyond 32 bits.
        "bigint": 1563051934000,
        # First values just outside the 32-bit range on either side.
        "bigint_above_int32": 2147483648,
        "bigint_below_int32": -2147483649,
    }
    expected = {
        "integer": "INTEGER",
        "integer_min": "INTEGER",
        "bigint": "BIGINT",
        "bigint_above_int32": "BIGINT",
        "bigint_below_int32": "BIGINT",
    }
    schema = trim_schema(extract.extract_schema_from_document(data, {}))
    self.assertDictEqual(schema, expected)

def test_bson_types(self):
data = {
"a": bson.ObjectId("55153a8014829a865bbf700d"),
Expand Down

0 comments on commit d1c0740

Please sign in to comment.