Skip to content

Commit

Permalink
[skip ci] add Oracle support (aws#629)
Browse files Browse the repository at this point in the history
  • Loading branch information
cnfait committed Jun 22, 2022
1 parent aa2a3d2 commit 4bf77b6
Show file tree
Hide file tree
Showing 14 changed files with 786 additions and 18 deletions.
6 changes: 3 additions & 3 deletions CONTRIBUTING.md
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,7 @@ or

* Install dependencies:

``poetry install --extras "sqlserver sparql"``
``poetry install --extras "sqlserver oracle sparql"``

* Run the validation script:

Expand Down Expand Up @@ -135,7 +135,7 @@ or

* Install dependencies:

``poetry install --extras "sqlserver sparql"``
``poetry install --extras "sqlserver oracle sparql"``

* Go to the ``test_infra`` directory

Expand Down Expand Up @@ -192,7 +192,7 @@ or

* Then run the command below to install all dependencies:

``poetry install --extras "sqlserver sparql"``
``poetry install --extras "sqlserver oracle sparql"``

* Go to the ``test_infra`` directory

Expand Down
3 changes: 2 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,7 @@ FROM "sampleDB"."sampleTable" ORDER BY time DESC LIMIT 3
- [004 - Parquet Datasets](https://github.com/awslabs/aws-data-wrangler/blob/main/tutorials/004%20-%20Parquet%20Datasets.ipynb)
- [005 - Glue Catalog](https://github.com/awslabs/aws-data-wrangler/blob/main/tutorials/005%20-%20Glue%20Catalog.ipynb)
- [006 - Amazon Athena](https://github.com/awslabs/aws-data-wrangler/blob/main/tutorials/006%20-%20Amazon%20Athena.ipynb)
- [007 - Databases (Redshift, MySQL, PostgreSQL and SQL Server)](https://github.com/awslabs/aws-data-wrangler/blob/main/tutorials/007%20-%20Redshift%2C%20MySQL%2C%20PostgreSQL%2C%20SQL%20Server.ipynb)
- [007 - Databases (Redshift, MySQL, PostgreSQL, SQL Server and Oracle)](https://github.com/awslabs/aws-data-wrangler/blob/main/tutorials/007%20-%20Redshift%2C%20MySQL%2C%20PostgreSQL%2C%20SQL%20Server%2C%20Oracle.ipynb)
- [008 - Redshift - Copy & Unload.ipynb](https://github.com/awslabs/aws-data-wrangler/blob/main/tutorials/008%20-%20Redshift%20-%20Copy%20%26%20Unload.ipynb)
- [009 - Redshift - Append, Overwrite and Upsert](https://github.com/awslabs/aws-data-wrangler/blob/main/tutorials/009%20-%20Redshift%20-%20Append%2C%20Overwrite%2C%20Upsert.ipynb)
- [010 - Parquet Crawler](https://github.com/awslabs/aws-data-wrangler/blob/main/tutorials/010%20-%20Parquet%20Crawler.ipynb)
Expand Down Expand Up @@ -151,6 +151,7 @@ FROM "sampleDB"."sampleTable" ORDER BY time DESC LIMIT 3
- [PostgreSQL](https://aws-data-wrangler.readthedocs.io/en/2.15.1/api.html#postgresql)
- [MySQL](https://aws-data-wrangler.readthedocs.io/en/2.15.1/api.html#mysql)
- [SQL Server](https://aws-data-wrangler.readthedocs.io/en/2.15.1/api.html#sqlserver)
- [Oracle](https://aws-data-wrangler.readthedocs.io/en/2.15.1/api.html#oracle)
- [Data API Redshift](https://aws-data-wrangler.readthedocs.io/en/2.15.1/api.html#data-api-redshift)
- [Data API RDS](https://aws-data-wrangler.readthedocs.io/en/2.15.1/api.html#data-api-rds)
- [OpenSearch](https://aws-data-wrangler.readthedocs.io/en/2.15.1/api.html#opensearch)
Expand Down
2 changes: 2 additions & 0 deletions awswrangler/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
mysql,
neptune,
opensearch,
oracle,
postgresql,
quicksight,
redshift,
Expand All @@ -42,6 +43,7 @@
"dynamodb",
"exceptions",
"opensearch",
"oracle",
"quicksight",
"s3",
"sts",
Expand Down
35 changes: 35 additions & 0 deletions awswrangler/_data_types.py
Original file line number Diff line number Diff line change
Expand Up @@ -138,6 +138,41 @@ def pyarrow2mysql( # pylint: disable=too-many-branches,too-many-return-statemen
raise exceptions.UnsupportedType(f"Unsupported MySQL type: {dtype}")


def pyarrow2oracle(  # pylint: disable=too-many-branches,too-many-return-statements
    dtype: pa.DataType, string_type: str
) -> str:
    """Pyarrow to Oracle Database data types conversion."""
    # uint64 cannot be represented losslessly, so reject it up front.
    if pa.types.is_uint64(dtype):
        raise exceptions.UnsupportedType("There is no support for uint64, please consider int64 or uint32.")
    # Dictionary-encoded columns are mapped according to their value type.
    if pa.types.is_dictionary(dtype):
        return pyarrow2oracle(dtype=dtype.value_type, string_type=string_type)
    # Decimal carries its own precision/scale into NUMBER(p,s).
    if pa.types.is_decimal(dtype):
        return f"NUMBER({dtype.precision},{dtype.scale})"
    # Strings use the caller-provided Oracle string type (e.g. VARCHAR2(n)).
    if pa.types.is_string(dtype):
        return string_type
    # Every remaining supported type has a fixed one-to-one mapping.
    # Integer widths pick a NUMBER precision wide enough for the value range.
    fixed_mappings = (
        (pa.types.is_int8, "NUMBER(3)"),
        (pa.types.is_int16, "NUMBER(5)"),
        (pa.types.is_uint8, "NUMBER(5)"),
        (pa.types.is_int32, "NUMBER(10)"),
        (pa.types.is_uint16, "NUMBER(10)"),
        (pa.types.is_int64, "NUMBER(19)"),
        (pa.types.is_uint32, "NUMBER(19)"),
        (pa.types.is_float32, "BINARY_FLOAT"),
        (pa.types.is_float64, "BINARY_DOUBLE"),
        (pa.types.is_boolean, "NUMBER(3)"),
        (pa.types.is_timestamp, "TIMESTAMP"),
        (pa.types.is_date, "DATE"),
        (pa.types.is_binary, "RAW"),
    )
    for predicate, oracle_type in fixed_mappings:
        if predicate(dtype):
            return oracle_type
    raise exceptions.UnsupportedType(f"Unsupported Oracle type: {dtype}")


def pyarrow2postgresql( # pylint: disable=too-many-branches,too-many-return-statements
dtype: pa.DataType, string_type: str
) -> str:
Expand Down
23 changes: 21 additions & 2 deletions awswrangler/_databases.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
"""Databases Utilities."""

import importlib.util
import logging
import ssl
from typing import Any, Dict, Generator, Iterator, List, NamedTuple, Optional, Tuple, Union, cast
Expand All @@ -11,6 +12,10 @@
from awswrangler import _data_types, _utils, exceptions, secretsmanager
from awswrangler.catalog import get_connection

_cx_Oracle_found = importlib.util.find_spec("cx_Oracle")
if _cx_Oracle_found:
import cx_Oracle # pylint: disable=import-error

_logger: logging.Logger = logging.getLogger(__name__)


Expand Down Expand Up @@ -42,7 +47,7 @@ def _get_connection_attributes_from_catalog(
database_sep = ";databaseName="
else:
database_sep = "/"
port, database = details["JDBC_CONNECTION_URL"].split(":")[3].split(database_sep)
port, database = details["JDBC_CONNECTION_URL"].split(":")[-1].split(database_sep)
ssl_context: Optional[ssl.SSLContext] = None
if details.get("JDBC_ENFORCE_SSL") == "true":
ssl_cert_path: Optional[str] = details.get("CUSTOM_JDBC_CERT")
Expand All @@ -57,11 +62,12 @@ def _get_connection_attributes_from_catalog(
f"No CA certificate found at {ssl_cert_path}."
)
ssl_context = ssl.create_default_context(cadata=ssl_cadata)

return ConnectionAttributes(
kind=details["JDBC_CONNECTION_URL"].split(":")[1].lower(),
user=details["USERNAME"],
password=details["PASSWORD"],
host=details["JDBC_CONNECTION_URL"].split(":")[2].replace("/", ""),
host=details["JDBC_CONNECTION_URL"].split(":")[-2].replace("/", "").replace("@", ""),
port=int(port),
database=dbname if dbname is not None else database,
ssl_context=ssl_context,
Expand Down Expand Up @@ -122,6 +128,16 @@ def _convert_params(sql: str, params: Optional[Union[List[Any], Tuple[Any, ...],
return args


def _convert_db_specific_objects(col_values: List[Any]) -> List[Any]:
    """Materialize driver-specific objects so pyarrow can ingest the column.

    Currently only handles cx_Oracle LOB handles, which are replaced by the
    result of ``LOB.read()``; all other values pass through untouched.
    """
    # Without cx_Oracle installed there is nothing to convert.
    if not _cx_Oracle_found:
        return col_values
    # Fast path: leave the column untouched unless a LOB is actually present.
    if not any(isinstance(value, cx_Oracle.LOB) for value in col_values):
        return col_values
    return [value.read() if isinstance(value, cx_Oracle.LOB) else value for value in col_values]


def _records2df(
records: List[Tuple[Any]],
cols_names: List[str],
Expand All @@ -133,12 +149,15 @@ def _records2df(
arrays: List[pa.Array] = []
for col_values, col_name in zip(tuple(zip(*records)), cols_names): # Transposing
if (dtype is None) or (col_name not in dtype):
col_values = _convert_db_specific_objects(col_values)
try:
array: pa.Array = pa.array(obj=col_values, safe=safe) # Creating Arrow array
except pa.ArrowInvalid as ex:
array = _data_types.process_not_inferred_array(ex, values=col_values) # Creating Arrow array
else:
try:
if dtype[col_name] == pa.string():
col_values = _convert_db_specific_objects(col_values)
array = pa.array(obj=col_values, type=dtype[col_name], safe=safe) # Creating Arrow array with dtype
except pa.ArrowInvalid:
array = pa.array(obj=col_values, safe=safe) # Creating Arrow array
Expand Down
Loading

0 comments on commit 4bf77b6

Please sign in to comment.