
Commit

Update hive.py
acrylJonny committed Nov 12, 2024
1 parent 4f3b64d commit 5f7af5b
Showing 1 changed file with 14 additions and 6 deletions.
metadata-ingestion/src/datahub/ingestion/source/sql/hive.py
20 changes: 14 additions & 6 deletions
@@ -58,6 +58,7 @@
     StringTypeClass,
     BytesTypeClass,
     BooleanTypeClass,
+    SchemaFieldDataTypeClass,
 )
 
 from datahub.utilities import config_clean
@@ -530,7 +531,7 @@ def _schema_from_spark(
             version=0,
             fields=fields,
             hash="",
-            platformSchema={"name": "spark", "version": "1.0"}
+            platformSchema=OtherSchemaClass(rawSchema="")
         )
 
     def _schema_from_arrow(
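
Why this hunk matters: platformSchema expects one of DataHub's typed platform-schema classes (here OtherSchemaClass), not a plain dict like {"name": "spark", "version": "1.0"}. A minimal sketch of a construction consistent with the fixed line; all field values below are illustrative and not taken from this commit:

from datahub.metadata.schema_classes import (
    OtherSchemaClass,
    SchemaMetadataClass,
)

# Hypothetical example values, for illustration only.
schema_metadata = SchemaMetadataClass(
    schemaName="example_table",                     # hypothetical table name
    platform="urn:li:dataPlatform:hive",
    version=0,
    fields=[],                                      # SchemaFieldClass entries would go here
    hash="",
    platformSchema=OtherSchemaClass(rawSchema=""),  # typed class, not a raw dict
)
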
@@ -559,9 +560,16 @@ def _schema_from_arrow(
             platformSchema=OtherSchemaClass(rawSchema="")
         )
 
-    def _get_field_type(self, data_type: Any) -> SchemaFieldClass.typeClass:
-        """Map storage data types to DataHub types"""
-        # Map Spark/Arrow types to DataHub types
+    def _get_field_type(self, data_type: Any) -> SchemaFieldDataTypeClass:
+        """
+        Map storage data types to DataHub types
+
+        Args:
+            data_type: The source data type (from Spark/Arrow/etc.)
+
+        Returns:
+            DataHub SchemaFieldDataTypeClass instance
+        """
         type_mapping = {
             "string": StringTypeClass,
             "binary": BytesTypeClass,
@@ -580,9 +588,9 @@ def _get_field_type(self, data_type: Any) -> SchemaFieldClass.typeClass:
         type_str = str(data_type).lower()
         for key, type_class in type_mapping.items():
             if key in type_str:
-                return type_class()
+                return SchemaFieldDataTypeClass(type=type_class())
 
-        return NullTypeClass()
+        return SchemaFieldDataTypeClass(type=NullTypeClass())
 
     def get_storage_dataset_mcp(
         self,
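
The two return-statement changes above exist because SchemaFieldClass.type takes a SchemaFieldDataTypeClass wrapper, not a bare type instance such as StringTypeClass(). A short usage sketch, assuming a hypothetical string column; names are illustrative and not from this commit:

from datahub.metadata.schema_classes import (
    SchemaFieldClass,
    SchemaFieldDataTypeClass,
    StringTypeClass,
)

# What the fixed _get_field_type now returns for a "string" native type.
field_type = SchemaFieldDataTypeClass(type=StringTypeClass())

# The wrapper is what SchemaFieldClass expects for its type attribute.
field = SchemaFieldClass(
    fieldPath="example_column",   # hypothetical column name
    type=field_type,
    nativeDataType="string",      # the source system's own type string
)

Note that the lookup uses substring containment (key in type_str), so a source type whose lowercased string form is, say, "stringtype()" still matches the "string" key.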
