Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: indexer option to skip constraints creation #1691

Merged
merged 3 commits into from
Sep 28, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions frictionless/formats/sql/__spec__/test_mapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,3 +42,15 @@ def test_sql_mapper_write_field():
column2 = mapper.write_field(field2, table_name="table")
assert isinstance(column1.type, sa.Integer)
assert isinstance(column2.type, sa.Text)


def test_sql_mapper_write_field_ignore_constraints():
    """Check that ``ignore_constraints=True`` drops the NOT NULL mapping.

    Both fields are marked as required. The column written with default
    options must come out non-nullable, while the column written with
    ``ignore_constraints=True`` must stay nullable.
    """
    sql_mapper = formats.sql.SqlMapper("sqlite")
    constrained, relaxed = Schema.describe("data/table.csv").fields
    for field in (constrained, relaxed):
        # Each field gets its own dict so the two never share state.
        field.constraints = {"required": True}

    strict_column = sql_mapper.write_field(constrained, table_name="table")
    loose_column = sql_mapper.write_field(
        relaxed, table_name="table", ignore_constraints=True
    )

    assert strict_column.nullable is False
    assert loose_column.nullable is True
6 changes: 5 additions & 1 deletion frictionless/formats/sql/adapter.py
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,7 @@ def write_schema(
table_name: str,
force: bool = False,
with_metadata: bool = False,
ignore_constraints: bool = False,
) -> None:
with self.engine.begin() as conn:
if force:
Expand All @@ -130,7 +131,10 @@ def write_schema(
self.metadata.drop_all(conn, tables=[existing_table])
self.metadata.remove(existing_table)
table = self.mapper.write_schema(
schema, table_name=table_name, with_metadata=with_metadata
schema,
table_name=table_name,
with_metadata=with_metadata,
ignore_constraints=ignore_constraints,
)
table = table.to_metadata(self.metadata)
self.metadata.create_all(conn, tables=[table])
Expand Down
62 changes: 42 additions & 20 deletions frictionless/formats/sql/mapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -153,14 +153,19 @@ def read_type(self, column_type: str) -> str:
# Write

def write_schema( # type: ignore
self, schema: Schema, *, table_name: str, with_metadata: bool = False
self,
schema: Schema,
*,
table_name: str,
with_metadata: bool = False,
ignore_constraints: bool = False,
) -> Table:
"""Convert frictionless schema to sqlalchemy table"""
sa = platform.sqlalchemy
columns: List[Column] = [] # type: ignore
constraints: List[Constraint] = []

# Fields
# Metadata
if with_metadata:
columns.append( # type: ignore
sa.Column(
Expand All @@ -171,16 +176,19 @@ def write_schema( # type: ignore
)
)
columns.append(sa.Column(settings.ROW_VALID_IDENTIFIER, sa.Boolean)) # type: ignore

# Fields
for field in schema.fields:
column = self.write_field(field, table_name=table_name) # type: ignore
column = self.write_field( # type: ignore
field, table_name=table_name, ignore_constraints=ignore_constraints
)
columns.append(column) # type: ignore

# Primary key
if schema.primary_key:
Class = sa.UniqueConstraint if with_metadata else sa.PrimaryKeyConstraint
if not with_metadata:
constraint = Class(*schema.primary_key)
constraints.append(constraint)
constraint = Class(*schema.primary_key)
constraints.append(constraint)

# Foreign keys
for fk in schema.foreign_keys:
Expand All @@ -192,11 +200,18 @@ def write_schema( # type: ignore
constraint = sa.ForeignKeyConstraint(fields, foreign_fields)
constraints.append(constraint)

# Table
table = sa.Table(table_name, sa.MetaData(), *(columns + constraints))
# Prepare table
table_args = [table_name, sa.MetaData(), *columns] # type: ignore
if not ignore_constraints:
table_args += constraints # type: ignore

# Create table
table = sa.Table(*table_args)
return table

def write_field(self, field: Field, *, table_name: str) -> Column: # type: ignore
def write_field( # type: ignore
self, field: Field, *, table_name: str, ignore_constraints: bool = False
) -> Column: # type: ignore
"""Convert frictionless Field to sqlalchemy Column"""
sa = platform.sqlalchemy
quote = self.dialect.identifier_preparer.quote # type: ignore
Expand All @@ -206,8 +221,17 @@ def write_field(self, field: Field, *, table_name: str) -> Column: # type: igno
# General properties
quoted_name = quote(field.name)
column_type = self.write_type(field.type) # type: ignore

# Required constraint
nullable = not field.required

# Unique constraint
unique = field.constraints.get("unique", False)
if self.dialect.name == "mysql":
# MySQL requires keys to have an explicit maximum length
# https://stackoverflow.com/questions/1827063/mysql-error-key-specification-without-a-key-length
unique = unique and column_type is not sa.Text

# Length constraints
if field.type == "string":
min_length = field.constraints.get("minLength", None)
Expand All @@ -227,13 +251,6 @@ def write_field(self, field: Field, *, table_name: str) -> Column: # type: igno
if not isinstance(column_type, sa.CHAR) or self.dialect.name == "sqlite":
checks.append(Check("LENGTH(%s) >= %s" % (quoted_name, min_length)))

# Unique constraint
unique = field.constraints.get("unique", False)
if self.dialect.name == "mysql":
# MySQL requires keys to have an explicit maximum length
# https://stackoverflow.com/questions/1827063/mysql-error-key-specification-without-a-key-length
unique = unique and column_type is not sa.Text

# Other constraints
for const, value in field.constraints.items():
if const == "minimum":
Expand All @@ -252,15 +269,20 @@ def write_field(self, field: Field, *, table_name: str) -> Column: # type: igno
enum_name = "%s_%s_enum" % (table_name, field.name)
column_type = sa.Enum(*value, name=enum_name)

# Create column
column_args = [field.name, column_type] + checks # type: ignore
# Prepare column
# TODO: should this use "autoincrement=False"?
# https://github.com/Mause/duckdb_engine/issues/595#issuecomment-1495408566
column_kwargs = {"nullable": nullable, "unique": unique}
column_args = [field.name, column_type] # type: ignore
column_kwargs = {}
if field.description:
column_kwargs["comment"] = field.description
column = sa.Column(*column_args, **column_kwargs)
if not ignore_constraints:
column_args += checks # type: ignore
column_kwargs["nullable"] = nullable
column_kwargs["unique"] = unique

# Create column
column = sa.Column(*column_args, **column_kwargs)
return column

def write_type(self, field_type: str) -> Type[TypeEngine]: # type: ignore
Expand Down
4 changes: 3 additions & 1 deletion frictionless/indexer/indexer.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ class Indexer:
qsv_path: Optional[str] = None
use_fallback: bool = False
with_metadata: bool = False
ignore_constraints: bool = False
on_row: Optional[types.IOnRow] = None
on_progress: Optional[types.IOnProgress] = None
adapter: SqlAdapter = attrs.field(init=False)
Expand Down Expand Up @@ -72,6 +73,7 @@ def create_table(self):
table_name=self.table_name,
force=True,
with_metadata=self.with_metadata,
ignore_constraints=self.ignore_constraints,
)

def populate_table(self) -> Optional[Report]:
Expand Down Expand Up @@ -117,7 +119,7 @@ def populate_table_fast_sqlite(self):

def populate_table_fast_postgresql(self):
database_url = self.adapter.engine.url.render_as_string(hide_password=False)
with platform.psycopg.connect(database_url) as connection:
with platform.psycopg.connect(database_url) as connection: # type: ignore
with connection.cursor() as cursor:
query = 'COPY "%s" FROM STDIN CSV HEADER' % self.table_name
with cursor.copy(query) as copy: # type: ignore
Expand Down