fix(synthesizer): conform with recent changes to the metadata
portellaa committed Sep 7, 2023
1 parent 5bf1d00 commit 890a418
Showing 2 changed files with 27 additions and 37 deletions.
2 changes: 1 addition & 1 deletion .editorconfig

@@ -17,5 +17,5 @@ indent_size = 2
 indent_style = tab

 [*.py]
-indent_size = 2
+indent_size = 4
 indent_style = space
62 changes: 26 additions & 36 deletions src/ydata/sdk/synthesizers/synthesizer.py

@@ -179,47 +179,43 @@ def _validate_datasource_attributes(X: Union[DataSource, pdDataFrame], dataset_a
                 "The dataset attributes are invalid:\n {}".format('\n'.join(error_msgs)))

     @staticmethod
-    def _metadata_to_payload(datatype: DataSourceType, ds_metadata: Metadata, dataset_attrs: Optional[DataSourceAttrs] = None) -> list:
+    def _metadata_to_payload(
+        datatype: DataSourceType, ds_metadata: Metadata,
+        dataset_attrs: Optional[DataSourceAttrs] = None, target: str | None = None
+    ) -> dict:
         """Transform the metadata and dataset attributes into a valid
         payload.
         Arguments:
             datatype (DataSourceType): datasource type
             ds_metadata (Metadata): datasource metadata object
             dataset_attrs (Optional[DataSourceAttrs]): (optional) dataset attributes
+            target (Optional[str]): (optional) target column name
         Returns:
-            payload dictionary
+            metadata payload dictionary
         """
-        columns = {}
-        for c in ds_metadata.columns:
-            columns[c.name] = {
+
+        columns = [
+            {
                 'name': c.name,
-                'generation': True,
-                'dataType': c.datatype if c.datatype != DataType.STR.value else DataType.CATEGORICAL.value,
+                'generation': c.name in dataset_attrs.sortbykey or (c.name in dataset_attrs.generate_cols and c.name not in dataset_attrs.exclude_cols),
+                'dataType': DataType(dataset_attrs.dtypes[c.name]).value if c.name in dataset_attrs.dtypes else c.datatype,
                 'varType': c.vartype,
                 'entity': False,
             }
-        if dataset_attrs is not None:
-            if datatype == DataSourceType.TIMESERIES:
-                for c in ds_metadata.columns:
-                    columns[c.name]['sortBy'] = c.name in dataset_attrs.sortbykey
-
-            for c in dataset_attrs.entities:
-                columns[c]['entity'] = True
-
-            for c in dataset_attrs.generate_cols:
-                columns[c]['generation'] = True
+            for c in ds_metadata.columns ]

-            for c in dataset_attrs.exclude_cols:
-                columns[c]['generation'] = False
+        metadata = {
+            'columns': columns,
+            'target': target
+        }

-            # Update metadata based on the datatypes and vartypes provided by the user
-            for k, v in dataset_attrs.dtypes.items():
-                if k in columns and columns[k]['generation']:
-                    columns[k]['dataType'] = v.value
+        if dataset_attrs is not None:
+            if datatype == DataSourceType.TIMESERIES:
+                metadata['sortBy'] = [c for c in dataset_attrs.sortbykey]
+            metadata['entity'] = [c for c in dataset_attrs.entities]

-        return list(columns.values())
+        return metadata

     def _fit_from_datasource(
         self,
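
For context (not part of the commit): a minimal sketch of the dict the reworked _metadata_to_payload now returns, using hypothetical column names and attribute values.

# Hypothetical illustration of the new return shape: one metadata dict
# instead of the previous bare list of column payloads.
metadata = {
    'columns': [
        # 'generation' is now computed up front: a column is generated if it
        # is a sort key, or is in generate_cols and not in exclude_cols.
        {'name': 'timestamp', 'generation': True, 'dataType': 'date',
         'varType': 'datetime', 'entity': False},
        {'name': 'store_id', 'generation': True, 'dataType': 'categorical',
         'varType': 'int', 'entity': False},
    ],
    'target': 'sales',         # propagated from the new `target` argument
    'sortBy': ['timestamp'],   # only added for TIMESERIES datasources
    'entity': ['store_id'],    # entities are now a top-level list
}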
@@ -232,25 +228,19 @@ def _fit_from_datasource(
         condition_on: Optional[List[str]] = None
     ) -> None:
         _name = name if name is not None else str(uuid4())
-        columns = self._metadata_to_payload(
-            DataSourceType(X.datatype), X.metadata, dataset_attrs)
+        metadata = self._metadata_to_payload(
+            DataSourceType(X.datatype), X.metadata, dataset_attrs, target)
         payload = {
             'name': _name,
             'dataSourceUID': X.uid,
-            'metadata': {
-                'dataType': X.datatype,
-                "columns": columns,
-            },
-            'extraData': {
-                'privacy_level': privacy_level.value
-            }
+            'metadata': metadata,
+            'extraData': {},
+            'privacyLevel': privacy_level.value
         }
         if anonymize is not None:
             payload["extraData"]["anonymize"] = anonymize
         if condition_on is not None:
             payload["extraData"]["condition_on"] = condition_on
-        if target is not None:
-            payload['metadata']['target'] = target

         response = self._client.post('/synthesizer/', json=payload)
         data: list = response.json()
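
Again for context (not part of the commit): a hedged sketch of the request body _fit_from_datasource now posts, with placeholder values.

# Hypothetical request body for POST /synthesizer/ after this change:
# privacyLevel moves out of extraData to the top level, and metadata is
# taken verbatim from _metadata_to_payload (including the target column).
payload = {
    'name': 'my-synthesizer',             # placeholder name
    'dataSourceUID': '<datasource-uid>',  # placeholder UID
    'metadata': metadata,                 # dict sketched above
    'extraData': {},                      # still receives 'anonymize' and
                                          # 'condition_on' when provided
    'privacyLevel': 'HIGH_FIDELITY',      # illustrative privacy_level.value
}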
