Skip to content

Commit

Permalink
make release-tag: Merge branch 'master' into stable
Browse files Browse the repository at this point in the history
  • Loading branch information
amontanez24 committed Aug 10, 2023
2 parents 502a77e + bfb726a commit fa37ca2
Show file tree
Hide file tree
Showing 57 changed files with 4,011 additions and 2,445 deletions.
12 changes: 1 addition & 11 deletions .github/workflows/integration.yml
Original file line number Diff line number Diff line change
Expand Up @@ -10,24 +10,14 @@ jobs:
runs-on: ${{ matrix.os }}
strategy:
matrix:
python-version: ['3.7', '3.8', '3.9', '3.10']
python-version: ['3.8', '3.9', '3.10', '3.11']
os: [ubuntu-latest, macos-latest, windows-latest]
steps:
- uses: actions/checkout@v1
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v2
with:
python-version: ${{ matrix.python-version }}
- if: matrix.os == 'windows-latest' && matrix.python-version != 3.10
name: Install dependencies - Windows
run: |
python -m pip install --upgrade pip
python -m pip install 'torch>=1.8.0,<2' -f https://download.pytorch.org/whl/cpu/torch/
- if: matrix.os == 'windows-latest' && matrix.python-version == 3.10
name: Install dependencies - Windows
run: |
python -m pip install --upgrade pip
python -m pip install 'torch>=1.11.0,<2' -f https://download.pytorch.org/whl/cpu/torch/
- name: Install dependencies
run: |
python -m pip install --upgrade pip
Expand Down
4 changes: 2 additions & 2 deletions .github/workflows/lint.yml
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,10 @@ jobs:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v1
- name: Set up Python 3.8
- name: Set up Python 3.9
uses: actions/setup-python@v1
with:
python-version: 3.8
python-version: 3.9
- name: Install dependencies
run: |
python -m pip install --upgrade pip
Expand Down
12 changes: 1 addition & 11 deletions .github/workflows/minimum.yml
Original file line number Diff line number Diff line change
Expand Up @@ -10,24 +10,14 @@ jobs:
runs-on: ${{ matrix.os }}
strategy:
matrix:
python-version: ['3.7', '3.8', '3.9', '3.10']
python-version: ['3.8', '3.9', '3.10', '3.11']
os: [ubuntu-latest, macos-latest, windows-latest]
steps:
- uses: actions/checkout@v1
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v2
with:
python-version: ${{ matrix.python-version }}
- if: matrix.os == 'windows-latest' && matrix.python-version != '3.10'
name: Install dependencies - Windows
run: |
python -m pip install --upgrade pip
python -m pip install 'torch==1.8.0' -f https://download.pytorch.org/whl/cpu/torch/
- if: matrix.os == 'windows-latest' && matrix.python-version == '3.10'
name: Install dependencies - Windows
run: |
python -m pip install --upgrade pip
python -m pip install 'torch==1.11.0' -f https://download.pytorch.org/whl/cpu/torch/
- name: Install dependencies
run: |
python -m pip install --upgrade pip
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/readme.yml
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ jobs:
runs-on: ${{ matrix.os }}
strategy:
matrix:
python-version: ['3.7', '3.8', '3.9', '3.10']
python-version: ['3.8', '3.9', '3.10', '3.11']
os: [ubuntu-latest, macos-latest] # skip windows bc rundoc fails
steps:
- uses: actions/checkout@v1
Expand Down
12 changes: 1 addition & 11 deletions .github/workflows/unit.yml
Original file line number Diff line number Diff line change
Expand Up @@ -10,24 +10,14 @@ jobs:
runs-on: ${{ matrix.os }}
strategy:
matrix:
python-version: ['3.7', '3.8', '3.9', '3.10']
python-version: ['3.8', '3.9', '3.10', '3.11']
os: [ubuntu-latest, macos-latest, windows-latest]
steps:
- uses: actions/checkout@v1
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v2
with:
python-version: ${{ matrix.python-version }}
- if: matrix.os == 'windows-latest' && matrix.python-version != 3.10
name: Install dependencies - Windows
run: |
python -m pip install --upgrade pip
python -m pip install 'torch>=1.8.0,<2' -f https://download.pytorch.org/whl/cpu/torch/
- if: matrix.os == 'windows-latest' && matrix.python-version == 3.10
name: Install dependencies - Windows
run: |
python -m pip install --upgrade pip
python -m pip install 'torch>=1.11.0,<2' -f https://download.pytorch.org/whl/cpu/torch/
- name: Install dependencies
run: |
python -m pip install --upgrade pip
Expand Down
41 changes: 41 additions & 0 deletions HISTORY.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,46 @@
# History

## v0.11.0 - 2023-08-10

This release adds a function that allows users to plot the cardinality of foreign and primary keys in synthetic data. More specifically, it graphs how frequently each number of children per parent row occurs in the parent table.

Additionally, architectural changes were made to improve the efficiency and error handling of the `QualityReport`! The progress bar is also enhanced to be more informative while the report is being generated.

This release also adds support for Python 3.11 and drops support for Python 3.7.

### New Features

* Visualize cardinality of foreign key columns - Issue [#283](https://github.com/sdv-dev/SDMetrics/issues/283) by @R-Palazzo
* Create single table BaseProperty class - Issue [#354](https://github.com/sdv-dev/SDMetrics/issues/354) by @amontanez24
* Create single table column shapes property - Issue [#355](https://github.com/sdv-dev/SDMetrics/issues/355) by @R-Palazzo
* Create single table column pair trends property - Issue [#356](https://github.com/sdv-dev/SDMetrics/issues/356) by @R-Palazzo
* Create multi table BaseProperty class - Issue [#357](https://github.com/sdv-dev/SDMetrics/issues/357) by @pvk-developer
* Create multi table column shapes and column pair trends properties - Issue [#358](https://github.com/sdv-dev/SDMetrics/issues/358) by @R-Palazzo
* Create Parent Child Relationships property class - Issue [#359](https://github.com/sdv-dev/SDMetrics/issues/359) by @pvk-developer
* In Multi Table Quality Report: Rename "Table Relationships" property to "Cardinality" - Issue [#360](https://github.com/sdv-dev/SDMetrics/issues/360) by @frances-h
* More accurate progress bar for single table Quality Report - Issue [#361](https://github.com/sdv-dev/SDMetrics/issues/361) by @R-Palazzo
* More accurate progress bar for multi table Quality Report - Issue [#362](https://github.com/sdv-dev/SDMetrics/issues/362) by @fealho
* Raise error in CorrelationSimilarity if either column is constant - Issue [#407](https://github.com/sdv-dev/SDMetrics/issues/407) by @fealho

### Bug Fixes

* Issue in building the denormalized table inside the Parent-Child Detection metrics - Issue [#328](https://github.com/sdv-dev/SDMetrics/issues/328) by @fealho
* Don't modify the rounding in the quality report - Issue [#401](https://github.com/sdv-dev/SDMetrics/issues/401) by @R-Palazzo
* The Cardinality property is missing some relationships - Issue [#404](https://github.com/sdv-dev/SDMetrics/issues/404) by @pvk-developer
* The Cardinality property is not returning a DataFrame - Issue [#405](https://github.com/sdv-dev/SDMetrics/issues/405) by @fealho
* Overall property score should be the average across all breakdowns - Issue [#415](https://github.com/sdv-dev/SDMetrics/issues/415) by @amontanez24

### Internal

* Use property classes in single table QualityReport - Issue [#370](https://github.com/sdv-dev/SDMetrics/issues/370) by @R-Palazzo
* Use property classes in multi table QualityReport - Issue [#371](https://github.com/sdv-dev/SDMetrics/issues/371) by @fealho
* Add add-on detection for premium metrics - Issue [#388](https://github.com/sdv-dev/SDMetrics/issues/388) by @amontanez24

### Maintenance

* Add support for Python 3.11 - Issue [#353](https://github.com/sdv-dev/SDMetrics/issues/353) by @amontanez24
* Drop support for Python 3.7 - Issue [#380](https://github.com/sdv-dev/SDMetrics/issues/380) by @amontanez24

## v0.10.1 - 2023-06-06

This release fixes a bug that was causing the `DiagnosticReport` to crash on the `NewRowSynthesis` metric. It also adds support for PyTorch 2.0!
Expand Down
2 changes: 1 addition & 1 deletion INSTALL.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

## Requirements

**SDMetrics** has been developed and tested on [Python 3.7, 3.8, 3.9, and 3.10](https://www.python.org/downloads/)
**SDMetrics** has been developed and tested on [Python 3.8, 3.9, 3.10 and 3.11](https://www.python.org/downloads/)

Also, although it is not strictly required, the usage of a [virtualenv](
https://virtualenv.pypa.io/en/latest/) is highly recommended in order to avoid
Expand Down
2 changes: 1 addition & 1 deletion conda/meta.yaml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
{% set version = '0.10.1' %}
{% set version = '0.11.0.dev1' %}

package:
name: "{{ name|lower }}"
Expand Down
90 changes: 86 additions & 4 deletions sdmetrics/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,18 @@

__author__ = 'MIT Data To AI Lab'
__email__ = '[email protected]'
__version__ = '0.10.1'
__version__ = '0.11.0.dev1'

import sys
import warnings as python_warnings
from operator import attrgetter
from types import ModuleType

import pandas as pd
from pkg_resources import iter_entry_points

from sdmetrics import (
column_pairs, demos, goal, multi_table, single_column, single_table, timeseries)
from sdmetrics._addons import _find_addons
from sdmetrics.demos import load_demo

__all__ = [
Expand All @@ -24,8 +29,6 @@
'timeseries',
]

_find_addons(group='sdmetrics_modules', parent_globals=globals())


def compute_metrics(metrics, real_data, synthetic_data, metadata=None, **kwargs):
"""Compute a collection of metrics on the given data.
Expand Down Expand Up @@ -75,3 +78,82 @@ def compute_metrics(metrics, real_data, synthetic_data, metadata=None, **kwargs)
})

return pd.DataFrame(scores)


def _get_addon_target(addon_path_name):
"""Find the target object for the add-on.
Args:
addon_path_name (str):
The add-on's name. The add-on's name should be the full path of valid Python
identifiers (i.e. importable.module:object.attr).
Returns:
tuple:
* object:
The base module or object the add-on should be added to.
* str:
The name the add-on should be added to under the module or object.
"""
module_path, _, object_path = addon_path_name.partition(':')
module_path = module_path.split('.')

if module_path[0] != __name__:
msg = f"expected base module to be '{__name__}', found '{module_path[0]}'"
raise AttributeError(msg)

target_base = sys.modules[__name__]
for submodule in module_path[1:-1]:
target_base = getattr(target_base, submodule)

addon_name = module_path[-1]
if object_path:
if len(module_path) > 1 and not hasattr(target_base, module_path[-1]):
msg = f"cannot add '{object_path}' to unknown submodule '{'.'.join(module_path)}'"
raise AttributeError(msg)

if len(module_path) > 1:
target_base = getattr(target_base, module_path[-1])

split_object = object_path.split('.')
addon_name = split_object[-1]

if len(split_object) > 1:
target_base = attrgetter('.'.join(split_object[:-1]))(target_base)

return target_base, addon_name


def _find_addons():
    """Discover and attach every add-on registered under ``sdmetrics_modules``.

    Each entry point in the group is loaded and set as an attribute on the
    target resolved from the entry point's name. An entry point that fails to
    load, or whose name cannot be resolved to a target, produces a warning and
    is skipped.

    When the add-on is a module it is also registered in ``sys.modules`` under
    ``<target name>.<add-on name>`` (unless that key already exists), so it can
    be imported from the top of a file as follows:

        from top_module.addon_module import x
    """
    for entry_point in iter_entry_points(group='sdmetrics_modules'):
        try:
            addon = entry_point.load()
        except Exception:  # pylint: disable=broad-exception-caught
            python_warnings.warn(
                f'Failed to load "{entry_point.name}" from "{entry_point.module_name}".'
            )
            continue

        try:
            target, name = _get_addon_target(entry_point.name)
        except AttributeError as error:
            python_warnings.warn(f"Failed to set '{entry_point.name}': {error}.")
            continue

        if isinstance(addon, ModuleType):
            # Never overwrite a module that is already registered.
            sys.modules.setdefault(f'{target.__name__}.{name}', addon)

        setattr(target, name, addon)


_find_addons()
26 changes: 0 additions & 26 deletions sdmetrics/_addons.py

This file was deleted.

39 changes: 19 additions & 20 deletions sdmetrics/column_pairs/statistical/correlation_similarity.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,14 @@
"""Correlation Similarity Metric."""

import warnings

import numpy as np
import pandas as pd
from scipy.stats import pearsonr, spearmanr

from sdmetrics.column_pairs.base import ColumnPairsMetric
from sdmetrics.errors import ConstantInputError
from sdmetrics.goal import Goal
from sdmetrics.utils import is_datetime
from sdmetrics.warnings import ConstantInputWarning


class CorrelationSimilarity(ColumnPairsMetric):
Expand All @@ -32,20 +31,30 @@ class CorrelationSimilarity(ColumnPairsMetric):
max_value = 1.0

@staticmethod
def _generate_warning_msg(columns, prefix, warning_messages):
def _raise_constant_data_error(columns, prefix):
if len(columns) > 1:
cols = ', '.join(columns)
warning_messages.append(
f"The {prefix} in columns '{cols}' contain a constant value. "
raise ConstantInputError(
f"The {prefix} in columns '{cols}' contains a constant value. "
'Correlation is undefined for constant data.'
)

elif len(columns):
warning_messages.append(
raise ConstantInputError(
f"The {prefix} in column '{columns[0]}' contains a constant value. "
'Correlation is undefined for constant data.'
)

@classmethod
def _validate_data_not_constant(cls, real_data, synthetic_data):
if (real_data.nunique() == 1).any():
real_columns = list(real_data.loc[:, real_data.nunique() == 1].columns)
cls._raise_constant_data_error(real_columns, 'real data')

if (synthetic_data.nunique() == 1).any():
synthetic_columns = list(synthetic_data.loc[:, synthetic_data.nunique() == 1].columns)
cls._raise_constant_data_error(synthetic_columns, 'synthetic data')

@classmethod
def compute_breakdown(cls, real_data, synthetic_data, coefficient='Pearson'):
"""Compare the breakdown of correlation similarity of two continuous columns.
Expand All @@ -67,21 +76,11 @@ def compute_breakdown(cls, real_data, synthetic_data, coefficient='Pearson'):
real_data = pd.DataFrame(real_data)
synthetic_data = pd.DataFrame(synthetic_data)

if (real_data.nunique() == 1).any() or (synthetic_data.nunique() == 1).any():
warning_messages = []
real_columns = list(real_data.loc[:, real_data.nunique() == 1].columns)
synthetic_columns = list(synthetic_data.loc[:, synthetic_data.nunique() == 1].columns)
cls._generate_warning_msg(real_columns, 'real data', warning_messages)
cls._generate_warning_msg(synthetic_columns, 'synthetic data', warning_messages)

for msg in warning_messages:
warnings.warn(ConstantInputWarning(msg))

return {'score': np.nan}
cls._validate_data_not_constant(real_data, synthetic_data)

real_data = real_data.dropna()
synthetic_data = synthetic_data.dropna()
column1, column2 = real_data.columns[:2]
real_data = real_data[[column1, column2]].dropna()
synthetic_data = synthetic_data[[column1, column2]].dropna()

if is_datetime(real_data[column1]):
real_data[column1] = pd.to_numeric(real_data[column1])
Expand Down Expand Up @@ -109,7 +108,7 @@ def compute_breakdown(cls, real_data, synthetic_data, coefficient='Pearson'):
return {
'score': 1 - abs(correlation_real - correlation_synthetic) / 2,
'real': correlation_real,
'synthetic': correlation_synthetic,
'synthetic': correlation_synthetic
}

@classmethod
Expand Down
4 changes: 4 additions & 0 deletions sdmetrics/errors.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,3 +3,7 @@

class IncomputableMetricError(Exception):
    """Error signalling that a metric could not be computed."""


class ConstantInputError(Exception):
    """Error signalling that the input data consists of a single repeated value."""
Loading

0 comments on commit fa37ca2

Please sign in to comment.