Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Dedup rule validation #39

Closed
wants to merge 3 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 16 additions & 1 deletion pdgstaging/ConfigManager.py
Original file line number Diff line number Diff line change
Expand Up @@ -1433,6 +1433,19 @@ def list_updates(self):

return updates

@staticmethod
def validate_dedup_rules(gdf, config):
    """
    Check that every deduplication rule names an existing property and
    uses a supported comparison operator.

    Parameters
    ----------
    gdf : GeoDataFrame
        The dataset the rules apply to. Only its ``columns`` attribute
        is read.
    config : iterable
        The deduplication rules. Each rule is indexable, with the
        property (column) name at position 0 and the comparison
        operator at position 1, e.g. ``[["col1", "smaller"]]``.

    Raises
    ------
    ValueError
        If a rule does not provide both a property and an operator, if
        the property is not a column of ``gdf``, or if the operator is
        not one of 'smaller' or 'larger'.
    """
    gdf_cols = gdf.columns
    allowed_comp_operators = ['smaller', 'larger']
    for rule in config:
        # A malformed rule (too short, or not indexable at all) is
        # reported as a ValueError like every other validation failure,
        # rather than leaking an IndexError/TypeError to the caller.
        try:
            prop, comp_operator = rule[0], rule[1]
        except (IndexError, KeyError, TypeError) as err:
            raise ValueError(
                'invalid deduplication rule: expected a '
                f'[property, operator] pair but got {rule!r}') from err
        if prop not in gdf_cols:
            raise ValueError(
                f'invalid deduplication rule: property {prop} does not exist in dataset')
        if comp_operator not in allowed_comp_operators:
            raise ValueError(
                f'invalid deduplication rule: second parameter must be one of {allowed_comp_operators}')

@staticmethod
def validate_palette(palette):
if isinstance(palette, list):
Expand Down Expand Up @@ -1471,4 +1484,6 @@ def color_list_from_cmaps(cmap_name):
pal_len = 10 if cmap.N > 10 else cmap.N
rgb_vals = (cmap.discrete(pal_len).colors * 255).astype(int).tolist()
rgb_hex = [f'#{i:02x}{j:02x}{k:02x}' for i, j, k in rgb_vals]
return rgb_hex
return rgb_hex


17 changes: 17 additions & 0 deletions tests/test_deduplicator.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
import geopandas as gpd
import shapely
from pdgstaging.ConfigManager import ConfigManager


def test_validate_dedup_rules():
    """A well-formed dedup rule on an existing column validates cleanly."""
    # Two-row point dataset with a single non-geometry property, "col1".
    points = [shapely.Point(1, 1), shapely.Point(1, 2)]
    frame = gpd.GeoDataFrame({"col1": ["a", "b"], "geometry": points})

    # Cases are keyed by name; each supplies the dataset and its rules.
    cases = {
        "valid_rule1": {"data": frame, "rules": [["col1", "smaller"]]},
    }

    # Validation signals failure by raising, so returning is the pass.
    for case in cases.values():
        data, rules = case["data"], case["rules"]
        ConfigManager.validate_dedup_rules(data, rules)