Skip to content

Commit

Permalink
Merge pull request #39 from factly/fix/obj-description
Browse files Browse the repository at this point in the history
feat: Add insurance companies and district as validation
  • Loading branch information
100mi authored Aug 4, 2023
2 parents f924191 + 9736e49 commit 67c2643
Show file tree
Hide file tree
Showing 14 changed files with 1,816 additions and 110 deletions.
15 changes: 0 additions & 15 deletions app/api/api_v1/routers/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,21 +51,6 @@ async def execute_dataset_expectation(request: Request):

@router.post(
"/expectation/datasets/",
# response_model=Dict[
# str,
# Dict[
# str,
# Union[
# List[GeneralTableExpectation],
# RegexPatternExpectation,
# RegexMatchList,
# ColumnValuesToBeInSet,
# DateStrftimePattern,
# ],
# ],
# ],
# response_model_exclude_none=True,
# response_model_exclude_unset=True,
summary="Execute all possible expectation to a dataset",
)
async def execute_dataset_expectation_post(
Expand Down
91 changes: 34 additions & 57 deletions app/api/api_v1/routers/s3_checks.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,7 @@
from typing import List, Union

from fastapi import APIRouter, Form, HTTPException, status
from fastapi import APIRouter, Body, HTTPException, status

from app.core.config import Settings
from app.models.s3_checks import s3FileKeyCheckRequest, s3FileKeysCheckRequest
from app.utils.s3_checks import (
check_file_metadata,
check_files_metadata,
Expand All @@ -17,43 +16,33 @@
"/files/key/",
)
async def check_if_file_exist_in_bucket(
file_key: str = Form(...),
s3_access_key: Union[str, None] = Form(
None,
description="S3 access key. If None then take the default one from env variables",
),
s3_secret_key: Union[str, None] = Form(
None,
description="S3 secret key. If None then take the default one from env variables",
),
s3_endpoint_url: Union[str, None] = Form(
None,
description="S3 endpoint url key. If None then take the default one from env variables",
),
resource: Union[str, None] = Form(
None,
description="S3 resource. If None then take the default one from env variables",
),
request: s3FileKeyCheckRequest = Body(
None, examples=s3FileKeyCheckRequest.Config.schema_extra["examples"]
)
):
"""
Check if file exist in bucket
"""
s3_access_key = (
settings.S3_SOURCE_ACCESS_KEY
if s3_access_key is None
else s3_access_key
if request.s3_access_key is None
else request.s3_access_key
)
s3_secret_key = (
settings.S3_SOURCE_SECRET_KEY
if s3_secret_key is None
else s3_secret_key
if request.s3_secret_key is None
else request.s3_secret_key
)
s3_endpoint_url = (
settings.S3_SOURCE_ENDPOINT_URL
if s3_endpoint_url is None
else s3_endpoint_url
if request.s3_endpoint_url is None
else request.s3_endpoint_url
)
resource = (
settings.S3_SOURCE_RESOURCE
if request.resource is None
else request.resource
)
resource = settings.S3_SOURCE_RESOURCE if resource is None else resource
try:
s3_resource = get_s3_resource(
s3_access_key=s3_access_key,
Expand All @@ -67,48 +56,40 @@ async def check_if_file_exist_in_bucket(
detail=f"Error connecting to S3: {e}",
)
else:
file_metadata = await check_file_metadata(s3_resource, file_key)
file_metadata = await check_file_metadata(
s3_resource, request.file_key
)
return file_metadata


@router.post(
"/files",
)
async def check_if_files_exist_in_bucket(
file_keys: List[str] = Form(...),
s3_access_key: Union[str, None] = Form(
None,
description="S3 access key. If None then take the default one from env variables",
),
s3_secret_key: Union[str, None] = Form(
None,
description="S3 secret key. If None then take the default one from env variables",
),
s3_endpoint_url: Union[str, None] = Form(
None,
description="S3 endpoint url . If None then take the default one from env variables",
),
resource: Union[str, None] = Form(
None,
description="S3 resource. If None then take the default one from env variables",
),
request: s3FileKeysCheckRequest = Body(
None, examples=s3FileKeysCheckRequest.Config.schema_extra["examples"]
)
):
s3_access_key = (
settings.S3_SOURCE_ACCESS_KEY
if s3_access_key is None
else s3_access_key
if request.s3_access_key is None
else request.s3_access_key
)
s3_secret_key = (
settings.S3_SOURCE_SECRET_KEY
if s3_secret_key is None
else s3_secret_key
if request.s3_secret_key is None
else request.s3_secret_key
)
s3_endpoint_url = (
settings.S3_SOURCE_ENDPOINT_URL
if s3_endpoint_url is None
else s3_endpoint_url
if request.s3_endpoint_url is None
else request.s3_endpoint_url
)
resource = (
settings.S3_SOURCE_RESOURCE
if request.resource is None
else request.resource
)
resource = settings.S3_SOURCE_RESOURCE if resource is None else resource
try:
s3_resource = get_s3_resource(
s3_access_key=s3_access_key,
Expand All @@ -123,12 +104,8 @@ async def check_if_files_exist_in_bucket(
)
else:
# TODO : Check how Form is combining all the strings inside list
if len(file_keys) == 1:
file_keys = [key for key in file_keys[0].split(",")]
file_keys_set = set(file_keys)

files_metadata = await check_files_metadata(
session=s3_resource, file_keys=file_keys_set
session=s3_resource, file_keys=request.file_keys
)

return files_metadata
62 changes: 55 additions & 7 deletions app/core/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -176,7 +176,7 @@ class GeographySettings(BaseSettings):

COUNTRY_KEYWORD = "country"
STATE_KEYWORD = "state"
CITY_KEYWORD = "city"
DISTRICT_KEYWORD = "district"
COUNTRY_EXPECTATION = {
"data_asset_type": None,
"expectation_suite_name": "country_expectation_suite",
Expand All @@ -198,7 +198,7 @@ class GeographySettings(BaseSettings):
}
STATE_EXPECTATION = {
"data_asset_type": None,
"expectation_suite_name": "date_expectation_suite",
"expectation_suite_name": "state_expectation_suite",
"expectations": [
{
"expectation_type": "expect_column_values_to_be_in_set",
Expand All @@ -215,21 +215,21 @@ class GeographySettings(BaseSettings):
}
],
}
CITY_EXPECTATION = {
DISTRICT_EXPECTATION = {
"data_asset_type": None,
"expectation_suite_name": "city_expectation_suite",
"expectation_suite_name": "district_expectation_suite",
"expectations": [
{
"expectation_type": "expect_column_values_to_be_in_set",
"kwargs": {
"column": "city",
"column": "district",
"value_set": [],
"result_format": "SUMMARY",
},
"meta": {
"expectation_name": "City Name",
"expectation_name": "District Name",
"cleaning_pdf_link": "https://wp.me/ad1WQ9-dvg",
"expectation_error_message": "City Name should be from the Data Dictionary",
"expectation_error_message": "District Name should be from the Data Dictionary",
},
}
],
Expand Down Expand Up @@ -553,3 +553,51 @@ class TagsSettings(BaseSettings):
}
],
}


class InsuranceCompanySettings(BaseSettings):
    """Settings for the insurance-company-name column check.

    Bundles the column keyword together with an expectation-suite template
    that applies ``expect_column_values_to_be_in_set`` to that column.
    """

    # Column name used for insurance company values.
    INSURANCE_COMPANY_NAME_KEYWORD: str = "insurance_company"

    # Expectation-suite template for the column above.
    # NOTE(review): ``value_set`` is empty here -- presumably the allowed
    # names are injected by the caller before the suite runs; confirm.
    INSURANCE_COMPANY_NAME_EXPECTATION = {
        "data_asset_type": None,
        "expectation_suite_name": "insurance_company_name_expectation_suite",
        "expectations": [
            {
                "expectation_type": "expect_column_values_to_be_in_set",
                "kwargs": {
                    # Same value as the keyword default ("insurance_company"),
                    # resolved once when the class body is executed.
                    "column": INSURANCE_COMPANY_NAME_KEYWORD,
                    "value_set": [],
                    "result_format": "SUMMARY",
                },
                "meta": {
                    "expectation_name": "Insurance Company Name",
                    "cleaning_pdf_link": "https://wp.me/ad1WQ9-dvg",
                    "expectation_error_message": (
                        "Insurance Company Name should be from the Data Dictionary"
                    ),
                },
            },
        ],
    }


class PsuCompanySettings(BaseSettings):
    """Settings for the PSU-company-name column check.

    Bundles the column keyword together with an expectation-suite template
    that applies ``expect_column_values_to_be_in_set`` to that column.
    """

    # Column name used for PSU company values.
    PSU_COMPANY_NAME_KEYWORD: str = "psu_companies"

    # Expectation-suite template for the column above.
    # NOTE(review): ``value_set`` is empty here -- presumably the allowed
    # names are injected by the caller before the suite runs; confirm.
    PSU_COMPANY_NAME_EXPECTATION = {
        "data_asset_type": None,
        "expectation_suite_name": "psu_company_name_expectation_suite",
        "expectations": [
            {
                "expectation_type": "expect_column_values_to_be_in_set",
                "kwargs": {
                    # Same value as the keyword default ("psu_companies"),
                    # resolved once when the class body is executed.
                    "column": PSU_COMPANY_NAME_KEYWORD,
                    "value_set": [],
                    "result_format": "SUMMARY",
                },
                "meta": {
                    "expectation_name": "PSU Company Name",
                    "cleaning_pdf_link": "https://wp.me/ad1WQ9-dvg",
                    "expectation_error_message": (
                        "PSU Company Name should be from the Data Dictionary"
                    ),
                },
            },
        ],
    }
Loading

0 comments on commit 67c2643

Please sign in to comment.