From 93df312fc2a67f08b0285c64e4ee4d16ad506969 Mon Sep 17 00:00:00 2001 From: 100mi Date: Mon, 31 Jul 2023 18:14:20 +0530 Subject: [PATCH 1/2] feat: Add insurance companies and district as validation --- app/api/api_v1/routers/dataset.py | 15 - app/api/api_v1/routers/s3_checks.py | 91 ++-- app/core/config.py | 38 +- app/core/diseases.csv | 183 +++++++ app/core/district.csv | 738 ++++++++++++++++++++++++++++ app/core/insurance_companies.csv | 150 ++++++ app/models/s3_checks.py | 55 ++- app/utils/column_mapping.py | 20 +- app/utils/dataset.py | 2 + app/utils/geography.py | 5 +- app/utils/insurance.py | 66 +++ app/utils/s3_checks.py | 7 +- 12 files changed, 1284 insertions(+), 86 deletions(-) create mode 100644 app/core/diseases.csv create mode 100644 app/core/district.csv create mode 100644 app/core/insurance_companies.csv create mode 100644 app/utils/insurance.py diff --git a/app/api/api_v1/routers/dataset.py b/app/api/api_v1/routers/dataset.py index 97453c5..486eea3 100644 --- a/app/api/api_v1/routers/dataset.py +++ b/app/api/api_v1/routers/dataset.py @@ -51,21 +51,6 @@ async def execute_dataset_expectation(request: Request): @router.post( "/expectation/datasets/", - # response_model=Dict[ - # str, - # Dict[ - # str, - # Union[ - # List[GeneralTableExpectation], - # RegexPatternExpectation, - # RegexMatchList, - # ColumnValuesToBeInSet, - # DateStrftimePattern, - # ], - # ], - # ], - # response_model_exclude_none=True, - # response_model_exclude_unset=True, summary="Execute all possible expectation to a dataset", ) async def execute_dataset_expectation_post( diff --git a/app/api/api_v1/routers/s3_checks.py b/app/api/api_v1/routers/s3_checks.py index ea86db3..82780bd 100644 --- a/app/api/api_v1/routers/s3_checks.py +++ b/app/api/api_v1/routers/s3_checks.py @@ -1,8 +1,7 @@ -from typing import List, Union - -from fastapi import APIRouter, Form, HTTPException, status +from fastapi import APIRouter, Body, HTTPException, status from app.core.config import Settings +from 
app.models.s3_checks import s3FileKeyCheckRequest, s3FileKeysCheckRequest from app.utils.s3_checks import ( check_file_metadata, check_files_metadata, @@ -17,43 +16,33 @@ "/files/key/", ) async def check_if_file_exist_in_bucket( - file_key: str = Form(...), - s3_access_key: Union[str, None] = Form( - None, - description="S3 access key. If None then take the default one from env variables", - ), - s3_secret_key: Union[str, None] = Form( - None, - description="S3 secret key. If None then take the default one from env variables", - ), - s3_endpoint_url: Union[str, None] = Form( - None, - description="S3 endpoint url key. If None then take the default one from env variables", - ), - resource: Union[str, None] = Form( - None, - description="S3 resource. If None then take the default one from env variables", - ), + request: s3FileKeyCheckRequest = Body( + None, examples=s3FileKeyCheckRequest.Config.schema_extra["examples"] + ) ): """ Check if file exist in bucket """ s3_access_key = ( settings.S3_SOURCE_ACCESS_KEY - if s3_access_key is None - else s3_access_key + if request.s3_access_key is None + else request.s3_access_key ) s3_secret_key = ( settings.S3_SOURCE_SECRET_KEY - if s3_secret_key is None - else s3_secret_key + if request.s3_secret_key is None + else request.s3_secret_key ) s3_endpoint_url = ( settings.S3_SOURCE_ENDPOINT_URL - if s3_endpoint_url is None - else s3_endpoint_url + if request.s3_endpoint_url is None + else request.s3_endpoint_url + ) + resource = ( + settings.S3_SOURCE_RESOURCE + if request.resource is None + else request.resource ) - resource = settings.S3_SOURCE_RESOURCE if resource is None else resource try: s3_resource = get_s3_resource( s3_access_key=s3_access_key, @@ -67,7 +56,9 @@ async def check_if_file_exist_in_bucket( detail=f"Error connecting to S3: {e}", ) else: - file_metadata = await check_file_metadata(s3_resource, file_key) + file_metadata = await check_file_metadata( + s3_resource, request.file_key + ) return file_metadata @@ 
-75,40 +66,30 @@ async def check_if_file_exist_in_bucket( "/files", ) async def check_if_files_exist_in_bucket( - file_keys: List[str] = Form(...), - s3_access_key: Union[str, None] = Form( - None, - description="S3 access key. If None then take the default one from env variables", - ), - s3_secret_key: Union[str, None] = Form( - None, - description="S3 secret key. If None then take the default one from env variables", - ), - s3_endpoint_url: Union[str, None] = Form( - None, - description="S3 endpoint url . If None then take the default one from env variables", - ), - resource: Union[str, None] = Form( - None, - description="S3 resource. If None then take the default one from env variables", - ), + request: s3FileKeysCheckRequest = Body( + None, examples=s3FileKeysCheckRequest.Config.schema_extra["examples"] + ) ): s3_access_key = ( settings.S3_SOURCE_ACCESS_KEY - if s3_access_key is None - else s3_access_key + if request.s3_access_key is None + else request.s3_access_key ) s3_secret_key = ( settings.S3_SOURCE_SECRET_KEY - if s3_secret_key is None - else s3_secret_key + if request.s3_secret_key is None + else request.s3_secret_key ) s3_endpoint_url = ( settings.S3_SOURCE_ENDPOINT_URL - if s3_endpoint_url is None - else s3_endpoint_url + if request.s3_endpoint_url is None + else request.s3_endpoint_url + ) + resource = ( + settings.S3_SOURCE_RESOURCE + if request.resource is None + else request.resource ) - resource = settings.S3_SOURCE_RESOURCE if resource is None else resource try: s3_resource = get_s3_resource( s3_access_key=s3_access_key, @@ -123,12 +104,8 @@ async def check_if_files_exist_in_bucket( ) else: # TODO : Check how Form is combining all the strings inside list - if len(file_keys) == 1: - file_keys = [key for key in file_keys[0].split(",")] - file_keys_set = set(file_keys) - files_metadata = await check_files_metadata( - session=s3_resource, file_keys=file_keys_set + session=s3_resource, file_keys=request.file_keys ) return files_metadata diff --git 
a/app/core/config.py b/app/core/config.py index 653ec0b..628b789 100644 --- a/app/core/config.py +++ b/app/core/config.py @@ -176,7 +176,7 @@ class GeographySettings(BaseSettings): COUNTRY_KEYWORD = "country" STATE_KEYWORD = "state" - CITY_KEYWORD = "city" + DISTRICT_KEYWORD = "district" COUNTRY_EXPECTATION = { "data_asset_type": None, "expectation_suite_name": "country_expectation_suite", @@ -198,7 +198,7 @@ class GeographySettings(BaseSettings): } STATE_EXPECTATION = { "data_asset_type": None, - "expectation_suite_name": "date_expectation_suite", + "expectation_suite_name": "state_expectation_suite", "expectations": [ { "expectation_type": "expect_column_values_to_be_in_set", @@ -215,21 +215,21 @@ class GeographySettings(BaseSettings): } ], } - CITY_EXPECTATION = { + DISTRICT_EXPECTATION = { "data_asset_type": None, - "expectation_suite_name": "city_expectation_suite", + "expectation_suite_name": "district_expectation_suite", "expectations": [ { "expectation_type": "expect_column_values_to_be_in_set", "kwargs": { - "column": "city", + "column": "district", "value_set": [], "result_format": "SUMMARY", }, "meta": { - "expectation_name": "City Name", + "expectation_name": "District Name", "cleaning_pdf_link": "https://wp.me/ad1WQ9-dvg", - "expectation_error_message": "City Name should be from the Data Dictionary", + "expectation_error_message": "District Name should be from the Data Dictionary", }, } ], @@ -553,3 +553,27 @@ class TagsSettings(BaseSettings): } ], } + + +class InsuranceCompanySettings(BaseSettings): + + INSURANCE_COMPANY_NAME_KEYWORD: str = "insurance_company" + INSURANCE_COMPANY_NAME_EXPECTATION = { + "data_asset_type": None, + "expectation_suite_name": "insurance_company_name_expectation_suite", + "expectations": [ + { + "expectation_type": "expect_column_values_to_be_in_set", + "kwargs": { + "column": "insurance_company", + "value_set": [], + "result_format": "SUMMARY", + }, + "meta": { + "expectation_name": "Insurance Company Name", + 
"cleaning_pdf_link": "https://wp.me/ad1WQ9-dvg", + "expectation_error_message": "Insurance Company Name should be from the Data Dictionary", + }, + } + ], + } diff --git a/app/core/diseases.csv b/app/core/diseases.csv new file mode 100644 index 0000000..81fcb01 --- /dev/null +++ b/app/core/diseases.csv @@ -0,0 +1,183 @@ +diseases +Acute Diarrhoeal Disease +Acute Diarrhoeal Disease (Rota virus) +Acute Diarrhoeal Disease and Respiratory Tract Infection/Viral Infection +Acute Encephalitis Syndrome +Acute Encephalitis Syndrome (Japanese Encephalitis) +Acute Febrile Illness +Acute Febrile Illness (Enteric Fever) +Acute Febrile Illness/Malaria +Acute Febrile Illness/Viral Fever +Acute Flaccid Paralysis +Acute Flaccid Paralysis (Epstein Bar Virus & Human Herpes Virus) +Acute Meningitis +Acute Meningoencephalitis (Naegleria fowleri) +Acute Respiratory Illness +Acute Respiratory Infection +Acute Respiratory Influenza +Acute Viral Conjunctivitis +Acute Viral Fever +Adenovirus +Alcohol Poisoning +Allergic Conjunctivitis +Amoebic Dysentery +Anthrax +Avian Influenza +Bacillary Dysentery +Benzene Hexachloride (BHC) Poisoning +Brucellosis +Chandipura Virus (CHPV) +Chemical Gas Poisoning +Chickenpox +Chickenpox/Measles +Chickenpox/Mumps +Chikungunya +Chikungunya/Leptospirosis +Cholera +Cholera (Classical) +COVID-19 +Crimean-Congo Haemorrhagic Fever (CCHF) +Cutaneous Anthrax +Cutaneous Leishmaniasis +Dengue +Dengue/Chikungunya +Dengue/Japanese Encephalitis +Dengue/Leptospirosis +Dengue/Scrub Typhus +Diarrhoea +Diphtheria +Dog Bite +Drug side effects +DVD +Dysentery +Encephalitis +Enteric Fever +Enteric Fever (Typhiod) +Epidemic Dropsy +Fever +Fever (Mixed Infection) +Fever and Headache +Fever and Upper Respiratory Tract Infection (URTI) +Fever Leptospirosis +Fever of Unknown Origin (PUO) +Fever with Altered Sensorium +Fever with Arthralgia +Fever with Body Pains +Fever with Cough +Fever with Joint Pain +Fever with Rash +Fever with Rash (Enterovirus) +Fever/Chikungunya +Fever/Dengue 
+Filariasis +Food Poisoning +Food Poisoning (Acute Diarrhoeal Disease) +Food poisoning (Adverse effect following Vitamin A administration) +Food Poisoning (Bamangadh) +Food Poisoning (Caster Fruit) +Food Poisoning (Castor Seed) +Food Poisoning (EIEC and Shigella) +Food Poisoning (Jatropha Seeds) +Food Poisoning (Mushroom) +Food Poisoning (NoroVirus) +Food Poisoning (Salmonellosis) +Food Poisoning (Seed) +Food Poisoning (Staphylococcus) +Food poisoning (Trichinosis) +Gas Poisoning +Gastroenteritis +H1N1 +H3N2 +Hand Foot and Mouth Disease +Hepatitis +Hepatitis A/ Leptospirosis +Hepatitis-A +Hepatitis-A and B/ Leptospirosis +Hepatitis-A and E +Hepatitis-B +Hepatitis-B and C +Hepatitis-C +Hepatitis-E +Hepatitis/ Jaundice +Herpes Simplex Encephalitis +Human Rabies +Indigenous Malaria +Influenza like illness +Influenza-A +Influenza-B +Japanese Encephalitis +Jaundice +Kala Azar +Kyasanur Forest Disease +Leishmaniasis +Leptospirosis +Leptospirosis/Scrub Typhus +Leptospirosis/Typhoid +Lyme Disease +Malaria +Malaria (Plasmodium Falciparum) +Malaria (Plasmodium Vivax) +Malaria/Dengue +Malaria/Leptospirosis +Measles +Measles / Chickenpox +Measles/Rubella +Melioidosis +Meningitis +Meningococcal Meningitis +Methanol Poisoning +Mixed Fever +Monkeypox +Mumps +Mushroom Poisoning +Neonatal Tetanus +Nipah Viral Disease +Nipah Viral Encephalitis +Norovirus +Paederus Dermatitis From Rove Beetle +Pertussis +Poisoning +Primary Amoebic Meningoencephalitis (PAM) +Pyrexia of Unknown Origin (PUO) +Rabies +Rickettsia Scrub Typhus +Rubella +Salmonella Gastroenteritis +Salmonellosis +Scabies +Scrub Typhus +Scrub Typhus and Chikungunya +Scrub Typhus/Dengue/Chikungunya +Scrub Typhus/Dengue/Leptospirosis +Seasonal +Seasonal Influenza +Shigellosis +Staphylococcal Food Poisoning +Sub-cutaneous Worm +Swine Flu (H1N1) +Trichinella +Trichinellosis +Trypanosomiasis +Typhoid +Typhoid (Enteric Fever) +Typhoid and Chikungunya +Typhoid Fever +Typhoid (Dengue and Scrub Typhus) +Uneasiness (Vomiting and 
Diarrhoea) +Upper Respiratory Tract Infection (URTI) +Viral Encephalitis +Viral Exanthems (Multiple Papulo Pustular Lesions) +Viral Fever +Viral Fever (Chikungunya) +Viral Fever (Dengue) +Viral Fever (Dengue/Chikungunya) +Viral Fever (Leptospirosis / Chikungunya) +Viral Fever (Leptospirosis) +Viral Infection +Visceral Leishmaniasis +Vitamin A Overdosage +Vomiting/Diarrhoea +West Nile +West Nile Fever +West Nile/Dengue +Zika Virus Disease \ No newline at end of file diff --git a/app/core/district.csv b/app/core/district.csv new file mode 100644 index 0000000..a2ce8ef --- /dev/null +++ b/app/core/district.csv @@ -0,0 +1,738 @@ +districts +Nicobars +North and Middle Andaman +South Andamans +Anantapur +Chittoor +East Godavari +Guntur +Krishna +Kurnool +Prakasam +SPSR Nellore +Srikakulam +Visakhapatanam +Vizianagaram +West Godavari +Y.S.R. +Anjaw +Changlang +Dibang Valley +East Kameng +East Siang +Kamle +Kra Daadi +Kurung Kumey +Leparada +Lohit +Longding +Lower Dibang Valley +Lower Siang +Lower Subansiri +Namsai +Pakke Kessang +Papum Pare +Shi Yomi +Siang +Tawang +Tirap +Upper Siang +Upper Subansiri +West Kameng +West Siang +Baksa +Barpeta +Biswanath +Bongaigaon +Cachar +Charaideo +Chirang +Darrang +Dhemaji +Dhubri +Dibrugarh +Dima Hasao +Goalpara +Golaghat +Hailakandi +Hojai +Jorhat +Kamrup +Kamrup Metro +Karbi Anglong +Karimganj +Kokrajhar +Lakhimpur +Majuli +Marigaon +Nagaon +Nalbari +Sivasagar +Sonitpur +South Salmara Mancachar +Tinsukia +Udalguri +West Karbi Anglong +Araria +Arwal +Aurangabad +Banka +Begusarai +Bhagalpur +Bhojpur +Buxar +Darbhanga +Gaya +Gopalganj +Jamui +Jehanabad +Kaimur (Bhabua) +Katihar +Khagaria +Kishanganj +Lakhisarai +Madhepura +Madhubani +Munger +Muzaffarpur +Nalanda +Nawada +Pashchim Champaran +Patna +Purbi Champaran +Purnia +Rohtas +Saharsa +Samastipur +Saran +Sheikhpura +Sheohar +Sitamarhi +Siwan +Supaul +Vaishali +Chandigarh +Balod +Baloda Bazar +Balrampur +Bastar +Bemetara +Bijapur +Bilaspur +Dantewada +Dhamtari +Durg +Gariyaband 
+Gaurella Pendra Marwahi +Janjgir-Champa +Jashpur +Kabirdham +Kanker +Kondagaon +Korba +Korea +Mahasamund +Mungeli +Narayanpur +Raigarh +Raipur +Rajnandgaon +Sukma +Surajpur +Surguja +Central +East +New Delhi +North +North East +North West +Shahdara +South +South East +South West +West +North Goa +South Goa +Ahmadabad +Amreli +Anand +Arvalli +Banas Kantha +Bharuch +Bhavnagar +Botad +Chhotaudepur +Dang +Devbhumi Dwarka +Dohad +Gandhinagar +Gir Somnath +Jamnagar +Junagadh +Kachchh +Kheda +Mahesana +Mahisagar +Morbi +Narmada +Navsari +Panch Mahals +Patan +Porbandar +Rajkot +Sabar Kantha +Surat +Surendranagar +Tapi +Vadodara +Valsad +Ambala +Bhiwani +Charki Dadri +Faridabad +Fatehabad +Gurugram +Hisar +Jhajjar +Jind +Kaithal +Karnal +Kurukshetra +Mahendragarh +Nuh +Palwal +Panchkula +Panipat +Rewari +Rohtak +Sirsa +Sonipat +Yamunanagar +Bilaspur +Chamba +Hamirpur +Kangra +Kinnaur +Kullu +Lahul and Spiti +Mandi +Shimla +Sirmaur +Solan +Una +Anantnag +Bandipora +Baramulla +Budgam +Doda +Ganderbal +Jammu +Kathua +Kishtwar +Kulgam +Kupwara +Poonch +Pulwama +Rajouri +Ramban +Reasi +Samba +Shopian +Srinagar +Udhampur +Bokaro +Chatra +Deoghar +Dhanbad +Dumka +East Singhbum +Garhwa +Giridih +Godda +Gumla +Hazaribagh +Jamtara +Khunti +Koderma +Latehar +Lohardaga +Pakur +Palamu +Ramgarh +Ranchi +Sahebganj +Saraikela Kharsawan +Simdega +West Singhbhum +Bagalkote +Ballari +Belagavi +Bengaluru Rural +Bengaluru Urban +Bidar +Chamarajanagara +Chikkaballapura +Chikkamagaluru +Chitradurga +Dakshina Kannada +Davangere +Dharwad +Gadag +Hassan +Haveri +Kalaburagi +Kodagu +Kolar +Koppal +Mandya +Mysuru +Raichur +Ramanagara +Shivamogga +Tumakuru +Udupi +Uttara Kannada +Vijayapura +Yadgir +Alappuzha +Ernakulam +Idukki +Kannur +Kasaragod +Kollam +Kottayam +Kozhikode +Malappuram +Palakkad +Pathanamthitta +Thiruvananthapuram +Thrissur +Wayanad +Kargil +Leh Ladakh +Lakshadweep District +Agar Malwa +Alirajpur +Anuppur +Ashoknagar +Balaghat +Barwani +Betul +Bhind +Bhopal +Burhanpur +Chhatarpur 
+Chhindwara +Damoh +Datia +Dewas +Dhar +Dindori +East Nimar +Guna +Gwalior +Harda +Hoshangabad +Indore +Jabalpur +Jhabua +Katni +Khargone +Mandla +Mandsaur +Morena +Narsinghpur +Neemuch +Niwari +Panna +Raisen +Rajgarh +Ratlam +Rewa +Sagar +Satna +Sehore +Seoni +Shahdol +Shajapur +Sheopur +Shivpuri +Sidhi +Singrauli +Tikamgarh +Ujjain +Umaria +Vidisha +Ahmednagar +Akola +Amravati +Aurangabad +Beed +Bhandara +Buldhana +Chandrapur +Dhule +Gadchiroli +Gondia +Hingoli +Jalgaon +Jalna +Kolhapur +Latur +Mumbai +Mumbai Suburban +Nagpur +Nanded +Nandurbar +Nashik +Osmanabad +Palghar +Parbhani +Pune +Raigad +Ratnagiri +Sangli +Satara +Sindhudurg +Solapur +Thane +Wardha +Washim +Yavatmal +Bishnupur +Chandel +Churachandpur +Imphal East +Imphal West +Jiribam +Kakching +Kamjong +Kangpokpi +Noney +Pherzawl +Senapati +Tamenglong +Tengnoupal +Thoubal +Ukhrul +East Garo Hills +East Jaintia Hills +East Khasi Hills +North Garo Hills +Ri Bhoi +South Garo Hills +South West Garo Hills +South West Khasi Hills +West Garo Hills +West Jaintia Hills +West Khasi Hills +Aizawl +Champhai +Hnahthial +Khawzawl +Kolasib +Lawngtlai +Lunglei +Mamit +Saiha +Saitual +Serchhip +Dimapur +Kiphire +Kohima +Longleng +Mokokchung +Mon +Noklak +Peren +Phek +Tuensang +Wokha +Zunheboto +Anugul +Balangir +Baleshwar +Bargarh +Bhadrak +Boudh +Cuttack +Deogarh +Dhenkanal +Gajapati +Ganjam +Jagatsinghapur +Jajapur +Jharsuguda +Kalahandi +Kandhamal +Kendrapara +Kendujhar +Khordha +Koraput +Malkangiri +Mayurbhanj +Nabarangpur +Nayagarh +Nuapada +Puri +Rayagada +Sambalpur +Sonepur +Sundargarh +Karaikal +Mahe +Pondicherry +Yanam +Amritsar +Barnala +Bathinda +Faridkot +Fatehgarh Sahib +Fazilka +Ferozepur +Gurdaspur +Hoshiarpur +Jalandhar +Kapurthala +Ludhiana +Malerkotla +Mansa +Moga +Pathankot +Patiala +Rupnagar +Sangrur +S.A.S Nagar +Shahid Bhagat Singh Nagar +Sri Muktsar Sahib +Tarn Taran +Ajmer +Alwar +Banswara +Baran +Barmer +Bharatpur +Bhilwara +Bikaner +Bundi +Chittorgarh +Churu +Dausa +Dholpur +Dungarpur 
+Ganganagar +Hanumangarh +Jaipur +Jaisalmer +Jalore +Jhalawar +Jhunjhunu +Jodhpur +Karauli +Kota +Nagaur +Pali +Pratapgarh +Rajsamand +Sawai Madhopur +Sikar +Sirohi +Tonk +Udaipur +East District +North District +South District +West District +Ariyalur +Chengalpattu +Chennai +Coimbatore +Cuddalore +Dharmapuri +Dindigul +Erode +Kallakurichi +Kanchipuram +Kanniyakumari +Karur +Krishnagiri +Madurai +Mayiladuthurai +Nagapattinam +Namakkal +Perambalur +Pudukkottai +Ramanathapuram +Ranipet +Salem +Sivaganga +Tenkasi +Thanjavur +Theni +The Nilgiris +Thiruvallur +Thiruvarur +Tiruchirappalli +Tirunelveli +Tirupathur +Tiruppur +Tiruvannamalai +Tuticorin +Vellore +Villupuram +Virudhunagar +Adilabad +Bhadradri Kothagudem +Hanumakonda +Hyderabad +Jagitial +Jangoan +Jayashankar Bhupalapally +Jogulamba Gadwal +Kamareddy +Karimnagar +Khammam +Kumuram Bheem Asifabad +Mahabubabad +Mahabubnagar +Mancherial +Medak +Medchal Malkajgiri +Mulugu +Nagarkurnool +Nalgonda +Narayanpet +Nirmal +Nizamabad +Peddapalli +Rajanna Sircilla +Ranga Reddy +Sangareddy +Siddipet +Suryapet +Vikarabad +Wanaparthy +Warangal +Yadadri Bhuvanagiri +Dadra and Nagar Haveli +Daman +Diu +Dhalai +Gomati +Khowai +North Tripura +Sepahijala +South Tripura +Unakoti +West Tripura +Almora +Bageshwar +Chamoli +Champawat +Dehradun +Haridwar +Nainital +Pauri Garhwal +Pithoragarh +Rudra Prayag +Tehri Garhwal +Udam Singh Nagar +Uttar Kashi +Agra +Aligarh +Ambedkar Nagar +Amethi +Amroha +Auraiya +Ayodhya +Azamgarh +Baghpat +Bahraich +Ballia +Balrampur +Banda +Barabanki +Bareilly +Basti +Bhadohi +Bijnor +Budaun +Bulandshahr +Chandauli +Chitrakoot +Deoria +Etah +Etawah +Farrukhabad +Fatehpur +Firozabad +Gautam Buddha Nagar +Ghaziabad +Ghazipur +Gonda +Gorakhpur +Hamirpur +Hapur +Hardoi +Hathras +Jalaun +Jaunpur +Jhansi +Kannauj +Kanpur Dehat +Kanpur Nagar +Kasganj +Kaushambi +Kheri +Kushi Nagar +Lalitpur +Lucknow +Maharajganj +Mahoba +Mainpuri +Mathura +Mau +Meerut +Mirzapur +Moradabad +Muzaffarnagar +Pilibhit +Pratapgarh 
+Prayagraj +Rae Bareli +Rampur +Saharanpur +Sambhal +Sant Kabeer Nagar +Shahjahanpur +Shamli +Shravasti +Siddharth Nagar +Sitapur +Sonbhadra +Sultanpur +Unnao +Varanasi +24 Paraganas North +24 Paraganas South +Alipurduar +Bankura +Birbhum +Coochbehar +Darjeeling +Dinajpur Dakshin +Dinajpur Uttar +Hooghly +Howrah +Jalpaiguri +Jhargram +Kalimpong +Kolkata +Maldah +Medinipur East +Medinipur West +Murshidabad +Nadia +Paschim Bardhaman +Purba Bardhaman +Purulia \ No newline at end of file diff --git a/app/core/insurance_companies.csv b/app/core/insurance_companies.csv new file mode 100644 index 0000000..3823490 --- /dev/null +++ b/app/core/insurance_companies.csv @@ -0,0 +1,150 @@ +insurance_comapnaies +Acko General Insurance Limited +Acko General Insurance Limited +Aditya Birla Health Insurance Company Limited +Aditya Birla Health Insurance Company Limited +Aditya Birla Sun Life +Aegas Federal Life +Aegas Federal Life +Aegon Life +Aegon Religare Life +Aegon Religare Life +Agricultural Insurance Company of India Limited +Agricultural Insurance Company of India Limited +Apollo Munich Health Insurance Company Limited +Apollo Munich Health Insurance Company Limited +Aviva Life +Aviva Life +Bajaj Allianz Life +Bajaj Allianz General Insurance Company Limited +Bajaj Allianz General Insurance Company Limited +Bajaj Allianz Life +Bharti AXA General Insurance Company Limited +Bharti AXA General Insurance Company Limited +Bharti AXA General Insurance Company Limited +Bharti AXA Life +Birla Sun Life +Birla Sun Life +Canara HSBC Life +Canara HSBC OBC Life +Care Health Insurance Limited +Cholamandalam +Cholamandalam MS General Insurance Company Limited +Cigna TTK Health Insurance Company Limited +Cigna TTK Health Insurance Company Limited +DHFL General Insurance Limited +DHFL General Insurance Limited +DHFL Pramerica Life +DHFL Pramerica Life +DHFL Pramerica Life +DHFL Pramerica Life +ECGC Limited +ECGC Limited +Edelweiss +Edelweiss +Edelweiss General Insurance Company Limited 
+Edelweiss General Insurance Company Limited +Edelweiss Tokio Life +Edelweiss Tokio Life +Edelweiss Tokio Life +Exide Life +Export Credit Guaranteed Corporation of India Limited +Export Credit Guaranteed Corporation of India Limited +Future Generali India Insurance Company Limited +Future Generali India Insurance Company Limited +Future Generali Life +Go Digit General Insurance Limited +Go Digit General Insurance Limited +HDFC Ergo General insurance Company Limited +HDFC Ergo General insurance Company Limited +HDFC Ergo Health Insurance Company Limited +HDFC Ergo Health Insurance Company Limited +HDFC Life +HDFC Standard Life +HDFC Standard Life +ICICI Lombard General Insurance Company Limited +ICICI Lombard General Insurance Company Limited +ICICI Prudential Life +ICICI Prudential Life +IDBI Federal Life +IDBI Federal Life +IDBI Fortis Life +IFFCO Tokio General Insurance Company Limited +IFFCO Tokio General Insurance Company Limited +IndiaFirst Life +IndiaFirst Life +IndiaFirst Life +IndiaFirst Life +IndiaFirst Life +ING Vysya +Kotak Mahindra General Insurance Company Limited +Kotak Mahindra General Insurance Company Limited +Kotak Mahindra Life +Kotak Mahindra Old Mutual Life +Kotak Mahindra Old Mutual Life +L&T General +Liberty +Liberty General Insurance Limited +Liberty Videocon General Insurance Company Limited +LIC of India +LIC of India +LIC of India +Magma HDI General Insurance Company Limited +Magma HDI General Insurance Company Limited +Manipal Cigna Health Insurance Company Limited +Max Bupa Health Insurance Company Limited +Max Bupa Health Insurance Company Limited +Max Life +Max Life +Max New York +Met Life +National +National Insurance Company Limited +NAVI General Insurance Limited +New India +Niva Bupa Health Insurance Company Limited +Oriental +PNB Met Life +Pramerica Life +Raheja QBE General Insurance Company Limited +Raheja QBE General Insurance Company Limited +Reliance General Insurance Company Limited +Reliance General Insurance Company 
Limited +Reliance General Insurance Company Limited +Reliance Health Insurance Limited +Reliance Health Insurance Limited +Reliance Life +Reliance Nippon Life +Religare Health Insurance Company Limited +Religare Health Insurance Company Limited +Royal Sundaram +Royal Sundaram Alliance Insurance Company Limited +Royal Sundaram General Insurance Company Limited +Sahara Life +SBI General Insurance Company Limited +SBI General Insurance Company Limited +SBI Life +Shriram General Insurance Company Limited +Shriram General Insurance Company Limited +Shriram Life +Specialized Insurers +Star Health & Allied Insurance Company Limited +Star Health & Allied Insurance Company Limited +Star Union Dai-ichi Life +Star Union Dai-ichi Life +Star Union Dai-ichi Life +Star Union Dai-ichi Life +Star Union Dai-ichi Life +Star Union Dai-ichi Life +Star Union Dai-ichi Life +Tata AIA Life +Tata AIA Life +Tata AIG +Tata AIG +Tata AIG General Insurance Company Limited +The New India Assurance Company Limited +The Oriental Insurance Company Limited +United India Insurance Company Limited +United India Insurance Company Limited +Universal Sompo General Insurance Company Limited +Universal Sompo General Insurance Company Limited \ No newline at end of file diff --git a/app/models/s3_checks.py b/app/models/s3_checks.py index 03043c2..8da54e4 100644 --- a/app/models/s3_checks.py +++ b/app/models/s3_checks.py @@ -1,4 +1,4 @@ -from typing import List +from typing import List, Union from pydantic import BaseModel @@ -16,3 +16,56 @@ class ObjectDetail(BaseModel): class s3FileCheckResponse(BaseModel): exists: List[ObjectDetail] non_exists: List[str] + + +class s3FileKeyCheckRequest(BaseModel): + file_key: str + s3_access_key: Union[str, None] = None + s3_secret_key: Union[str, None] = None + s3_endpoint_url: Union[str, None] = None + resource: Union[str, None] = None + + class Config: + schema_extra = { + "examples": { + "file": { + "summary": "s3://roapitest/processed/wheat/2015/output.csv", + 
"description": "Provide file key to check if it exists in S3", + "value": { + "file_key": "file", + "s3_access_key": None, + "s3_secret_key": None, + "s3_endpoint_url": None, + "resource": None, + }, + }, + } + } + + +class s3FileKeysCheckRequest(BaseModel): + file_keys: List[str] + s3_access_key: Union[str, None] = None + s3_secret_key: Union[str, None] = None + s3_endpoint_url: Union[str, None] = None + resource: Union[str, None] = None + + class Config: + schema_extra = { + "examples": { + "files": { + "summary": "Files", + "description": "Provide file keys to check if it exists in S3", + "value": { + "file_keys": [ + "s3://roapitest/processed/wheat/2015/output.csv", + "s3://roapitest/processed/wheat/2016/output.csv", + ], + "s3_access_key": None, + "s3_secret_key": None, + "s3_endpoint_url": None, + "resource": None, + }, + }, + } + } diff --git a/app/utils/column_mapping.py b/app/utils/column_mapping.py index 78cb1e6..76e2084 100644 --- a/app/utils/column_mapping.py +++ b/app/utils/column_mapping.py @@ -6,6 +6,7 @@ AirlineSettings, DateTimeSettings, GeographySettings, + InsuranceCompanySettings, MetadataSettings, NoteSettings, TagsSettings, @@ -17,6 +18,7 @@ unit_settings = UnitSettings() note_settings = NoteSettings() airline_settings = AirlineSettings() +insurance_company_settings = InsuranceCompanySettings() metadata_settings = MetadataSettings() tags_settings = TagsSettings() @@ -90,7 +92,7 @@ async def find_geography_columns(columns: set): r".*({})".format(geography_settings.STATE_KEYWORD) ) city_pattern = re.compile( - r".*({})".format(geography_settings.CITY_KEYWORD) + r".*({})".format(geography_settings.DISTRICT_KEYWORD) ) country_column, columns = extract_pattern_from_columns( @@ -118,6 +120,18 @@ async def find_airline_name_columns(columns: set): return {"airline_name": airline_name} +async def find_insurance_company_columns(columns: set): + insurance_name_pattern = re.compile( + r".*({})".format( + 
insurance_company_settings.INSURANCE_COMPANY_NAME_KEYWORD + ) + ) + airline_name, _ = extract_pattern_from_columns( + columns, insurance_name_pattern + ) + return {"insurance_name": airline_name} + + async def find_unit_columns(columns: set): unit_pattern = re.compile(r"({})".format(unit_settings.UNIT_KEYWORD)) unit_column, _ = extract_pattern_from_columns(columns, unit_pattern) @@ -269,6 +283,9 @@ async def find_mapped_columns(columns): unit_columns = await find_unit_columns(columns) note_columns = await find_note_columns(columns) airline_name_columns = await find_airline_name_columns(columns) + insurance_company_name_columns = await find_insurance_company_columns( + columns + ) metadata_columns = await find_metadata_columns(columns) mapped_columns = { **datetime_columns, @@ -277,6 +294,7 @@ async def find_mapped_columns(columns): **note_columns, **airline_name_columns, **metadata_columns, + **insurance_company_name_columns, } not_mapped_columns = list( set(columns).difference( diff --git a/app/utils/dataset.py b/app/utils/dataset.py index 5239b3d..ad03135 100644 --- a/app/utils/dataset.py +++ b/app/utils/dataset.py @@ -11,6 +11,7 @@ from app.utils.datetime import datetime_expectation_suite from app.utils.general import general_table_expectation_suite from app.utils.geography import geography_expectation_suite +from app.utils.insurance import insurance_company_name_expectation_suite from app.utils.note import note_expectation_suite from app.utils.unit import unit_expectation_suite @@ -29,6 +30,7 @@ async def dataset_expectation( datetime_expectation_suite(dataset, result_type), geography_expectation_suite(dataset, result_type), airline_name_expectation_suite(dataset, result_type), + insurance_company_name_expectation_suite(dataset, result_type), note_expectation_suite(dataset, result_type), unit_expectation_suite(dataset, result_type), general_table_expectation_suite(dataset, result_type), diff --git a/app/utils/geography.py b/app/utils/geography.py index 
852ecba..cceeacf 100644 --- a/app/utils/geography.py +++ b/app/utils/geography.py @@ -19,8 +19,8 @@ async def modify_city_expectation_suite(column_name: str, result_format: str): default_expectation_suite = geograhy_setting.STATE_EXPECTATION - city_dataset = await read_pandas_dataset(APP_DIR / "core" / "city.csv") - city_list = city_dataset["city"].tolist() + city_dataset = await read_pandas_dataset(APP_DIR / "core" / "district.csv") + city_list = city_dataset["districts"].tolist() changed_config = { "expect_column_values_to_be_in_set": { @@ -38,6 +38,7 @@ async def modify_city_expectation_suite(column_name: str, result_format: str): async def city_expectation_suite(dataset, result_format): results = {} geography_columns = await find_geography_columns(set(dataset.columns)) + for each_column in geography_columns["city"]: expectation_suite = await modify_city_expectation_suite( each_column, result_format diff --git a/app/utils/insurance.py b/app/utils/insurance.py new file mode 100644 index 0000000..ff480f7 --- /dev/null +++ b/app/utils/insurance.py @@ -0,0 +1,66 @@ +import great_expectations as ge +from fastapi.encoders import jsonable_encoder + +from app.core.config import APP_DIR, InsuranceCompanySettings, Settings +from app.utils.column_mapping import find_insurance_company_columns +from app.utils.common import modify_values_to_be_in_set, read_pandas_dataset + +settings = Settings() +insurance_company_settings = InsuranceCompanySettings() + + +async def modify_insurance_company_name_expectation_suite( + column_name: str, result_format: str +): + default_expectation_suite = ( + insurance_company_settings.INSURANCE_COMPANY_NAME_EXPECTATION + ) + + insurance_company_names_dataset = await read_pandas_dataset( + APP_DIR / "core" / "insurance_companies.csv" + ) + insurance_company_names_list = insurance_company_names_dataset[ + "insurance_comapnaies" + ].tolist() + + changed_config = { + "expect_column_values_to_be_in_set": { + "value_set": 
insurance_company_names_list, + "column": column_name, + "result_format": result_format, + } + } + changed_expectation_suite = await modify_values_to_be_in_set( + changed_config, default_expectation_suite + ) + return changed_expectation_suite + + +async def insurance_company_name_expectation_suite(dataset, result_format): + results = {} + insurance_company_name_columns = await find_insurance_company_columns( + set(dataset.columns) + ) + for each_column in insurance_company_name_columns["insurance_name"]: + expectation_suite = ( + await modify_insurance_company_name_expectation_suite( + each_column, result_format + ) + ) + # convert pandas dataset to great_expectations dataset + ge_pandas_dataset = ge.from_pandas( + dataset, expectation_suite=expectation_suite + ) + validation = ge_pandas_dataset.validate() + validation_ui_name = ( + validation["results"][0]["expectation_config"]["meta"][ + "expectation_name" + ] + + " - " + + validation["results"][0]["expectation_config"]["_kwargs"][ + "column" + ] + ) + results[validation_ui_name] = validation + + return jsonable_encoder(results) diff --git a/app/utils/s3_checks.py b/app/utils/s3_checks.py index 9ddd370..64c1005 100644 --- a/app/utils/s3_checks.py +++ b/app/utils/s3_checks.py @@ -1,5 +1,5 @@ import asyncio -from typing import Set +from typing import List from urllib.parse import urlparse import boto3 @@ -33,6 +33,7 @@ async def check_file_metadata(session, file_key: str): file_parts = urlparse(file_key) bucket, obj_key = file_parts.netloc, file_parts.path.lstrip("/") obj = session.ObjectSummary(bucket, obj_key) + logger.info(f"Checking for key: {file_key}") # get the metadata try: @@ -44,8 +45,8 @@ async def check_file_metadata(session, file_key: str): return jsonable_encoder(metadata) -async def check_files_metadata(session, file_keys: Set[str]): +async def check_files_metadata(session, file_keys: List[str]): files_metadata = await asyncio.gather( *[check_file_metadata(session, file_key) for file_key in 
class PsuCompanySettings(BaseSettings):
    """Settings for validating PSU company-name columns.

    Holds the keyword used to recognise PSU columns and the default
    expectation-suite template that the PSU validation customises per column.
    """

    # Substring looked for in column names to detect PSU company columns.
    PSU_COMPANY_NAME_KEYWORD: str = "psu_companies"
    # Default suite: one `expect_column_values_to_be_in_set` expectation.
    # The "column", "value_set" and "result_format" values are placeholders;
    # the PSU validation code supplies per-column overrides for all three
    # (exact merge semantics live in `modify_values_to_be_in_set` -- confirm).
    PSU_COMPANY_NAME_EXPECTATION = {
        "data_asset_type": None,
        "expectation_suite_name": "psu_company_name_expectation_suite",
        "expectations": [
            {
                "expectation_type": "expect_column_values_to_be_in_set",
                "kwargs": {
                    "column": "psu_companies",
                    "value_set": [],
                    "result_format": "SUMMARY",
                },
                # "expectation_name" is used when labelling validation
                # results; the other meta keys are presumably surfaced to
                # the user -- verify against the consumer.
                "meta": {
                    "expectation_name": "PSU Company Name",
                    "cleaning_pdf_link": "https://wp.me/ad1WQ9-dvg",
                    "expectation_error_message": "PSU Company Name should be from the Data Dictionary",
                },
            }
        ],
    }
Company of India Limited -Agricultural Insurance Company of India Limited -Apollo Munich Health Insurance Company Limited Apollo Munich Health Insurance Company Limited Aviva Life -Aviva Life Bajaj Allianz Life Bajaj Allianz General Insurance Company Limited -Bajaj Allianz General Insurance Company Limited -Bajaj Allianz Life -Bharti AXA General Insurance Company Limited -Bharti AXA General Insurance Company Limited Bharti AXA General Insurance Company Limited Bharti AXA Life Birla Sun Life -Birla Sun Life Canara HSBC Life Canara HSBC OBC Life Care Health Insurance Limited Cholamandalam Cholamandalam MS General Insurance Company Limited Cigna TTK Health Insurance Company Limited -Cigna TTK Health Insurance Company Limited -DHFL General Insurance Limited DHFL General Insurance Limited DHFL Pramerica Life -DHFL Pramerica Life -DHFL Pramerica Life -DHFL Pramerica Life ECGC Limited -ECGC Limited -Edelweiss Edelweiss Edelweiss General Insurance Company Limited -Edelweiss General Insurance Company Limited -Edelweiss Tokio Life -Edelweiss Tokio Life Edelweiss Tokio Life Exide Life Export Credit Guaranteed Corporation of India Limited -Export Credit Guaranteed Corporation of India Limited -Future Generali India Insurance Company Limited Future Generali India Insurance Company Limited Future Generali Life Go Digit General Insurance Limited -Go Digit General Insurance Limited HDFC Ergo General insurance Company Limited -HDFC Ergo General insurance Company Limited -HDFC Ergo Health Insurance Company Limited HDFC Ergo Health Insurance Company Limited HDFC Life HDFC Standard Life -HDFC Standard Life -ICICI Lombard General Insurance Company Limited ICICI Lombard General Insurance Company Limited ICICI Prudential Life -ICICI Prudential Life -IDBI Federal Life IDBI Federal Life IDBI Fortis Life IFFCO Tokio General Insurance Company Limited -IFFCO Tokio General Insurance Company Limited -IndiaFirst Life -IndiaFirst Life -IndiaFirst Life -IndiaFirst Life IndiaFirst Life ING Vysya 
Kotak Mahindra General Insurance Company Limited -Kotak Mahindra General Insurance Company Limited Kotak Mahindra Life Kotak Mahindra Old Mutual Life -Kotak Mahindra Old Mutual Life L&T General Liberty Liberty General Insurance Limited Liberty Videocon General Insurance Company Limited LIC of India -LIC of India -LIC of India -Magma HDI General Insurance Company Limited Magma HDI General Insurance Company Limited Manipal Cigna Health Insurance Company Limited Max Bupa Health Insurance Company Limited -Max Bupa Health Insurance Company Limited -Max Life Max Life Max New York Met Life @@ -107,44 +64,26 @@ Oriental PNB Met Life Pramerica Life Raheja QBE General Insurance Company Limited -Raheja QBE General Insurance Company Limited -Reliance General Insurance Company Limited Reliance General Insurance Company Limited -Reliance General Insurance Company Limited -Reliance Health Insurance Limited Reliance Health Insurance Limited Reliance Life Reliance Nippon Life Religare Health Insurance Company Limited -Religare Health Insurance Company Limited Royal Sundaram Royal Sundaram Alliance Insurance Company Limited Royal Sundaram General Insurance Company Limited Sahara Life SBI General Insurance Company Limited -SBI General Insurance Company Limited SBI Life Shriram General Insurance Company Limited -Shriram General Insurance Company Limited Shriram Life Specialized Insurers Star Health & Allied Insurance Company Limited -Star Health & Allied Insurance Company Limited -Star Union Dai-ichi Life -Star Union Dai-ichi Life -Star Union Dai-ichi Life -Star Union Dai-ichi Life -Star Union Dai-ichi Life -Star Union Dai-ichi Life Star Union Dai-ichi Life Tata AIA Life -Tata AIA Life -Tata AIG Tata AIG Tata AIG General Insurance Company Limited The New India Assurance Company Limited The Oriental Insurance Company Limited United India Insurance Company Limited -United India Insurance Company Limited -Universal Sompo General Insurance Company Limited Universal Sompo General Insurance 
Company Limited \ No newline at end of file diff --git a/app/core/psu.csv b/app/core/psu.csv new file mode 100644 index 0000000..0d80cb3 --- /dev/null +++ b/app/core/psu.csv @@ -0,0 +1,490 @@ +psu_companies +AAI Cargo Logistics and Allied Services Company Limited +Agrinnovate India Limited +Air India Airport Services Limited +Air India Engineering Services Limited +Air India Air Transport Services Limited +Air India Assets Holding Limited +Air India Charters Limited +Air India Engineering Services Limited +Air India Express Limited +Air India Limited +Airline Allied Services Limited +Airports Authority of India +Ajmer Phagi Transco Limited +Alliance Air Aviation Limited +Ananthpuram Kurnool Transmission Limited +Andaman and Nicobar Islands Forest and Plant Development Corporation Limited +Andrew Yule and Company Limited +Antrix Corporation Limited +Anushakti Vidhyut Nigam Limited +Apollo Design Apparel Parks Limited +Artificial Limbs Manufacturing Corporation of India +Assam Ashok Hotel Corporation Limited +Aurangabad Textiles and Apparel Parks Limited +Baira Siul Sarna Transmission Limited +Ballabgarh - Gn Transmission Company Limited +Balmer Lawrie and Company Limited +Balmer Lawrie Investments Limited +BBJ Construction Company Limited +BEL Optronics Devices Limited +BEL-Thales Systems Limited +BEML Limited +Bengal Chemicals and Pharmaceuticals Limited +Bhadlasikar Transmission Limited +Bharat Bhari Udyog Nigam Limited +Bharat Broadband Network Limited +Bharat Coking Coal Limited +Bharat Dynamics Limited +Bharat Electronics Limited +Bharat Gas Resources Limited +Bharat Heavy Electricals Limited +Bharat Heavy Plate and Vessels Limited +Bharat Immunologicals and Biologicals Corporation Limited +Bharat Oman Refineries Limited +Bharat Petro Resources JPDA +Bharat Petro Resources Limited +Bharat Petroleum Corporation Limited +Bharat Pumps and Compressors Limited +Bharat Sanchar Nigam Limited +Bharat Wagon and Engineering Company Limited +Bharatiya Nabhikiya Vidyut 
Nigam Limited +Bhartiya Rail Bijlee Company Limited +BHEL Electrical Machines Limited +Bhind Guna Transmission Limited +Bidar Transmission Limited +Biecco Lawrie Limited +Biecco Lawrie Limited +Bihar Drugs And Organic Chemicals Limited +Bihar Infrapower Limited +Bihar Mega Power Limited +Bijawar-vidarbha Transmission Limited +Bikaner-ii Bhiwadi Transco Limited +Biotechnology Industry Research Assistance Council +Birds Jute and Exports Limited +Bisra Stone Lime Company Limited +BPCL-KIAL Fuel Farm Private Limited +BPCL-KIAL Fuel Farm Private Limited +Brahmaputra Crackers and Polymer Limited +Brahmaputra Valley Fertilizer Corporation Limited +Braithwaite and Company Limited +Braithwaite Burn and Jessop Construction Company Limited +Bridge and Roof Company India Limited +British India Corporation Limited +Broadcast Engineering Consultants India Limited +BSNL Tower Corporation +Bundelkhand Saur Urja Limited +Burn Standard Company Limited +Cement Corporation of India Limited +Central Coalfields Limited +Central Cottage Industries Corporation of India Limited +Central Electronics Limited +Central Inland Water Transport Corporation Limited +Central Mine Planning and Design Institute Limited +Central Railside Warehouse Company Limited +Central Railside Warehouse Company Limited +Central Registry of Securitisation Asset Reconstruction and Security Interest of India +Central Transmission Utility of India Limited +Central Warehousing Corporation +Certification Engineers International Limited +Chandigarh International Airport Limited +Chandil Transmission Limited +Chennai Petroleum Corporation Limited +Cheyyur Infra Limited +Chhattisgarh Western Railways Transmission Limited +Chhattisgarh Copper Limited +Chhattisgarh Eastern Railways Limited +Chhattisgarh East-west Railways Limited +Chhattisgarh Mega Steel Company Limited +Chhattisgarh Surguja Power Limited +Coal India Limited +Coastal Karnataka Power Limited +Coastal Maharashtra Mega Power Limited +Coastal Tamil Nadu Power
Limited +Cochin Shipyard Limited +Concor Air Limited +Concor Last Mile Logistics Limited +Container Corporation of India Limited +Cotton Corporation of India Limited +Creda HPCL Biofuel Limited +Darbhanga Motihari Transmission Company Limited +Dedicated Freight Corridor Corporation of India Limited +Delhi Police Housing Corporation Limited +Deoghar Airport Limited +Deoghar Infra Limited +Deoghar Mega Power Limited +Dgen Transmission Company Limited +Dhalbhumgarh Airport Limited +Dholera International Airport +Dingchang Transmission Limited +Donyi Polo Ashok Hotel Limited +Dredging Corporation of India Limited +Dumka Transmission Limited +Export Credit Guarantee Corporation of India Limited +Eastern Coalfields Limited +Eastern Investment Limited +EdCIL India Limited +Electronics Corporation of India Limited +Engineering Projects India Limited +Engineers India Limited +Ennore Port Limited +EPI Urban Infra Developers Limited +ERSS XXI Transmission Limited +Export Credit Guarantee Corporation of India Limited +Fatehgarh Bhadla Transmission Limited +Fatehgarh Bhadla Transmission Limited +FCI Aravali Gypsum and Minerals India Limited +Ferro Scrap Nigam Limited +Fertilizer Corporation of India Limited +Fertilizers and Chemicals Travancore Limited +Food Corporation of India +Fresh and Healthy Enterprises Limited +Gadag Transmission Limited +GAIL India Limited +GAIL Gas Limited +Garden Reach Shipbuilders and Engineers Limited +Ghatampur Transmission Limited +Ghogarpalli Integrated Power Company Limited +Goa Antibiotics and Pharmaceuticals Limited +Goa Shipyard Limited +Goa Tamnar Transmission Project Limited +Goldmohur Design and Apparel Parks Limited +Grid Conductor Limited +Handicrafts and Handloom Exports Corporation of India Limited +Heavy Engineering Corporation Limited +Hemisphere Properties India Limited +High Speed Rail Corporation of India Limited +High Speed Rail Corporation of India Limited +HIL India Limited +Hindustan Aeronautics Limited +Hindustan Antibiotics
Limited +Hindustan Cables Limited +Hindustan Copper Limited +Hindustan Fertilizer Corporation Limited +Hindustan Fluorocarbons Limited +Hindustan Insecticides Limited +Hindustan Newsprint Limited +Hindustan Organic Chemicals Limited +Hindustan Paper Corporation Limited +Hindustan Petroleum Corporation Limited +Hindustan Photo Films Manufacturing Company Limited +Hindustan Prefab Limited +Hindustan Salts Limited +Hindustan Shipyard Limited +Hindustan Steelworks Construction Limited +Hindustan Vegetable Oils Corporation Limited +Hll Biotech Limited +Hll Infra Tech Services Limited +Hll Lifecare Limited +Hll Medipark Limited +Hll Mother and Child Care Hospitals Limited +HMT Bearings Limited +HMT Bearings Limited +HMT Chinar Watches Limited +HMT Limited +HMT Machine Tools Limited +HMT Watches Limited +HMT International Limited +Hooghly Dock and Port Engineers Limited +Hooghly Printing Company Limited +Hooghly Cochin Shipyard Limited +Hotel Corporation of India Limited +Housing and Urban Development Corporation Limited +HPCL Biofuels Limited +HPCL Rajasthan Refinery Limited +HPCL Shapoorji Energy Private Limited +HSCC India Limited +HSRC Infra Services Limited +ITI Limited +IDPL Tamil Nadu Limited +IIFCL Asset Management Company Limited +IIFCL Projects Limited +India Infrastructure Finance Company Limited +India International Convention And Exhibition Centre Limited +India Post Payment Bank Limited +India Tourism Development Corporation Limited +India Trade Promotion Organisation +India United Textile Mills Limited +Indian Catalyst Private Limited +Indian Drugs and Pharmaceuticals Limited +Indian Medicines and Pharmaceutical Corporation Limited +Indian Oil Corporation Limited +Indian Oil Creda Biofuels Limited +Indian Ports Global Limited +Indian Railway Catering And Tourism Corporation Limited +Indian Railway Finance Corporation Limited +Indian Railway Stations Development Corporation Limited +Indian Rare Earths Limited +Indian Renewable Energy Development Agency 
Limited +Indian Vaccine Corporation Limited +Indo CAT Private Limited +Indo CAT Private Limited +Indo Russian Helicopters Limited +Inland and Coastal Shipping Limited +Instrumentation Limited +IRCON Davanagere Haveri Highway Limited +IRCON Infrastructure and Services Limited +IRCON International Limited +IRCON PB Tollway Limited +IRCON Shivpuri Guna Tollway Limited +IRCON Vadodara Kim Expressway Limited +IREL India Limited +IREL IDCOL Limited +Irrigation and Water Resources Finance Corporation Limited +ITPO Services Limited +Jammu and Kashmir Mineral Development Corporation Limited +Jagdishpur Paper Mills Limited +Jalpower Corporation Limited +Jam Khambaliya Transco Limited +Jammu and Kashmir Development Finance Corporation Limited +Jharkhand Central Railway Limited +Jharkhand Infrapower Limited +Jharkhand Kolhan Steel Limited +Jharkhand National Mineral Development Corporation Limited +Jute Corporation of India Limited +Kallam Transmission Limited +Kamarajar Port Limited +Kanti Bijlee Utpadan Nigam Limited +Karnataka Antibiotics and Pharmaceuticals Limited +Karnataka Trade Promotion Organisation +Karnataka Vijay Nagar Steel Limited +Karur Transmission Limited +Khargone Transmission Limited +Khetri Transco Limited +Khetri Narela Transmission Limited +KIOCL Limited +Koderma Transmission Limited +Kohima Mariani Transmission Limited +Kolkata Metro Rail Corporation Limited +Konkan LNG Limited +Konkan Railway Corporation Limited +Koppal-narendra Transmission Limited +Kumarakruppa Frontier Hotels Private Limited +Kumarakruppa Frontier Hotels Private Limited +Lakadia Banaskantha Transco Limited +Lancoteesta Hydro Power Limited +Loktak Downstream Hydroelectric Corporation Limited +Ludhiana International Airport Limited +MMTC Limited +MSTC Limited +Madhya Pradesh Ashok Hotel Corporation Limited +Madras Fertilizers Limited +Mahanadi Basin Power Limited +Mahanadi Coal Railway Limited +Mahanadi Coalfields Limited +Mahanadi Coalfields Limited +Mahanagar Telephone Nigam Limited 
+Maharashtra Antibiotics and Pharmaceuticals Limited +MAMC Industries Limited +Mandar Transmission Limited +Mangalore Refinery and Petrochemicals Limited +Manipur State Drugs and Pharmaceuticals Limited +Mazagon Dock Shipbuilders Limited +Mazagon Dock Shipbuilders Limited +MECON Limited +Medinipur-Jeerat Transmission Limited +Millennium Telecom Limited +Mineral Exploration Corporation Limited +Mishra Dhatu Nigam Limited +MJSJ Coal Limited +MNH Shakti Limited +Mohinder Garh Bhiwani Transmission Limited +MOIL Limited +MP Power Transmission Package-i Limited +MP Power Transmission Package-ii Limited +Mumbai Railway Vikas Corporation Limited +Nabinagar Power Generating Company Limited +Nagaland Pulp and Paper Company Limited +Naini Aerospace Limited +National Aluminium Company Limited +National Backward Classes Finance and Development Corporation +National Buildings Construction Corporation Limited +National Fertilizers Limited +National Film Development Corporation Limited +National Handicapped Finance and Development Corporation +National Handloom Development Corporation Limited +National Highways and Infrastructure Development Corporation Limited +National Informatics Centre Services Incorporated +National Jute Manufactures Corporation Limited +National Minorities Development and Finance Corporation +National Minorities Development and Finance Corporation +National Projects Construction Corporation Limited +National Research Development Corporation +National Safai Karamcharis Finance and Development Corporation +National Scheduled Castes Finance and Development Corporation +National Scheduled Tribes Finance and Development Corporation +National Seeds Corporation Limited +National Small Industries Corporation Limited +National Textile Corporation Limited +NBCC Engineering and Consultancy Limited +NBCC Environment Engineering Limited +NBCC International Limited +NBCC Services Limited +NBCC India Limited +Nellore Transmission Limited +NEPA Limited +NER-II Transmission
Limited +New City of Bombay Manufacturing Mills Limited +Newspace India Limited +Neyveli Lignite Corporation Limited +Neyveli Uttar Pradesh Power Limited +NHDC Limited +NHPC Limited +NLC India Limited +NLC Tamil Nadu Power Limited +NMDC CSR Foundation +NMDC Limited +NMDC Power Limited +NMDC Steel Limited +NMDC-CMDC Limited +North Eastern Electric Power Corporation Limited +North Eastern Handicrafts and Handloom Dev.Corporation Limited +North Eastern Regional Agri. Marketing CorporationLimited +North Karanpura Transco Limited +Northern Coalfields Limited +NPCIL-Indian Oil Nuclear Energy Corporation Limited +NPCIL-NALCO Power Company Limited +NRSS XXXVI Transmission Limited +NSIC Venture Capital Fund Limited +NTPC EDMC Waste Solutions +NTPC Electric Supply Company Limited +NTPC Hydro Limited +NTPC Limited +NTPC Mining Limited +NTPC Renewable Energy Limited +NTPC Vidyut Vyapar Nigam Limited +Nuclear Power Corporation of India Limited +Numaligarh Refinery Limited +Numaligarh Refinery Limited +Odisha Infrapower Limited +Oil and Natural Gas Corporation Limited +Oil India International Limited +Oil India Limited +ONGC Mangalore Petrochemicals Limited +ONGC Videsh Limited +ONGC Videsh Rovuma Limited India +Orissa Drugs and Chemicals Limited +Orissa Integrated Power Limited +Orissa Mineral Development Company Limited +PEC Limited +Patran Transmission Company Limited +Patratu Vidyut Utpadan Nigam Limited +Pawan Hans Limited +Pawan Hans Limited +Petronet CCK Limited +PFC Capital Advisory Service Limited +PFC Consulting Limited +PFC Green Energy Limited +Pondicherry Ashok Hotel Corporation Limited +Power Equity Capital Advisors Private. 
Limited +Power Finance Corporation Limited +Power Finance Corporation Limited +Power Grid Ajmer Phagi Transmission Limited +Power Grid Bhind Guna Transmission Limited +Power Grid Bhuj Transmission Limited +Power Grid Corporation of India Limited +Power Grid Fategarh Transmission Limited +Power Grid Jabalpur Transmission Limited +Power Grid Khetri Transmission Limited +Power Grid Medinipur Jeerat Transmission Limited +Power Grid Meerut Simbhavali Transmission Limited +Power Grid Mithilanchal Transmission Limited +Power Grid NM Transmission Limited +Power Grid Parli Transmission Limited +Power Grid Ramgarh Transmission Limited +Power Grid Rampur Sambhal Transmission Limited +Power Grid Southern Interconnector Transmission System Limited +Power Grid Varanasi Transmission Limited +Power Grid Vemagiri Transmission Limited +Power Grid Warora Transmission Limited +Power System Operation Corporation Limited +Powergrid Himachal Transmission Limited +Powergrid Jawaharpur Firozabad Transmission Limited +Powergrid Kala Amb Transmission Limited +Powergrid Nm Transmission Limited +Powergrid Unchahar Transmission Limited +Powergrid Vizag Transmission Limited +Prize Petroleum Company Limited +Projects and Development India Limited +Punjab Ashok Hotel Company Limited +Punjab Logistic Infrastructure Limited +Purulia and Kharagpur Transmission Company Limited +Rail Vikas Nigam Limited +Railtel Corporation India Limited +Railtel Enterprises Limited +Railway Energy Management Company Limited +Raipur-Rajanand Warona Transmission Limited +Rajasthan Drugs and Pharmaceuticals Limited +Rajasthan Electronics And Instruments Limited +Rajgarh Transmission Limited +Ranchi Ashok Bihar Hotel Corporation Limited +Rapp Transmission Company Limited +Rashtriya Chemicals and Fertilizers Limited +Rashtriya Ispat Nigam Limited +Ratnagiri Gas and Power Private Limited +REC Limited +REC Power Development and Consultancy Limited +REC Power Distribution Company Limited +REC Transmission Projects Company
Limited +REC Transmission Projects Company Limited +Richardson and Cruddas Limited +RITES Infrastructure Services Limited +RITES Limited +Rural Electrification Corporation Limited +Sagarmala Development Company Limited +SAIL Jagadishpur Power Plant Limited +SAIL Refractory Company Limited +SAIL Refractory Company Limited +SAIL Sindri Projects Limited +Sakhigopal Integrated Power Company Limited +Sambhar Salts Limited +Scooters India Limited +Security Printing and Minting Corporation India Limited +Sethusamudram Corporation Limited +Shipping Corporation of India Limited +Shongtong Karcham Wangtoo Transmission Limited +Sidcul Concor Infra Company Limited +Sikar New Transmission Limited +Sikar-II Aligarh Transmission Limited +Sipat Transmission Limited +SJVN Limited +SJVN Thermal Private. Limited +Solar Energy Corporation of India +South-Central East Delhi Power Transmission Limited +South Eastern Coalfields Limited +South-Central East Delhi Power Transmission Limited +State Farms Corporation of India Limited +State Trading Corporation of India Limited +STCL Limited +Steel Authority of India Limited +Tamil Nadu Trade Promotion Organisation +Tanda Transmission Company Limited +Tatiya Andhra Mega Power Limited +TCIL Bina Toll Road Limited +TCIL Lakhnadone Toll Road Limited +Tebma Shipyards Limited +Telecommunications Consultants India Limited +THDC India Limited +THDC India Limited +The Bisra Stone Lime Company Limited +The Cotton Corporation of India Limited +The Fertilizer Corporation of India Limited +The Jute Corporation of India Limited +Triveni Structurals Limited +Tungabhadra Steel Products Limited +TUSCO Limited +Tyre Corporation of India Limited +Udupi Kasargode Transmission Limited +Unchahar Transmission Limited +Uranium Corporation of India Limited +Utkal Ashok Hotel Corporation Limited +Vapi-ii North Lakhimpur Transmission Limited +Vignyan Industries Limited +Visakhapatanam Port Logistics Park Limited +Vizag Transmission Limited +WAPCOS Limited +Western 
async def find_psu_company_columns(columns: set):
    """Pick out the columns whose names contain the PSU keyword.

    The keyword comes from ``PsuCompanySettings.PSU_COMPANY_NAME_KEYWORD``
    and is inserted unescaped into the pattern ``.*(<keyword>)``.

    Args:
        columns: Column names of the dataset being inspected.

    Returns:
        ``{"psu_name": <first value returned by extract_pattern_from_columns>}``;
        the helper's second return value is discarded.
    """
    keyword = psu_company_settings.PSU_COMPANY_NAME_KEYWORD
    matched_columns, _ = extract_pattern_from_columns(
        columns, re.compile(rf".*({keyword})")
    )
    return {"psu_name": matched_columns}
find_psu_company_columns(columns) metadata_columns = await find_metadata_columns(columns) mapped_columns = { **datetime_columns, @@ -295,10 +298,12 @@ async def find_mapped_columns(columns): **airline_name_columns, **metadata_columns, **insurance_company_name_columns, + **psu_columns, } not_mapped_columns = list( set(columns).difference( list(chain.from_iterable(mapped_columns.values())) ) ) + print({**mapped_columns, "unmapped": not_mapped_columns}) return {**mapped_columns, "unmapped": not_mapped_columns} diff --git a/app/utils/dataset.py b/app/utils/dataset.py index ad03135..ab3d53f 100644 --- a/app/utils/dataset.py +++ b/app/utils/dataset.py @@ -13,6 +13,7 @@ from app.utils.geography import geography_expectation_suite from app.utils.insurance import insurance_company_name_expectation_suite from app.utils.note import note_expectation_suite +from app.utils.psu import psu_company_name_expectation_suite from app.utils.unit import unit_expectation_suite settings = Settings() @@ -31,6 +32,7 @@ async def dataset_expectation( geography_expectation_suite(dataset, result_type), airline_name_expectation_suite(dataset, result_type), insurance_company_name_expectation_suite(dataset, result_type), + psu_company_name_expectation_suite(dataset, result_type), note_expectation_suite(dataset, result_type), unit_expectation_suite(dataset, result_type), general_table_expectation_suite(dataset, result_type), @@ -39,22 +41,6 @@ async def dataset_expectation( return {dataset_path: jsonable_encoder(expectation)} -# async def datasets_expectation(dataset_folder_path, result_type): - -# # Currently dataset_folder_path is in local machine -# if not Path(dataset_folder_path).exists(): -# raise PathError - -# expectations = await asyncio.gather( -# *[ -# dataset_expectation(dataset_path, result_type) -# for dataset_path in Path(dataset_folder_path).glob("**/*.csv") -# ] -# ) -# expectations = ChainMap(*expectations) -# return jsonable_encoder(expectations) - - async def 
"""Expectation checks for columns that hold PSU company names."""
import great_expectations as ge
from fastapi.encoders import jsonable_encoder

from app.core.config import APP_DIR, PsuCompanySettings, Settings
from app.utils.column_mapping import find_psu_company_columns
from app.utils.common import modify_values_to_be_in_set, read_pandas_dataset

settings = Settings()
psu_company_settings = PsuCompanySettings()


async def modify_psu_company_name_expectation_suite(
    column_name: str, result_format: str
):
    """Build the PSU company-name expectation suite for one column.

    Reads ``core/psu.csv`` under ``APP_DIR`` and uses the values of its
    ``psu_companies`` column as the allowed value set.

    Args:
        column_name: Name of the dataset column to validate.
        result_format: Result format forwarded to the expectation.

    Returns:
        Whatever ``modify_values_to_be_in_set`` returns when given the
        overrides and the default suite from ``PsuCompanySettings``
        (presumably the customised suite -- confirm against that helper).
    """
    reference_frame = await read_pandas_dataset(APP_DIR / "core" / "psu.csv")
    allowed_names = reference_frame["psu_companies"].tolist()

    overrides = {
        "expect_column_values_to_be_in_set": {
            "value_set": allowed_names,
            "column": column_name,
            "result_format": result_format,
        }
    }
    return await modify_values_to_be_in_set(
        overrides, psu_company_settings.PSU_COMPANY_NAME_EXPECTATION
    )


async def psu_company_name_expectation_suite(dataset, result_format):
    """Validate every PSU company-name column found in ``dataset``.

    Columns are taken from the ``"psu_name"`` entry returned by
    ``find_psu_company_columns``. Each one is validated separately and stored
    under the key ``"<expectation_name> - <column>"``, both parts being read
    from the first validation result.

    Args:
        dataset: pandas dataset handed to ``ge.from_pandas``.
        result_format: Result format forwarded to the expectation.

    Returns:
        ``jsonable_encoder`` output for the mapping of labels to validations;
        an empty mapping when no column matched.
    """
    matched = await find_psu_company_columns(set(dataset.columns))

    results = {}
    for column in matched["psu_name"]:
        suite = await modify_psu_company_name_expectation_suite(
            column, result_format
        )
        # Wrap the pandas dataset so great_expectations can validate it.
        validation = ge.from_pandas(
            dataset, expectation_suite=suite
        ).validate()

        first_config = validation["results"][0]["expectation_config"]
        label = " - ".join(
            (
                first_config["meta"]["expectation_name"],
                first_config["_kwargs"]["column"],
            )
        )
        results[label] = validation

    return jsonable_encoder(results)