From 2e193f0eddec3f5e422e357da3fc76915c3b15be Mon Sep 17 00:00:00 2001
From: HemanthM005 <mhemanthmanikala@gmail.com>
Date: Fri, 16 Feb 2024 19:16:27 +0530
Subject: [PATCH 1/3] Fix: fiscal years and dates temporal coverage issue

---
 .dockerignore                  |  1 +
 app/utils/columns_mapping.py   |  4 +-
 app/utils/temporal_coverage.py | 74 +++++++++++++++++++++++++++-------
 3 files changed, 63 insertions(+), 16 deletions(-)

diff --git a/.dockerignore b/.dockerignore
index bf3a88f..904280c 100644
--- a/.dockerignore
+++ b/.dockerignore
@@ -1,5 +1,6 @@
 # Volumes
 volumes/
+.venv/
 
 # taken from : https://github.com/themattrix/python-pypi-template/blob/master/.dockerignore
 # Git
diff --git a/app/utils/columns_mapping.py b/app/utils/columns_mapping.py
index b19d917..2626953 100644
--- a/app/utils/columns_mapping.py
+++ b/app/utils/columns_mapping.py
@@ -7,7 +7,7 @@
     NoteSettings,
     UnitSettings,
 )
-
+from fastapi.logger import logger
 datetime_settings = DateTimeSettings()
 geography_settings = GeographySettings()
 unit_settings = UnitSettings()
@@ -62,7 +62,7 @@ async def find_datetime_columns(columns: set):
         columns, month_pattern
     )
     date_columns, columns = extract_pattern_from_columns(columns, date_pattern)
-
+    logger.info(f"date_columns: {date_columns}")
     # filter out `as_on_date` from date columns
     date_columns = {
         col for col in date_columns if not as_on_date_pattern.match(col)
diff --git a/app/utils/temporal_coverage.py b/app/utils/temporal_coverage.py
index 760656c..b22dead 100644
--- a/app/utils/temporal_coverage.py
+++ b/app/utils/temporal_coverage.py
@@ -1,10 +1,12 @@
 import re
 from itertools import chain
 from typing import List
+import pandas as pd
 
 from app.core.config import DateTimeSettings
 
 datetime_settings = DateTimeSettings()
+from fastapi.logger import logger
 
 
 def convert_to_calender_year(other_year):
@@ -76,16 +78,53 @@ def is_sequence(year_mapping):
 
 def temporal_coverage_representation(is_sequence, year_mapping):
     year_values_from_mapping = sorted(year_mapping.keys())
+    logger.warning(f"Year Values from Mapping: {year_values_from_mapping}")
 
     if len(year_values_from_mapping) == 1:
         return f"{year_values_from_mapping[0]}"
 
     if not is_sequence:
+        logger.warning(f"Year Mapping: {', '.join(str(year) for year in year_values_from_mapping)}")
         return ", ".join(str(year) for year in year_values_from_mapping)
 
     return f"{year_values_from_mapping[0]} to {year_values_from_mapping[-1]}"
 
 
+def is_fiscal_check(unique_years):
+    for year in unique_years:
+        if "-" not in year:
+            return False
+    return True
+
+
+def get_time_periods(years, is_fiscal=False):
+    if not is_fiscal:
+        years = sorted(map(int, years))
+        years = list(map(str, years))
+    time_periods = []
+    start_year = years[0]
+    end_year = years[0]
+
+    for year in years[1:]:
+        if int(year.split('-')[0]) == int(end_year.split('-')[0]) + 1:
+            end_year = year
+        else:
+            if start_year == end_year:
+                time_periods.append(start_year)
+            else:
+                time_periods.append(f"{start_year} to {end_year}")
+            start_year = year
+            end_year = year
+    
+    # Add the last time period
+    if start_year == end_year:
+        time_periods.append(start_year)
+    else:
+        time_periods.append(f"{start_year} to {end_year}")
+
+    return ", ".join(time_periods)
+
+
 async def get_temporal_coverage(dataset, mapped_columns: dict):
     year_columns = (
         list(mapped_columns["calender_year"])
@@ -93,24 +132,31 @@ async def get_temporal_coverage(dataset, mapped_columns: dict):
         + list(mapped_columns["other_year"])
     )
     year_columns = [year_column for year_column in year_columns if year_column]
-
-    # do operation on the first year column
-    if len(year_columns) == 0:
+    date_columns = list(mapped_columns["date"])
+
+    if len(date_columns) != 0:
+        date_column = date_columns[0]
+        # Extract unique years
+        unique_year_values = pd.to_datetime(dataset[date_column], format='%d-%m-%Y').dt.year.unique()
+        unique_year_values = [str(year) for year in unique_year_values]
+    elif len(year_columns) != 0:
+        year_column = year_columns[0]
+        unique_year_values = [
+            f"{year_val}" for year_val in dataset[year_column].unique() if year_val
+        ]
+    else:
         return {"temporal_coverage": ""}
 
-    year_column = year_columns[0]
-    unique_year_values = [
-        f"{year_val}" for year_val in dataset[year_column].unique() if year_val
-    ]
-
     if not verify_proper_format_of_year_values(unique_year_values):
         return {"temporal_coverage": ""}
+    is_fiscal = is_fiscal_check(unique_year_values)
+    temporal_coverage = get_time_periods(unique_year_values, is_fiscal)
+    # year_mapping = get_list_mappings(unique_year_values)
 
-    year_mapping = get_list_mappings(unique_year_values)
+    # year_in_sequence = is_sequence(year_mapping)
 
-    year_in_sequence = is_sequence(year_mapping)
-
-    temporal_coverage = temporal_coverage_representation(
-        year_in_sequence, year_mapping
-    )
+    # temporal_coverage = temporal_coverage_representation(
+    #     year_in_sequence, year_mapping
+    # )
+    logger.warning(f"Temporal Coverage: {temporal_coverage}")
     return {"temporal_coverage": temporal_coverage}

From 738fcbdd932d92333375594157fd27abc00b4915 Mon Sep 17 00:00:00 2001
From: HemanthM005 <mhemanthmanikala@gmail.com>
Date: Fri, 16 Feb 2024 19:28:32 +0530
Subject: [PATCH 2/3] Chore: bump pre-commit hook versions (black, flake8, isort)

---
 .pre-commit-config.yaml | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index f9d834e..04f428c 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -1,14 +1,14 @@
 repos:
 -   repo: https://github.com/psf/black
-    rev: 22.3.0
+    rev: 24.2.0
     hooks:
     - id: black
       language_version: python3
 -   repo: https://github.com/pycqa/flake8
-    rev: 3.9.0
+    rev: 7.0.0
     hooks:
     - id: flake8
 -   repo: https://github.com/timothycrosley/isort
-    rev: 5.12.0
+    rev: 5.13.2
     hooks:
     - id: isort
\ No newline at end of file

From ce48cc4f81be43c75066ea96d083fcb52b22e505 Mon Sep 17 00:00:00 2001
From: HemanthM005 <mhemanthmanikala@gmail.com>
Date: Fri, 16 Feb 2024 19:36:10 +0530
Subject: [PATCH 3/3] Style: apply black and isort formatting to coverage and column-mapping utils

---
 app/utils/columns_mapping.py   |  4 +++-
 app/utils/spatial_coverage.py  |  8 +++++---
 app/utils/temporal_coverage.py | 19 +++++++++++++------
 3 files changed, 21 insertions(+), 10 deletions(-)

diff --git a/app/utils/columns_mapping.py b/app/utils/columns_mapping.py
index 2626953..d0c339b 100644
--- a/app/utils/columns_mapping.py
+++ b/app/utils/columns_mapping.py
@@ -1,13 +1,15 @@
 import re
 from itertools import chain
 
+from fastapi.logger import logger
+
 from app.core.config import (
     DateTimeSettings,
     GeographySettings,
     NoteSettings,
     UnitSettings,
 )
-from fastapi.logger import logger
+
 datetime_settings = DateTimeSettings()
 geography_settings = GeographySettings()
 unit_settings = UnitSettings()
diff --git a/app/utils/spatial_coverage.py b/app/utils/spatial_coverage.py
index 813d518..dd663b8 100644
--- a/app/utils/spatial_coverage.py
+++ b/app/utils/spatial_coverage.py
@@ -74,9 +74,11 @@ async def get_spatial_coverage(dataset):
     ]
     part, whole = (
         None,
-        None
-        if "country" in ordered_geographic_entity
-        else geography_settings.DEFAULT_SPATIAL_COVERAGE,
+        (
+            None
+            if "country" in ordered_geographic_entity
+            else geography_settings.DEFAULT_SPATIAL_COVERAGE
+        ),
     )
 
     for entity in ordered_geographic_entity:
diff --git a/app/utils/temporal_coverage.py b/app/utils/temporal_coverage.py
index b22dead..7f2ad49 100644
--- a/app/utils/temporal_coverage.py
+++ b/app/utils/temporal_coverage.py
@@ -1,12 +1,13 @@
 import re
 from itertools import chain
 from typing import List
+
 import pandas as pd
+from fastapi.logger import logger
 
 from app.core.config import DateTimeSettings
 
 datetime_settings = DateTimeSettings()
-from fastapi.logger import logger
 
 
 def convert_to_calender_year(other_year):
@@ -84,7 +85,9 @@ def temporal_coverage_representation(is_sequence, year_mapping):
         return f"{year_values_from_mapping[0]}"
 
     if not is_sequence:
-        logger.warning(f"Year Mapping: {', '.join(str(year) for year in year_values_from_mapping)}")
+        logger.warning(
+            f"Year Mapping: {', '.join(str(year) for year in year_values_from_mapping)}"
+        )
         return ", ".join(str(year) for year in year_values_from_mapping)
 
     return f"{year_values_from_mapping[0]} to {year_values_from_mapping[-1]}"
@@ -106,7 +109,7 @@ def get_time_periods(years, is_fiscal=False):
     end_year = years[0]
 
     for year in years[1:]:
-        if int(year.split('-')[0]) == int(end_year.split('-')[0]) + 1:
+        if int(year.split("-")[0]) == int(end_year.split("-")[0]) + 1:
             end_year = year
         else:
             if start_year == end_year:
@@ -115,7 +118,7 @@ def get_time_periods(years, is_fiscal=False):
                 time_periods.append(f"{start_year} to {end_year}")
             start_year = year
             end_year = year
-    
+
     # Add the last time period
     if start_year == end_year:
         time_periods.append(start_year)
@@ -137,12 +140,16 @@ async def get_temporal_coverage(dataset, mapped_columns: dict):
     if len(date_columns) != 0:
         date_column = date_columns[0]
         # Extract unique years
-        unique_year_values = pd.to_datetime(dataset[date_column], format='%d-%m-%Y').dt.year.unique()
+        unique_year_values = pd.to_datetime(
+            dataset[date_column], format="%d-%m-%Y"
+        ).dt.year.unique()
         unique_year_values = [str(year) for year in unique_year_values]
     elif len(year_columns) != 0:
         year_column = year_columns[0]
         unique_year_values = [
-            f"{year_val}" for year_val in dataset[year_column].unique() if year_val
+            f"{year_val}"
+            for year_val in dataset[year_column].unique()
+            if year_val
         ]
     else:
         return {"temporal_coverage": ""}