Skip to content

Commit

Permalink
Merge pull request #19 from factly/feat/attr-order
Browse files: browse the repository at this point in the history
feat: Add file size and bucket name in s3 files list
  • Loading branch information
paul-tharun authored Mar 6, 2023
2 parents 8476f28 + 3bf2692 commit 2ad0480
Show file tree
Hide file tree
Showing 6 changed files with 25 additions and 16 deletions.
4 changes: 2 additions & 2 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,10 @@ repos:
- id: black
language_version: python3
- repo: https://github.com/pycqa/flake8
rev: 4.0.1
rev: 3.9.0
hooks:
- id: flake8
- repo: https://github.com/timothycrosley/isort
rev: 5.9.3
rev: 5.12.0
hooks:
- id: isort
8 changes: 7 additions & 1 deletion app/api/api_v1/routers/meta_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -110,11 +110,17 @@ async def list_bucket_objects(
else:
objects = await get_list_of_s3_objects(s3_resource, s3_bucket, prefix)
objects_json = [
{"key": obj.key, "last_modified": obj.last_modified}
{
"key": obj.key,
"last_modified": obj.last_modified,
"size": obj.size / 1e3,
}
for obj in objects
if obj.key.endswith(file_format)
]
return {
"total": len(objects_json),
"file_size": "KB",
"bucket": s3_bucket,
"objects": objects_json,
}
6 changes: 4 additions & 2 deletions app/core/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,9 +28,10 @@ class Config:

class DateTimeSettings(BaseSettings):

CALENDAR_YEAR_KEYWORD = "year"
CALENDAR_YEAR_KEYWORD = "^year$"
FISCAL_YEAR_KEYWORD = "fiscal_year"
ACADEMIC_YEAR_KEYWORD = "academic_year"
OTHER_YEAR_KEYWORD = ".*_year"
QUARTER_KEYWORD = "quarter"
MONTH_KEYWORD = "month"
DATE_KEYWORD = "date"
Expand All @@ -46,7 +47,7 @@ class DateTimeSettings(BaseSettings):
2: ["week"],
3: ["month"],
4: ["quarter"],
5: ["calender_year", "non_calendar_year"],
5: ["calender_year", "non_calendar_year", "other_year"],
}
GRANULARITY_REPRESENTATION = {
"date": "Daily",
Expand All @@ -55,6 +56,7 @@ class DateTimeSettings(BaseSettings):
"quarter": "Quarterly",
"calender_year": "Yearly",
"non_calendar_year": "Yearly",
"other_year": "Yearly",
}


Expand Down
8 changes: 0 additions & 8 deletions app/models/meta_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,11 +11,3 @@ class MetaData(BaseModel):
spatial_coverage: Optional[str]
formats_available: Optional[str]
is_public: Optional[bool]


"""
[
"https://storage.factly.org/mande/edu-ministry/data/processed/statistics/1_AISHE_report/1_universities_count_by_state/output.csv",
"https://storage.factly.org/mande/edu-ministry/data/processed/statistics/1_AISHE_report/19_enrolment_foreign/output.csv"
]
"""
9 changes: 8 additions & 1 deletion app/utils/columns_mapping.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,9 @@ async def find_datetime_columns(columns: set):
cal_year_pattern = re.compile(
r".*({})".format(datetime_settings.CALENDAR_YEAR_KEYWORD)
)
other_year_pattern = re.compile(
r".*({})".format(datetime_settings.OTHER_YEAR_KEYWORD)
)
quarter_pattern = re.compile(
r".*({})".format(datetime_settings.QUARTER_KEYWORD)
)
Expand All @@ -49,6 +52,9 @@ async def find_datetime_columns(columns: set):
year_columns, columns = extract_pattern_from_columns(
columns, cal_year_pattern
)
other_year_columns, columns = extract_pattern_from_columns(
columns, other_year_pattern
)
quarter_columns, columns = extract_pattern_from_columns(
columns, quarter_pattern
)
Expand All @@ -61,10 +67,10 @@ async def find_datetime_columns(columns: set):
date_columns = {
col for col in date_columns if not as_on_date_pattern.match(col)
}

return {
"non_calendar_year": fiscal_year_columns,
"calender_year": year_columns,
"other_year": other_year_columns,
"quarter": quarter_columns,
"month": month_columns,
"date": date_columns,
Expand Down Expand Up @@ -131,6 +137,7 @@ async def find_mapped_columns(columns):
**unit_columns,
**note_columns,
}

not_mapped_columns = list(
set(columns).difference(
list(chain.from_iterable(mapped_columns.values()))
Expand Down
6 changes: 4 additions & 2 deletions app/utils/temporal_coverage.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,8 +87,10 @@ def temporal_coverage_representation(is_sequence, year_mapping):


async def get_temporal_coverage(dataset, mapped_columns: dict):
year_columns = list(mapped_columns["calender_year"]) + list(
mapped_columns["non_calendar_year"]
year_columns = (
list(mapped_columns["calender_year"])
+ list(mapped_columns["non_calendar_year"])
+ list(mapped_columns["other_year"])
)
year_columns = [year_column for year_column in year_columns if year_column]

Expand Down

0 comments on commit 2ad0480

Please sign in to comment.