Skip to content

Commit

Permalink
Refactor update function to be reusable
Browse files Browse the repository at this point in the history
  • Loading branch information
Ru Chern Chong committed May 7, 2024
1 parent 1f2bf06 commit 7c810ae
Show file tree
Hide file tree
Showing 6 changed files with 44 additions and 131 deletions.
2 changes: 2 additions & 0 deletions .env.example
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
MONGODB_URI=
MONGODB_DB_NAME=
53 changes: 0 additions & 53 deletions lambda_function.py

This file was deleted.

5 changes: 0 additions & 5 deletions main.py

This file was deleted.

69 changes: 16 additions & 53 deletions update_cars.py
Original file line number Diff line number Diff line change
@@ -1,60 +1,23 @@
import boto3
from dotenv import load_dotenv

from updater import update

load_dotenv()


COLLECTION_NAME: str = "cars"
ZIP_FILE_NAME: str = "Monthly New Registration of Cars by Make.zip"
ZIP_URL: str = (
f"https://datamall.lta.gov.sg/content/dam/datamall/datasets/Facts_Figures/Vehicle Registration/{ZIP_FILE_NAME}"
)

print(f"ZIP_URL {ZIP_URL}")
import asyncio
import updater
from typing import List


async def main():
result = await update()

if __name__ == "__main__":
# Create a DynamoDB table
my_table = create_dynamodb_table("cars")

# Put the item into the table
put_item_into_table(my_table, result)


def create_dynamodb_table(table_name):
# Create a DynamoDB resource
dynamodb = boto3.resource("dynamodb")

# Create the table
table = dynamodb.create_table(
TableName=table_name,
KeySchema=[
{"AttributeName": "id", "KeyType": "HASH"}, # Partition key
{"AttributeName": "timestamp", "KeyType": "RANGE"}, # Sort key
],
AttributeDefinitions=[
{"AttributeName": "id", "AttributeType": "S"}, # String data type
{
"AttributeName": "timestamp",
"AttributeType": "N", # Number data type
},
],
ProvisionedThroughput={"ReadCapacityUnits": 5, "WriteCapacityUnits": 5},
collection_name: str = "cars"
zip_file_name: str = "Monthly New Registration of Cars by Make.zip"
zip_url: str = (
f"https://datamall.lta.gov.sg/content/dam/datamall/datasets/Facts_Figures/Vehicle Registration/{zip_file_name}"
)
key_fields: List[str] = ["month"]

# Wait until the table exists
table.meta.client.get_waiter("table_exists").wait(TableName=table_name)

print(f"DynamoDB Table {table_name} created")

return table
await updater.main(
collection_name=collection_name,
zip_url=zip_url,
zip_file_name=zip_file_name,
key_fields=key_fields,
)


def put_item_into_table(table, item_data):
# Put an item into the DynamoDB table
table.put_item(Item=item_data)
if __name__ == "__main__":
asyncio.run(main())
23 changes: 23 additions & 0 deletions update_coe.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
import asyncio
import updater
from typing import List


async def main():
collection_name: str = "coe"
zip_file_name: str = "COE Bidding Results.zip"
zip_url: str = (
f"https://datamall.lta.gov.sg/content/dam/datamall/datasets/Facts_Figures/Vehicle Registration/{zip_file_name}"
)
key_fields: List[str] = ["month", "bidding_no"]

await updater.main(
collection_name=collection_name,
zip_url=zip_url,
zip_file_name=zip_file_name,
key_fields=key_fields,
)


if __name__ == "__main__":
asyncio.run(main())
23 changes: 3 additions & 20 deletions updater.py
Original file line number Diff line number Diff line change
@@ -1,20 +1,12 @@
import asyncio
import csv
import datetime
import os
from zipfile import ZipFile
from pymongo import MongoClient

from download_file import download_file

EXTRACT_PATH = "tmp"

csv_file_path = "tmp/Monthly New Registration of Cars by Make/M03-Car_Regn_by_make.csv"

json_file_path = (
"tmp/Monthly New Registration of Cars by Make/M03-Car_Regn_by_make.csv.json"
)

csv_data = []


Expand All @@ -25,7 +17,7 @@ async def update(collection_name, zip_file_name, zip_url, key_fields):

try:
zip_file_path = os.path.join(EXTRACT_PATH, zip_file_name)
await download_file(zip_url, zip_file_path) # You'll need to define this
await download_file(zip_url, zip_file_path)

extracted_file_name = extract_zip_file(zip_file_path, EXTRACT_PATH)
destination_path = os.path.join(EXTRACT_PATH, extracted_file_name)
Expand Down Expand Up @@ -71,19 +63,14 @@ def extract_zip_file(zip_file_path, extract_to_path):
for entry in zip_ref.infolist():
if not entry.is_dir():
return entry.filename
return ""


async def main():
zip_file_name = "Monthly New Registration of Cars by Make.zip"
zip_url = f"https://datamall.lta.gov.sg/content/dam/datamall/datasets/Facts_Figures/Vehicle Registration/{zip_file_name}" # Replace with the actual URL
collection_name = "cars"

async def main(collection_name, zip_file_name, zip_url, key_fields):
message = await update(
collection_name=collection_name,
zip_file_name=zip_file_name,
zip_url=zip_url,
key_fields=["month"],
key_fields=key_fields,
)

response = {
Expand All @@ -96,7 +83,3 @@ async def main():
print(response)

return response


if __name__ == "__main__":
asyncio.run(main())

0 comments on commit 7c810ae

Please sign in to comment.