Skip to content

Commit

Permalink
Upgrade classutil to 2.0.0
Browse files Browse the repository at this point in the history
  • Loading branch information
lecafard committed Feb 25, 2022
1 parent e212d0d commit a057b13
Show file tree
Hide file tree
Showing 5 changed files with 56 additions and 22 deletions.
12 changes: 9 additions & 3 deletions cfn-stack.yml
Original file line number Diff line number Diff line change
Expand Up @@ -178,8 +178,10 @@ Resources:
Type: AWS::Lambda::Function
Properties:
Description: Lambda API handler
Runtime: python3.6
Runtime: python3.8
MemorySize: 256
Architectures:
- arm64
Handler: lambda_function.lambda_handler
Role: !GetAtt IAMLambdaAPI.Arn
Code: build/lambda.api/
Expand All @@ -193,8 +195,10 @@ Resources:
Type: AWS::Lambda::Function
Properties:
Description: Lambda crawler and notifier
Runtime: python3.6
Runtime: python3.8
MemorySize: 768
Architectures:
- arm64
Role: !GetAtt IAMLambdaCrawler.Arn
Handler: lambda_function.lambda_handler
Code: build/lambda.crawler/
Expand All @@ -208,8 +212,10 @@ Resources:
Type: AWS::Lambda::Function
Properties:
Description: Lambda cleaner
Runtime: python3.6
Runtime: python3.8
MemorySize: 512
Architectures:
- arm64
Role: !GetAtt IAMLambdaCleaner.Arn
Handler: lambda_function.lambda_handler
Code: build/lambda.cleaner/
Expand Down
25 changes: 23 additions & 2 deletions lambdas/api/classutil.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
import os

# how long loaded classutil data is reused before it is reloaded
CACHE_TIME = timedelta(minutes=5)
CACHE_TIME = timedelta(minutes=10)
BUCKET = os.getenv("CLASSUTIL_BUCKET", "classful-data-testing")
FILENAME = "classutil.json.gz"
CLASS_REGEX = re.compile(r'^(\d{4}[STU][123])_([A-Z]{4}\d{4})_(\d{1,5})$')
Expand All @@ -18,6 +18,7 @@
classutil_data = {}
classutil_etag = ""
classutil_expires = datetime(1970, 1, 1, tzinfo=timezone.utc)

# get classutil data, re-fetching it only when the stored ETag no longer matches
def get_classutil():
global classutil_etag
Expand All @@ -29,7 +30,7 @@ def get_classutil():

try:
res = obj.get(IfNoneMatch=classutil_etag)
classutil_data = json.loads(gzip.decompress(res["Body"].read()))
classutil_data = convert_to_indexed(json.loads(gzip.decompress(res["Body"].read())))
classutil_etag = res['ResponseMetadata']['HTTPHeaders']['etag']
classutil_expires = datetime.now(timezone.utc) + CACHE_TIME
except ClientError as ex:
Expand All @@ -39,6 +40,26 @@ def get_classutil():
raise ex
return classutil_data

def convert_to_indexed(data):
    """Re-key the flat course list in *data* into nested lookup tables.

    Reads ``data["courses"]`` (an iterable of course records carrying
    ``year``, ``term``, ``code``, ``name`` and ``components``) and
    ``data["correct_at"]``.

    Returns a dict with three entries:

    * ``"correct_at"`` -- copied through unchanged.
    * ``"courses"`` -- ``{year+term: {code: {"name", "components"}}}``, where
      ``components`` maps ``str(component["id"])`` to the component's
      remaining fields (everything except ``"id"``).
    * ``"course_names"`` -- ``{year+term: {code: name}}`` derived from
      ``"courses"``.

    If the same code appears twice within a session, the later record wins.
    """
    by_session = {}
    for course in data["courses"]:
        session_key = "{}{}".format(course["year"], course["term"])

        # Index components by stringified id; the id itself is dropped from
        # the stored fields because it is already the key.
        indexed_components = {}
        for component in course["components"]:
            fields = {
                key: value for key, value in component.items() if key != "id"
            }
            indexed_components[str(component["id"])] = fields

        by_session.setdefault(session_key, {})[course["code"]] = {
            "name": course["name"],
            "components": indexed_components,
        }

    # Lightweight code -> name view per session, built from the full index.
    names_by_session = {}
    for session_key, session_courses in by_session.items():
        names_by_session[session_key] = {
            code: entry["name"] for code, entry in session_courses.items()
        }

    return {
        "correct_at": data["correct_at"],
        "courses": by_session,
        "course_names": names_by_session,
    }

def validate_section(data, class_id):
match = CLASS_REGEX.match(class_id)
if not match:
Expand Down
4 changes: 2 additions & 2 deletions lambdas/api/lambda_function.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,9 @@ def get_terms(event):
@lambda_handler.handle("get", path="/terms/<string:term>")
def get_courses_by_term(event, term):
data = get_classutil()
if term not in data['courses']:
if term not in data['course_names']:
return send_error("NotFound", 404)
return send_response({i: data['courses'][term][i]['name'] for i in data['courses'][term]})
return send_response(data['course_names'][term])

@lambda_handler.handle("get", path="/terms/<string:term>/<string:course>")
def get_course_by_term(event, term, course):
Expand Down
13 changes: 8 additions & 5 deletions lambdas/crawler/lambda_function.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,14 @@
import re
import os
import gzip
import logging

logging.basicConfig(level=logging.INFO)

KEY = "classutil.json.gz"
CLASS_REGEX = r"^(\d{4}[STU][123])_([A-Z]{4}\d{4})_(\d{1,5})$"
META_CLASSUTIL_CORRECT_AT = "classutil_correct_at"
CONCURRENCY = 8

BUCKET = os.getenv("CLASSUTIL_BUCKET", "classful-data-testing")
TABLE = os.getenv("DYNAMODB_TABLE", "classful-testing-pending")
Expand All @@ -35,18 +39,17 @@ def lambda_handler(event, context):
print(f"Invalid {META_CLASSUTIL_CORRECT_AT} metadata for object {KEY}, deleting")
obj.delete()

res = scrape(concurrency=8, last_updated=last_updated)
data = convert_to_indexed(res)
data = scrape(concurrency=CONCURRENCY, last_updated=last_updated)

if res["correct_at"] != last_updated:
if data["correct_at"] != last_updated:
obj.put(
Body=gzip.compress(json.dumps(data).encode("utf-8"), compresslevel=9),
Metadata={
META_CLASSUTIL_CORRECT_AT: str(data["correct_at"])
}
)
if last_updated != 0:
send_notifications(data)
send_notifications(convert_to_indexed(data))
else:
print("Not updated, current version: {}".format(last_updated))

Expand Down Expand Up @@ -143,4 +146,4 @@ def send_notifications(data):
Key={
"id": i["id"]
}
)
)
24 changes: 14 additions & 10 deletions lambdas/crawler/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,10 +1,14 @@
beautifulsoup4==4.9.1
certifi==2020.4.5.1
chardet==3.0.4
classutil==1.0.6
idna==2.9
python-dateutil==2.8.1
requests==2.23.0
six==1.15.0
soupsieve==2.0.1
urllib3==1.25.9
aiohttp==3.8.1
aiosignal==1.2.0
async-timeout==4.0.2
attrs==21.4.0
beautifulsoup4==4.10.0
charset-normalizer==2.0.12
classutil==2.0.1
frozenlist==1.3.0
idna==3.3
multidict==6.0.2
python-dateutil==2.8.2
six==1.16.0
soupsieve==2.3.1
yarl==1.7.2

0 comments on commit a057b13

Please sign in to comment.