Skip to content
This repository has been archived by the owner on Apr 22, 2024. It is now read-only.

Commit

Permalink
Add term table, accommodate multiple terms, fix course.sis_id bug (#113,
Browse files Browse the repository at this point in the history
#117) (#115)

* Add term WIP

* Modify Canvas course fetch func to use multiple terms

* Improve formatting

* Update configuration variable

* Fix end_at parsing

* Update canvas_zoom_meetings to handle multiple terms

* Add fix for issue 117: migration changing data type, string type coercion

* Fix ADD_COURSE_IDS functionality

* Fix comment

* Remove unneeded logging

* Reformat params; change var name

* Make term a dropped table

* Remove migrate from comment

* Make course.term_id NOT NULL

* Tweak term func name

* Reorder local imports

* Fix mutable default parameter anti-pattern

* Add log message to loop
  • Loading branch information
ssciolla authored Apr 28, 2020
1 parent 6fe2b94 commit a818e64
Show file tree
Hide file tree
Showing 6 changed files with 163 additions and 49 deletions.
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -72,8 +72,8 @@ This includes the creation of a configuration file called `env.json`. Complete t
`LOG_LEVEL` | The minimum level for log messages that will appear in output. `INFO` or `DEBUG` is recommended for most use cases; see [Python's logging module](https://docs.python.org/3/library/logging.html).
`JOB_NAMES` | The names of one or more jobs (not case sensitive) that have been implemented and defined in `run_jobs.py` (see the **Implementing a New Job** section below).
`CANVAS_ACCOUNT_ID` | The Canvas instance root account ID number associated with the courses for which data will be collected.
`CANVAS_TERM_ID` | The Canvas instance term ID number that will be used to limit the query for Canvas courses. Set to 0 to use `ADD_COURSE_IDS`.
`ADD_COURSE_IDS` | Additional Canvas course IDs to retrieve. Duplicates found in `CANVAS_TERM_ID` (if defined) will be removed.
`CANVAS_TERM_IDS` | The Canvas instance term ID numbers that will be used to limit queries for Canvas courses. Set to `[]` (empty array) to only use `ADD_COURSE_IDS` (see below).
`ADD_COURSE_IDS` | Additional Canvas course IDs to retrieve when using `online_meetings/canvas_zoom_meetings.py`. Duplicates of courses already found via `CANVAS_TERM_IDS` (if any are defined) will be removed.
`API_BASE_URL` | The base URL for making requests using the U-M API Directory; the default value should be correct.
`API_SCOPE_PREFIX` | The scope prefix that will be added after the `API_BASE_URL`; this is usually an acronym for the university location and the API Directory subscription name in CamelCase, separated by `/`.
`API_SUBSCRIPTION_NAME` | The name of the API Directory subscription all in lowercase.
Expand Down
2 changes: 1 addition & 1 deletion config/env_blank.json
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
"LOG_LEVEL": "DEBUG",
"JOB_NAMES": ["COURSE_INVENTORY"],
"CANVAS_ACCOUNT_ID": 1,
"CANVAS_TERM_ID": 164,
"CANVAS_TERM_IDS": [164],
"ADD_COURSE_IDS": [],
"API_BASE_URL": "https://apigw.it.umich.edu/um",
"API_SCOPE_PREFIX": "",
Expand Down
136 changes: 97 additions & 39 deletions course_inventory/inventory.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,21 +11,21 @@
from umich_api.api_utils import ApiUtil

# local libraries
from course_inventory.async_enroll_gatherer import AsyncEnrollGatherer
from course_inventory.canvas_course_usage import CanvasCourseUsage
from course_inventory.gql_queries import queries as QUERIES
from course_inventory.published_date import FetchPublishedDate
from db.db_creator import DBCreator
from environ import ENV
from vocab import ValidDataSourceName
from .async_enroll_gatherer import AsyncEnrollGatherer
from .canvas_course_usage import CanvasCourseUsage
from .gql_queries import queries as QUERIES
from .published_date import FetchPublishedDate


# Initialize settings and globals

logger = logging.getLogger(__name__)

ACCOUNT_ID = ENV.get('CANVAS_ACCOUNT_ID', 1)
TERM_ID = ENV['CANVAS_TERM_ID']
TERM_IDS = ENV['CANVAS_TERM_IDS']

API_UTIL = ApiUtil(ENV['API_BASE_URL'], ENV['API_CLIENT_ID'], ENV['API_CLIENT_SECRET'])
SUBSCRIPTION_NAME = ENV['API_SUBSCRIPTION_NAME']
Expand All @@ -39,6 +39,8 @@
INVENTORY_DB = ENV['INVENTORY_DB']
APPEND_TABLE_NAMES = ENV.get('APPEND_TABLE_NAMES', ['job_run', 'data_source_status'])

CANVAS_DATETIME_FORMAT = '%Y-%m-%dT%H:%M:%SZ'


# Function(s) - Canvas

Expand All @@ -65,14 +67,49 @@ def make_request_using_api_utils(url: str, params: Dict[str, Any] = {}) -> Respo
return response


def gather_term_data_from_api(account_id: int, term_ids: Sequence[int]) -> pd.DataFrame:
    """
    Fetch term records from the API, one request per term ID, and return them as a DataFrame.

    Each response body is parsed as JSON and reduced to the fields canvas_id, name,
    sis_id, start_at, and end_at.

    :param account_id: account ID interpolated into the terms URL
    :param term_ids: term IDs to fetch; each is appended to the terms URL in turn
    :return: DataFrame with one row per requested term and the five columns above;
        when term_ids is empty the frame has those columns and no rows
    :raises KeyError: if a response is missing one of the expected fields
    :raises TypeError, ValueError: if sis_term_id cannot be converted with int()
    """
    # Log the function's actual name, matching the convention of the other gather_* functions
    logger.info('** gather_term_data_from_api')

    # Fetch data for terms from config
    logger.info(f'Canvas terms specified in config: {term_ids}')
    url_ending_with_scope = f'{API_SCOPE_PREFIX}/accounts/{account_id}/terms/'

    # Pinning the column list keeps the frame's schema stable even when no terms are requested
    term_columns = ['canvas_id', 'name', 'sis_id', 'start_at', 'end_at']

    term_dicts = []
    for term_id in term_ids:
        logger.info(f'Pulling data for term number {term_id}')
        term_url_ending = url_ending_with_scope + str(term_id)
        response = make_request_using_api_utils(term_url_ending)

        term_data = json.loads(response.text)
        slim_term_dict = {
            'canvas_id': term_data['id'],
            'name': term_data['name'],
            # NOTE(review): int() raises if sis_term_id is null or non-numeric;
            # confirm every configured term carries a numeric SIS ID
            'sis_id': int(term_data['sis_term_id']),
            'start_at': pd.to_datetime(
                term_data['start_at'],
                format=CANVAS_DATETIME_FORMAT
            ),
            'end_at': pd.to_datetime(
                term_data['end_at'],
                format=CANVAS_DATETIME_FORMAT
            )
        }
        term_dicts.append(slim_term_dict)

    term_df = pd.DataFrame(term_dicts, columns=term_columns)
    logger.debug(term_df.head())
    return term_df


def slim_down_course_data(course_data: List[Dict]) -> List[Dict]:
slim_course_dicts = []
for course_dict in course_data:
slim_course_dict = {
'canvas_id': course_dict['id'],
'sis_id': course_dict['sis_course_id'],
'sis_id': str(course_dict['sis_course_id']),
'name': course_dict['name'],
'account_id': course_dict['account_id'],
'term_id': course_dict['enrollment_term_id'],
'created_at': course_dict['created_at'],
'workflow_state': course_dict['workflow_state']
}
Expand All @@ -85,39 +122,44 @@ def slim_down_course_data(course_data: List[Dict]) -> List[Dict]:
return slim_course_dicts


def gather_course_data_from_api(account_id: int, term_id: int) -> pd.DataFrame:
def gather_course_data_from_api(account_id: int, term_ids: Sequence[int]) -> pd.DataFrame:
logger.info('** gather_course_data_from_api')
url_ending_with_scope = f'{API_SCOPE_PREFIX}/accounts/{account_id}/courses'
params = {
'with_enrollments': True,
'enrollment_type': ['student', 'teacher'],
'enrollment_term_id': term_id,
'per_page': 100,
'include': ['total_students']
}

# Make first course request
page_num = 1
logger.info(f'Course Page Number: {page_num}')
response = make_request_using_api_utils(url_ending_with_scope, params)
all_course_data = json.loads(response.text)
course_dicts = slim_down_course_data(all_course_data)
more_pages = True

while more_pages:
next_params = API_UTIL.get_next_page(response)
if next_params:
page_num += 1
logger.info(f'Course Page Number: {page_num}')
response = make_request_using_api_utils(url_ending_with_scope, next_params)
all_course_data = json.loads(response.text)
course_dicts += slim_down_course_data(all_course_data)
else:
logger.info('No more pages!')
more_pages = False
course_dicts = []
for term_id in term_ids:
logger.info(f'Fetching course data for term {term_id}')

params = {
'with_enrollments': True,
'enrollment_type': ['student', 'teacher'],
'enrollment_term_id': term_id,
'per_page': 100,
'include': ['total_students']
}

# Make first course request
page_num = 1
logger.info(f'Course Page Number: {page_num}')
response = make_request_using_api_utils(url_ending_with_scope, params)
all_course_data = json.loads(response.text)
course_dicts += slim_down_course_data(all_course_data)
more_pages = True

while more_pages:
next_params = API_UTIL.get_next_page(response)
if next_params:
page_num += 1
logger.info(f'Course Page Number: {page_num}')
response = make_request_using_api_utils(url_ending_with_scope, next_params)
all_course_data = json.loads(response.text)
course_dicts += slim_down_course_data(all_course_data)
else:
logger.info('No more pages!')
more_pages = False

num_course_dicts = len(course_dicts)
logger.info(f'Total course records: {num_course_dicts}')
logger.info(f'Total course records for all active terms: {num_course_dicts}')
course_dicts_with_students = []
for course_dict in course_dicts:
if course_dict['total_students'] > 0:
Expand Down Expand Up @@ -183,10 +225,14 @@ def pull_sis_section_data_from_udw(section_ids: Sequence[int], conn: connection)

def run_course_inventory() -> Sequence[Dict[str, Union[ValidDataSourceName, pd.Timestamp]]]:
logger.info("* run_course_inventory")

logger.info('Making requests against the Canvas API')

# Gather term data
term_df = gather_term_data_from_api(ACCOUNT_ID, TERM_IDS)

# Gather course data
course_df = gather_course_data_from_api(ACCOUNT_ID, TERM_ID)
course_df = gather_course_data_from_api(ACCOUNT_ID, TERM_IDS)

logger.info("*** Fetching the published date ***")
course_available_df = course_df.loc[course_df.workflow_state == 'available'].copy()
Expand All @@ -198,11 +244,12 @@ def run_course_inventory() -> Sequence[Dict[str, Union[ValidDataSourceName, pd.T

logger.info("*** Checking for courses available and no published date ***")
logger.info(course_df[(course_df['workflow_state'] == 'available') & (course_df['published_at'].isnull())])

course_df['created_at'] = pd.to_datetime(course_df['created_at'],
format="%Y-%m-%dT%H:%M:%SZ",
format=CANVAS_DATETIME_FORMAT,
errors='coerce')
course_df['published_at'] = pd.to_datetime(course_df['published_at'],
format="%Y-%m-%dT%H:%M:%SZ",
format=CANVAS_DATETIME_FORMAT,
errors='coerce')

logger.info("*** Fetching the canvas course usage data ***")
Expand Down Expand Up @@ -260,6 +307,7 @@ def run_course_inventory() -> Sequence[Dict[str, Union[ValidDataSourceName, pd.T
}

# Produce output
num_term_records = len(term_df)
num_course_records = len(course_df)
num_user_records = len(user_df)
num_section_records = len(section_df)
Expand All @@ -268,6 +316,10 @@ def run_course_inventory() -> Sequence[Dict[str, Union[ValidDataSourceName, pd.T

if CREATE_CSVS:
# Generate CSV Output
logger.info(f'Writing {num_term_records} term records to CSV')
term_df.to_csv(os.path.join('data', 'term.csv'), index=False)
logger.info('Wrote data to data/term.csv')

logger.info(f'Writing {num_course_records} course records to CSV')
course_df.to_csv(os.path.join('data', 'course.csv'), index=False)
logger.info('Wrote data to data/course.csv')
Expand All @@ -288,12 +340,18 @@ def run_course_inventory() -> Sequence[Dict[str, Union[ValidDataSourceName, pd.T
canvas_course_usage_df.to_csv(os.path.join('data', 'canvas_course_usage.csv'), index=False)
logger.info('Wrote data to data/canvas_course_usage.csv')

# Empty tables (if any) in database, then migrate
logger.info('Emptying tables in DB')
# Initialize DBCreator object
db_creator_obj = DBCreator(INVENTORY_DB, APPEND_TABLE_NAMES)

# Empty tables (if any) in database
logger.info('Emptying tables in DB')
db_creator_obj.drop_records()

# Insert gathered data
logger.info(f'Inserting {num_term_records} term records to DB')
term_df.to_sql('term', db_creator_obj.engine, if_exists='append', index=False)
logger.info(f'Inserted data into term table in {db_creator_obj.db_name}')

logger.info(f'Inserting {num_course_records} course records to DB')
course_df.to_sql('course', db_creator_obj.engine, if_exists='append', index=False)
logger.info(f'Inserted data into course table in {db_creator_obj.db_name}')
Expand Down
31 changes: 31 additions & 0 deletions db/migrations/0013.add_term_table.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
#
# file: migrations/0013.add_term_table.py
#
from yoyo import step

# Migration steps: create the term table, add course.term_id, then link the two
# with a foreign key. Each step() is given forward SQL only; no rollback
# statement is supplied.
steps = [
# Step 1: create the term table, keyed on canvas_id, with a unique name per term.
step('''
CREATE TABLE IF NOT EXISTS term
(
canvas_id INTEGER NOT NULL UNIQUE,
name VARCHAR(100) NOT NULL UNIQUE,
sis_id INTEGER NOT NULL,
start_at DATETIME NOT NULL,
end_at DATETIME NOT NULL,
PRIMARY KEY (canvas_id)
)
ENGINE=InnoDB
CHARACTER SET utf8mb4;
'''),
# Step 2: add a required term_id column to course, placed after account_id.
# NOTE(review): the column is NOT NULL with no DEFAULT; if course already holds
# rows when this runs, this step or the foreign key below may fail or backfill
# an unexpected value -- confirm the table is empty at migration time.
step('''
ALTER TABLE course
ADD COLUMN term_id INTEGER NOT NULL AFTER account_id;
'''),
# Step 3: make course.term_id reference term.canvas_id; updates and deletes on
# term cascade to the matching course rows.
step('''
ALTER TABLE course
ADD CONSTRAINT fk_term_id
FOREIGN KEY (term_id)
REFERENCES term(canvas_id)
ON UPDATE CASCADE ON DELETE CASCADE;
''')
]
10 changes: 10 additions & 0 deletions db/migrations/0014.change_course_sis_id_data_type.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
#
# file: migrations/0014.change_course_sis_id_data_type.py
#
from yoyo import step

# Change course.sis_id to a nullable VARCHAR(15) column.
# Written as a bare step() call (no `steps` list) with forward SQL only;
# no rollback statement is supplied.
step('''
ALTER TABLE course
MODIFY
sis_id VARCHAR(15) NULL;
''')
29 changes: 22 additions & 7 deletions online_meetings/canvas_zoom_meetings.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
import re
import sys
from datetime import datetime
from typing import Dict, Optional, List
from typing import Dict, List, Optional, Sequence, Union

import canvasapi
import pandas as pd
Expand Down Expand Up @@ -160,16 +160,31 @@ def get_zoom_course(self, course: canvasapi.course.Course) -> None:
self.get_zoom_details(posturl, formdata, course.id)
return None

def zoom_course_report(self, canvas_account: int = 1, enrollment_term_id: int = 0,
published: bool = True, add_course_ids: list = None) -> None:
def zoom_course_report(
self,
canvas_account: int = 1,
enrollment_term_ids: Union[Sequence[int], None] = None,
published: bool = True,
add_course_ids: list = None
) -> None:

account = CANVAS.get_account(canvas_account)
# Canvas has a limit of 100 per page on this API
per_page = 100
# Get all published courses from the defined enrollment term

# Get all published courses from the defined enrollment terms
courses = []
if enrollment_term_id:
courses = account.get_courses(enrollment_term_id=enrollment_term_id, published=published, per_page=per_page)
if enrollment_term_ids is not None:
for enrollment_term_id in enrollment_term_ids:
logger.info(f'Fetching published course data for term {enrollment_term_id}')
courses_list = list(
account.get_courses(
enrollment_term_id=enrollment_term_id,
published=published,
per_page=per_page
)
)
courses += courses_list

course_count = 0
for course in courses:
Expand All @@ -191,7 +206,7 @@ def zoom_course_report(self, canvas_account: int = 1, enrollment_term_id: int =
start_time = datetime.now()
logger.info(f"Script started at {start_time}")
zoom_placements = ZoomPlacements()
zoom_placements.zoom_course_report(ENV.get("CANVAS_ACCOUNT_ID", 1), ENV.get("CANVAS_TERM_ID", 0),
zoom_placements.zoom_course_report(ENV.get("CANVAS_ACCOUNT_ID", 1), ENV.get("CANVAS_TERM_IDS", []),
True, ENV.get("ADD_COURSE_IDS", []))

zoom_courses_df = pd.DataFrame(zoom_placements.zoom_courses)
Expand Down

0 comments on commit a818e64

Please sign in to comment.