diff --git a/README.md b/README.md index f5041cf..0bd2cf5 100644 --- a/README.md +++ b/README.md @@ -72,8 +72,8 @@ This includes the creation of a configuration file called `env.json`. Complete t `LOG_LEVEL` | The minimum level for log messages that will appear in output. `INFO` or `DEBUG` is recommended for most use cases; see [Python's logging module](https://docs.python.org/3/library/logging.html). `JOB_NAMES` | The names of one or more jobs (not case sensitive) that have been implemented and defined in `run_jobs.py` (see the **Implementing a New Job** section below). `CANVAS_ACCOUNT_ID` | The Canvas instance root account ID number associated with the courses for which data will be collected. - `CANVAS_TERM_ID` | The Canvas instance term ID number that will be used to limit the query for Canvas courses. Set to 0 to use `ADD_COURSE_IDS`. - `ADD_COURSE_IDS` | Additional Canvas course IDs to retrieve. Duplicates found in `CANVAS_TERM_ID` (if defined) will be removed. + `CANVAS_TERM_IDS` | The Canvas instance term ID numbers that will be used to limit queries for Canvas courses. Set to `[]` (empty array) to only use `ADD_COURSE_IDS` (see below). + `ADD_COURSE_IDS` | Additional Canvas course IDs to retrieve when using `online_meetings/canvas_zoom_meetings.py`. Duplicates found in `CANVAS_TERM_IDS` (if defined) will be removed. `API_BASE_URL` | The base URL for making requests using the U-M API Directory; the default value should be correct. `API_SCOPE_PREFIX` | The scope prefix that will be added after the `API_BASE_URL`; this is usually an acronym for the university location and the API Directory subscription name in CamelCase, separated by `/`. `API_SUBSCRIPTION_NAME` | The name of the API Directory subscription all in lowercase. 
diff --git a/config/env_blank.json b/config/env_blank.json index e477730..7646e5d 100644 --- a/config/env_blank.json +++ b/config/env_blank.json @@ -2,7 +2,7 @@ "LOG_LEVEL": "DEBUG", "JOB_NAMES": ["COURSE_INVENTORY"], "CANVAS_ACCOUNT_ID": 1, - "CANVAS_TERM_ID": 164, + "CANVAS_TERM_IDS": [164], "ADD_COURSE_IDS": [], "API_BASE_URL": "https://apigw.it.umich.edu/um", "API_SCOPE_PREFIX": "", diff --git a/course_inventory/inventory.py b/course_inventory/inventory.py index fbc4597..47d76be 100644 --- a/course_inventory/inventory.py +++ b/course_inventory/inventory.py @@ -11,13 +11,13 @@ from umich_api.api_utils import ApiUtil # local libraries +from course_inventory.async_enroll_gatherer import AsyncEnrollGatherer +from course_inventory.canvas_course_usage import CanvasCourseUsage +from course_inventory.gql_queries import queries as QUERIES +from course_inventory.published_date import FetchPublishedDate from db.db_creator import DBCreator from environ import ENV from vocab import ValidDataSourceName -from .async_enroll_gatherer import AsyncEnrollGatherer -from .canvas_course_usage import CanvasCourseUsage -from .gql_queries import queries as QUERIES -from .published_date import FetchPublishedDate # Initialize settings and globals @@ -25,7 +25,7 @@ logger = logging.getLogger(__name__) ACCOUNT_ID = ENV.get('CANVAS_ACCOUNT_ID', 1) -TERM_ID = ENV['CANVAS_TERM_ID'] +TERM_IDS = ENV['CANVAS_TERM_IDS'] API_UTIL = ApiUtil(ENV['API_BASE_URL'], ENV['API_CLIENT_ID'], ENV['API_CLIENT_SECRET']) SUBSCRIPTION_NAME = ENV['API_SUBSCRIPTION_NAME'] @@ -39,6 +39,8 @@ INVENTORY_DB = ENV['INVENTORY_DB'] APPEND_TABLE_NAMES = ENV.get('APPEND_TABLE_NAMES', ['job_run', 'data_source_status']) +CANVAS_DATETIME_FORMAT = '%Y-%m-%dT%H:%M:%SZ' + # Function(s) - Canvas @@ -65,14 +67,49 @@ def make_request_using_api_utils(url: str, params: Dict[str, Any] = {}) -> Respo return response +def gather_term_data_from_api(account_id: int, term_ids: Sequence[int]) -> pd.DataFrame: + logger.info('** 
gather_term_data_from_api') + + # Fetch data for terms from config + logger.info(f'Canvas terms specified in config: {term_ids}') + url_ending_with_scope = f'{API_SCOPE_PREFIX}/accounts/{account_id}/terms/' + + term_dicts = [] + for term_id in term_ids: + logger.info(f'Pulling data for term number {term_id}') + term_url_ending = url_ending_with_scope + str(term_id) + response = make_request_using_api_utils(term_url_ending) + + term_data = json.loads(response.text) + slim_term_dict = { + 'canvas_id': term_data['id'], + 'name': term_data['name'], + 'sis_id': int(term_data['sis_term_id']), + 'start_at': pd.to_datetime( + term_data['start_at'], + format=CANVAS_DATETIME_FORMAT + ), + 'end_at': pd.to_datetime( + term_data['end_at'], + format=CANVAS_DATETIME_FORMAT + ) + } + term_dicts.append(slim_term_dict) + + term_df = pd.DataFrame(term_dicts) + logger.debug(term_df.head()) + return term_df + + def slim_down_course_data(course_data: List[Dict]) -> List[Dict]: slim_course_dicts = [] for course_dict in course_data: slim_course_dict = { 'canvas_id': course_dict['id'], - 'sis_id': course_dict['sis_course_id'], + 'sis_id': str(course_dict['sis_course_id']) if course_dict['sis_course_id'] is not None else None, 'name': course_dict['name'], 'account_id': course_dict['account_id'], + 'term_id': course_dict['enrollment_term_id'], 'created_at': course_dict['created_at'], 'workflow_state': course_dict['workflow_state'] } @@ -85,39 +122,44 @@ def slim_down_course_data(course_data: List[Dict]) -> List[Dict]: return slim_course_dicts -def gather_course_data_from_api(account_id: int, term_id: int) -> pd.DataFrame: +def gather_course_data_from_api(account_id: int, term_ids: Sequence[int]) -> pd.DataFrame: logger.info('** gather_course_data_from_api') url_ending_with_scope = f'{API_SCOPE_PREFIX}/accounts/{account_id}/courses' - params = { - 'with_enrollments': True, - 'enrollment_type': ['student', 'teacher'], - 'enrollment_term_id': term_id, - 'per_page': 100, - 'include': ['total_students'] - } - # Make first course request - 
page_num = 1 - logger.info(f'Course Page Number: {page_num}') - response = make_request_using_api_utils(url_ending_with_scope, params) - all_course_data = json.loads(response.text) - course_dicts = slim_down_course_data(all_course_data) - more_pages = True - - while more_pages: - next_params = API_UTIL.get_next_page(response) - if next_params: - page_num += 1 - logger.info(f'Course Page Number: {page_num}') - response = make_request_using_api_utils(url_ending_with_scope, next_params) - all_course_data = json.loads(response.text) - course_dicts += slim_down_course_data(all_course_data) - else: - logger.info('No more pages!') - more_pages = False + course_dicts = [] + for term_id in term_ids: + logger.info(f'Fetching course data for term {term_id}') + + params = { + 'with_enrollments': True, + 'enrollment_type': ['student', 'teacher'], + 'enrollment_term_id': term_id, + 'per_page': 100, + 'include': ['total_students'] + } + + # Make first course request + page_num = 1 + logger.info(f'Course Page Number: {page_num}') + response = make_request_using_api_utils(url_ending_with_scope, params) + all_course_data = json.loads(response.text) + course_dicts += slim_down_course_data(all_course_data) + more_pages = True + + while more_pages: + next_params = API_UTIL.get_next_page(response) + if next_params: + page_num += 1 + logger.info(f'Course Page Number: {page_num}') + response = make_request_using_api_utils(url_ending_with_scope, next_params) + all_course_data = json.loads(response.text) + course_dicts += slim_down_course_data(all_course_data) + else: + logger.info('No more pages!') + more_pages = False num_course_dicts = len(course_dicts) - logger.info(f'Total course records: {num_course_dicts}') + logger.info(f'Total course records for all active terms: {num_course_dicts}') course_dicts_with_students = [] for course_dict in course_dicts: if course_dict['total_students'] > 0: @@ -183,10 +225,14 @@ def pull_sis_section_data_from_udw(section_ids: Sequence[int], conn: 
connection) def run_course_inventory() -> Sequence[Dict[str, Union[ValidDataSourceName, pd.Timestamp]]]: logger.info("* run_course_inventory") + logger.info('Making requests against the Canvas API') + # Gather term data + term_df = gather_term_data_from_api(ACCOUNT_ID, TERM_IDS) + # Gather course data - course_df = gather_course_data_from_api(ACCOUNT_ID, TERM_ID) + course_df = gather_course_data_from_api(ACCOUNT_ID, TERM_IDS) logger.info("*** Fetching the published date ***") course_available_df = course_df.loc[course_df.workflow_state == 'available'].copy() @@ -198,11 +244,12 @@ def run_course_inventory() -> Sequence[Dict[str, Union[ValidDataSourceName, pd.T logger.info("*** Checking for courses available and no published date ***") logger.info(course_df[(course_df['workflow_state'] == 'available') & (course_df['published_at'].isnull())]) + course_df['created_at'] = pd.to_datetime(course_df['created_at'], - format="%Y-%m-%dT%H:%M:%SZ", + format=CANVAS_DATETIME_FORMAT, errors='coerce') course_df['published_at'] = pd.to_datetime(course_df['published_at'], - format="%Y-%m-%dT%H:%M:%SZ", + format=CANVAS_DATETIME_FORMAT, errors='coerce') logger.info("*** Fetching the canvas course usage data ***") @@ -260,6 +307,7 @@ def run_course_inventory() -> Sequence[Dict[str, Union[ValidDataSourceName, pd.T } # Produce output + num_term_records = len(term_df) num_course_records = len(course_df) num_user_records = len(user_df) num_section_records = len(section_df) @@ -268,6 +316,10 @@ def run_course_inventory() -> Sequence[Dict[str, Union[ValidDataSourceName, pd.T if CREATE_CSVS: # Generate CSV Output + logger.info(f'Writing {num_term_records} term records to CSV') + term_df.to_csv(os.path.join('data', 'term.csv'), index=False) + logger.info('Wrote data to data/term.csv') + logger.info(f'Writing {num_course_records} course records to CSV') course_df.to_csv(os.path.join('data', 'course.csv'), index=False) logger.info('Wrote data to data/course.csv') @@ -288,12 +340,18 @@ def 
run_course_inventory() -> Sequence[Dict[str, Union[ValidDataSourceName, pd.T canvas_course_usage_df.to_csv(os.path.join('data', 'canvas_course_usage.csv'), index=False) logger.info('Wrote data to data/canvas_course_usage.csv') - # Empty tables (if any) in database, then migrate - logger.info('Emptying tables in DB') + # Initialize DBCreator object db_creator_obj = DBCreator(INVENTORY_DB, APPEND_TABLE_NAMES) + + # Empty tables (if any) in database + logger.info('Emptying tables in DB') db_creator_obj.drop_records() # Insert gathered data + logger.info(f'Inserting {num_term_records} term records to DB') + term_df.to_sql('term', db_creator_obj.engine, if_exists='append', index=False) + logger.info(f'Inserted data into term table in {db_creator_obj.db_name}') + logger.info(f'Inserting {num_course_records} course records to DB') course_df.to_sql('course', db_creator_obj.engine, if_exists='append', index=False) logger.info(f'Inserted data into course table in {db_creator_obj.db_name}') diff --git a/db/migrations/0013.add_term_table.py b/db/migrations/0013.add_term_table.py new file mode 100644 index 0000000..dd7e301 --- /dev/null +++ b/db/migrations/0013.add_term_table.py @@ -0,0 +1,31 @@ +# +# file: migrations/0013.add_term_table.py +# +from yoyo import step + +steps = [ + step(''' + CREATE TABLE IF NOT EXISTS term + ( + canvas_id INTEGER NOT NULL UNIQUE, + name VARCHAR(100) NOT NULL UNIQUE, + sis_id INTEGER NOT NULL, + start_at DATETIME NOT NULL, + end_at DATETIME NOT NULL, + PRIMARY KEY (canvas_id) + ) + ENGINE=InnoDB + CHARACTER SET utf8mb4; + '''), + step(''' + ALTER TABLE course + ADD COLUMN term_id INTEGER NOT NULL AFTER account_id; + '''), + step(''' + ALTER TABLE course + ADD CONSTRAINT fk_term_id + FOREIGN KEY (term_id) + REFERENCES term(canvas_id) + ON UPDATE CASCADE ON DELETE CASCADE; + ''') +] diff --git a/db/migrations/0014.change_course_sis_id_data_type.py b/db/migrations/0014.change_course_sis_id_data_type.py new file mode 100644 index 0000000..6a00dc1 
--- /dev/null +++ b/db/migrations/0014.change_course_sis_id_data_type.py @@ -0,0 +1,10 @@ +# +# file: migrations/0014.change_course_sis_id_data_type.py +# +from yoyo import step + +step(''' + ALTER TABLE course + MODIFY + sis_id VARCHAR(15) NULL; +''') diff --git a/online_meetings/canvas_zoom_meetings.py b/online_meetings/canvas_zoom_meetings.py index 2ba4eb3..240b577 100644 --- a/online_meetings/canvas_zoom_meetings.py +++ b/online_meetings/canvas_zoom_meetings.py @@ -7,7 +7,7 @@ import re import sys from datetime import datetime -from typing import Dict, Optional, List +from typing import Dict, List, Optional, Sequence, Union import canvasapi import pandas as pd @@ -160,16 +160,31 @@ def get_zoom_course(self, course: canvasapi.course.Course) -> None: self.get_zoom_details(posturl, formdata, course.id) return None - def zoom_course_report(self, canvas_account: int = 1, enrollment_term_id: int = 0, - published: bool = True, add_course_ids: list = None) -> None: + def zoom_course_report( + self, + canvas_account: int = 1, + enrollment_term_ids: Union[Sequence[int], None] = None, + published: bool = True, + add_course_ids: list = None + ) -> None: account = CANVAS.get_account(canvas_account) # Canvas has a limit of 100 per page on this API per_page = 100 - # Get all published courses from the defined enrollment term + + # Get all published courses from the defined enrollment terms courses = [] - if enrollment_term_id: - courses = account.get_courses(enrollment_term_id=enrollment_term_id, published=published, per_page=per_page) + if enrollment_term_ids is not None: + for enrollment_term_id in enrollment_term_ids: + logger.info(f'Fetching published course data for term {enrollment_term_id}') + courses_list = list( + account.get_courses( + enrollment_term_id=enrollment_term_id, + published=published, + per_page=per_page + ) + ) + courses += courses_list course_count = 0 for course in courses: @@ -191,7 +206,7 @@ def zoom_course_report(self, canvas_account: int = 1, 
enrollment_term_id: int = start_time = datetime.now() logger.info(f"Script started at {start_time}") zoom_placements = ZoomPlacements() -zoom_placements.zoom_course_report(ENV.get("CANVAS_ACCOUNT_ID", 1), ENV.get("CANVAS_TERM_ID", 0), +zoom_placements.zoom_course_report(ENV.get("CANVAS_ACCOUNT_ID", 1), ENV.get("CANVAS_TERM_IDS", []), True, ENV.get("ADD_COURSE_IDS", [])) zoom_courses_df = pd.DataFrame(zoom_placements.zoom_courses)