Skip to content
This repository has been archived by the owner on Apr 22, 2024. It is now read-only.

Add term table, accommodate multiple terms, fix course.sis_id bug (#113, #117) #115

Merged
merged 19 commits into from
Apr 28, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -72,8 +72,8 @@ This includes the creation of a configuration file called `env.json`. Complete t
`LOG_LEVEL` | The minimum level for log messages that will appear in output. `INFO` or `DEBUG` is recommended for most use cases; see [Python's logging module](https://docs.python.org/3/library/logging.html).
`JOB_NAMES` | The names of one or more jobs (not case sensitive) that have been implemented and defined in `run_jobs.py` (see the **Implementing a New Job** section below).
`CANVAS_ACCOUNT_ID` | The Canvas instance root account ID number associated with the courses for which data will be collected.
`CANVAS_TERM_ID` | The Canvas instance term ID number that will be used to limit the query for Canvas courses. Set to 0 to use `ADD_COURSE_IDS`.
`ADD_COURSE_IDS` | Additional Canvas course IDs to retrieve. Duplicates found in `CANVAS_TERM_ID` (if defined) will be removed.
`CANVAS_TERM_IDS` | The Canvas instance term ID numbers that will be used to limit queries for Canvas courses. Set to `[]` (empty array) to only use `ADD_COURSE_IDS` (see below).
`ADD_COURSE_IDS` | Additional Canvas course IDs to retrieve when using `online_meetings/canvas_zoom_meetings.py`. Duplicates found in `CANVAS_TERM_IDS` (if defined) will be removed.
`API_BASE_URL` | The base URL for making requests using the U-M API Directory; the default value should be correct.
`API_SCOPE_PREFIX` | The scope prefix that will be added after the `API_BASE_URL`; this is usually an acronym for the university location and the API Directory subscription name in CamelCase, separated by `/`.
`API_SUBSCRIPTION_NAME` | The name of the API Directory subscription all in lowercase.
Expand Down
2 changes: 1 addition & 1 deletion config/env_blank.json
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
"LOG_LEVEL": "DEBUG",
"JOB_NAMES": ["COURSE_INVENTORY"],
"CANVAS_ACCOUNT_ID": 1,
"CANVAS_TERM_ID": 164,
"CANVAS_TERM_IDS": [164],
"ADD_COURSE_IDS": [],
"API_BASE_URL": "https://apigw.it.umich.edu/um",
"API_SCOPE_PREFIX": "",
Expand Down
136 changes: 97 additions & 39 deletions course_inventory/inventory.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,21 +11,21 @@
from umich_api.api_utils import ApiUtil

# local libraries
from course_inventory.async_enroll_gatherer import AsyncEnrollGatherer
from course_inventory.canvas_course_usage import CanvasCourseUsage
from course_inventory.gql_queries import queries as QUERIES
from course_inventory.published_date import FetchPublishedDate
ssciolla marked this conversation as resolved.
Show resolved Hide resolved
from db.db_creator import DBCreator
from environ import ENV
from vocab import ValidDataSourceName
from .async_enroll_gatherer import AsyncEnrollGatherer
from .canvas_course_usage import CanvasCourseUsage
from .gql_queries import queries as QUERIES
from .published_date import FetchPublishedDate


# Initialize settings and globals

logger = logging.getLogger(__name__)

ACCOUNT_ID = ENV.get('CANVAS_ACCOUNT_ID', 1)
TERM_ID = ENV['CANVAS_TERM_ID']
TERM_IDS = ENV['CANVAS_TERM_IDS']
ssciolla marked this conversation as resolved.
Show resolved Hide resolved

API_UTIL = ApiUtil(ENV['API_BASE_URL'], ENV['API_CLIENT_ID'], ENV['API_CLIENT_SECRET'])
SUBSCRIPTION_NAME = ENV['API_SUBSCRIPTION_NAME']
Expand All @@ -39,6 +39,8 @@
INVENTORY_DB = ENV['INVENTORY_DB']
APPEND_TABLE_NAMES = ENV.get('APPEND_TABLE_NAMES', ['job_run', 'data_source_status'])

CANVAS_DATETIME_FORMAT = '%Y-%m-%dT%H:%M:%SZ'
ssciolla marked this conversation as resolved.
Show resolved Hide resolved


# Function(s) - Canvas

Expand All @@ -65,14 +67,49 @@ def make_request_using_api_utils(url: str, params: Dict[str, Any] = {}) -> Respo
return response


def gather_term_data_from_api(account_id: int, term_ids: Sequence[int]) -> pd.DataFrame:
    """
    Fetch one Canvas term record per ID and return the slimmed records as a DataFrame.

    Each term is requested individually from
    ``{API_SCOPE_PREFIX}/accounts/{account_id}/terms/{term_id}`` using
    ``make_request_using_api_utils``.

    :param account_id: Canvas account ID used to build the request URL.
    :param term_ids: Canvas term IDs to look up; may be empty.
    :return: DataFrame with the columns ``canvas_id``, ``name``, ``sis_id``,
        ``start_at`` and ``end_at`` (one row per requested term). The column
        set is the same even when ``term_ids`` is empty.
    :raises KeyError: if a term payload lacks one of the expected keys.
    :raises TypeError, ValueError: if a term's ``sis_term_id`` is null or
        cannot be converted to ``int``.
    """
    # Log the real function name so the message matches the code that emitted it.
    logger.info('** gather_term_data_from_api')

    # Fetch data for terms from config
    logger.info(f'Canvas terms specified in config: {term_ids}')
    url_ending_with_scope = f'{API_SCOPE_PREFIX}/accounts/{account_id}/terms/'

    term_dicts = []
    for term_id in term_ids:
        logger.info(f'Pulling data for term number {term_id}')
        term_url_ending = url_ending_with_scope + str(term_id)
        response = make_request_using_api_utils(term_url_ending)

        term_data = json.loads(response.text)
        slim_term_dict = {
            'canvas_id': term_data['id'],
            'name': term_data['name'],
            'sis_id': int(term_data['sis_term_id']),
            'start_at': pd.to_datetime(
                term_data['start_at'],
                format=CANVAS_DATETIME_FORMAT
            ),
            'end_at': pd.to_datetime(
                term_data['end_at'],
                format=CANVAS_DATETIME_FORMAT
            )
        }
        term_dicts.append(slim_term_dict)

    # Pin the columns explicitly so an empty term list still produces a frame
    # with the expected shape instead of one with no columns at all.
    term_columns = ['canvas_id', 'name', 'sis_id', 'start_at', 'end_at']
    term_df = pd.DataFrame(term_dicts, columns=term_columns)
    logger.debug(term_df.head())
    return term_df


def slim_down_course_data(course_data: List[Dict]) -> List[Dict]:
slim_course_dicts = []
for course_dict in course_data:
slim_course_dict = {
'canvas_id': course_dict['id'],
'sis_id': course_dict['sis_course_id'],
'sis_id': str(course_dict['sis_course_id']),
'name': course_dict['name'],
'account_id': course_dict['account_id'],
'term_id': course_dict['enrollment_term_id'],
'created_at': course_dict['created_at'],
'workflow_state': course_dict['workflow_state']
}
Expand All @@ -85,39 +122,44 @@ def slim_down_course_data(course_data: List[Dict]) -> List[Dict]:
return slim_course_dicts


def gather_course_data_from_api(account_id: int, term_id: int) -> pd.DataFrame:
def gather_course_data_from_api(account_id: int, term_ids: Sequence[int]) -> pd.DataFrame:
logger.info('** gather_course_data_from_api')
url_ending_with_scope = f'{API_SCOPE_PREFIX}/accounts/{account_id}/courses'
params = {
'with_enrollments': True,
'enrollment_type': ['student', 'teacher'],
'enrollment_term_id': term_id,
'per_page': 100,
'include': ['total_students']
}

# Make first course request
page_num = 1
logger.info(f'Course Page Number: {page_num}')
response = make_request_using_api_utils(url_ending_with_scope, params)
all_course_data = json.loads(response.text)
course_dicts = slim_down_course_data(all_course_data)
more_pages = True

while more_pages:
next_params = API_UTIL.get_next_page(response)
if next_params:
page_num += 1
logger.info(f'Course Page Number: {page_num}')
response = make_request_using_api_utils(url_ending_with_scope, next_params)
all_course_data = json.loads(response.text)
course_dicts += slim_down_course_data(all_course_data)
else:
logger.info('No more pages!')
more_pages = False
course_dicts = []
for term_id in term_ids:
logger.info(f'Fetching course data for term {term_id}')

params = {
'with_enrollments': True,
'enrollment_type': ['student', 'teacher'],
'enrollment_term_id': term_id,
'per_page': 100,
'include': ['total_students']
}

# Make first course request
page_num = 1
logger.info(f'Course Page Number: {page_num}')
response = make_request_using_api_utils(url_ending_with_scope, params)
all_course_data = json.loads(response.text)
course_dicts += slim_down_course_data(all_course_data)
more_pages = True

while more_pages:
next_params = API_UTIL.get_next_page(response)
if next_params:
page_num += 1
logger.info(f'Course Page Number: {page_num}')
response = make_request_using_api_utils(url_ending_with_scope, next_params)
all_course_data = json.loads(response.text)
course_dicts += slim_down_course_data(all_course_data)
else:
logger.info('No more pages!')
more_pages = False

num_course_dicts = len(course_dicts)
logger.info(f'Total course records: {num_course_dicts}')
logger.info(f'Total course records for all active terms: {num_course_dicts}')
course_dicts_with_students = []
for course_dict in course_dicts:
if course_dict['total_students'] > 0:
Expand Down Expand Up @@ -183,10 +225,14 @@ def pull_sis_section_data_from_udw(section_ids: Sequence[int], conn: connection)

def run_course_inventory() -> Sequence[Dict[str, Union[ValidDataSourceName, pd.Timestamp]]]:
logger.info("* run_course_inventory")

logger.info('Making requests against the Canvas API')

# Gather term data
term_df = gather_term_data_from_api(ACCOUNT_ID, TERM_IDS)

# Gather course data
course_df = gather_course_data_from_api(ACCOUNT_ID, TERM_ID)
course_df = gather_course_data_from_api(ACCOUNT_ID, TERM_IDS)
zqian marked this conversation as resolved.
Show resolved Hide resolved

logger.info("*** Fetching the published date ***")
course_available_df = course_df.loc[course_df.workflow_state == 'available'].copy()
Expand All @@ -198,11 +244,12 @@ def run_course_inventory() -> Sequence[Dict[str, Union[ValidDataSourceName, pd.T

logger.info("*** Checking for courses available and no published date ***")
logger.info(course_df[(course_df['workflow_state'] == 'available') & (course_df['published_at'].isnull())])

course_df['created_at'] = pd.to_datetime(course_df['created_at'],
format="%Y-%m-%dT%H:%M:%SZ",
format=CANVAS_DATETIME_FORMAT,
errors='coerce')
course_df['published_at'] = pd.to_datetime(course_df['published_at'],
format="%Y-%m-%dT%H:%M:%SZ",
format=CANVAS_DATETIME_FORMAT,
errors='coerce')

logger.info("*** Fetching the canvas course usage data ***")
Expand Down Expand Up @@ -260,6 +307,7 @@ def run_course_inventory() -> Sequence[Dict[str, Union[ValidDataSourceName, pd.T
}

# Produce output
num_term_records = len(term_df)
num_course_records = len(course_df)
num_user_records = len(user_df)
num_section_records = len(section_df)
Expand All @@ -268,6 +316,10 @@ def run_course_inventory() -> Sequence[Dict[str, Union[ValidDataSourceName, pd.T

if CREATE_CSVS:
# Generate CSV Output
logger.info(f'Writing {num_term_records} term records to CSV')
term_df.to_csv(os.path.join('data', 'term.csv'), index=False)
logger.info('Wrote data to data/term.csv')

logger.info(f'Writing {num_course_records} course records to CSV')
course_df.to_csv(os.path.join('data', 'course.csv'), index=False)
logger.info('Wrote data to data/course.csv')
Expand All @@ -288,12 +340,18 @@ def run_course_inventory() -> Sequence[Dict[str, Union[ValidDataSourceName, pd.T
canvas_course_usage_df.to_csv(os.path.join('data', 'canvas_course_usage.csv'), index=False)
logger.info('Wrote data to data/canvas_course_usage.csv')

# Empty tables (if any) in database, then migrate
logger.info('Emptying tables in DB')
# Initialize DBCreator object
db_creator_obj = DBCreator(INVENTORY_DB, APPEND_TABLE_NAMES)

# Empty tables (if any) in database
logger.info('Emptying tables in DB')
db_creator_obj.drop_records()

# Insert gathered data
logger.info(f'Inserting {num_term_records} term records to DB')
term_df.to_sql('term', db_creator_obj.engine, if_exists='append', index=False)
logger.info(f'Inserted data into term table in {db_creator_obj.db_name}')

logger.info(f'Inserting {num_course_records} course records to DB')
course_df.to_sql('course', db_creator_obj.engine, if_exists='append', index=False)
logger.info(f'Inserted data into course table in {db_creator_obj.db_name}')
Expand Down
31 changes: 31 additions & 0 deletions db/migrations/0013.add_term_table.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
#
# file: migrations/0013.add_term_table.py
#
from yoyo import step

# Each step pairs its forward SQL with the statement that undoes it, so the
# migration can be reverted; yoyo rolls steps back in reverse order (foreign
# key first, then the column, then the table).
# NOTE(review): if `course` already holds rows when this runs, the new
# NOT NULL term_id values will have no matching `term` rows and the foreign
# key step may fail -- confirm the table is empty at migration time.
steps = [
    # Create the term table, keyed by the Canvas term ID.
    step(
        '''
        CREATE TABLE IF NOT EXISTS term
        (
            canvas_id INTEGER NOT NULL UNIQUE,
            name VARCHAR(100) NOT NULL UNIQUE,
            sis_id INTEGER NOT NULL,
            start_at DATETIME NOT NULL,
            end_at DATETIME NOT NULL,
            PRIMARY KEY (canvas_id)
        )
        ENGINE=InnoDB
        CHARACTER SET utf8mb4;
        ''',
        '''
        DROP TABLE IF EXISTS term;
        '''
    ),
    # Give each course a reference to its term.
    step(
        '''
        ALTER TABLE course
        ADD COLUMN term_id INTEGER NOT NULL AFTER account_id;
        ''',
        '''
        ALTER TABLE course
        DROP COLUMN term_id;
        '''
    ),
    # Enforce the course -> term relationship.
    step(
        '''
        ALTER TABLE course
        ADD CONSTRAINT fk_term_id
        FOREIGN KEY (term_id)
        REFERENCES term(canvas_id)
        ON UPDATE CASCADE ON DELETE CASCADE;
        ''',
        '''
        ALTER TABLE course
        DROP FOREIGN KEY fk_term_id;
        '''
    )
]
10 changes: 10 additions & 0 deletions db/migrations/0014.change_course_sis_id_data_type.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
#
# file: migrations/0014.change_course_sis_id_data_type.py
zqian marked this conversation as resolved.
Show resolved Hide resolved
#
from yoyo import step

# Store course.sis_id as a nullable VARCHAR(15).
# The step is declared inside a `steps` list, the layout yoyo documents for
# migration scripts.
# NOTE(review): no rollback statement is provided because the column's
# previous type is not visible here -- add one once it is confirmed.
steps = [
    step('''
        ALTER TABLE course
        MODIFY
        sis_id VARCHAR(15) NULL;
    ''')
]
29 changes: 22 additions & 7 deletions online_meetings/canvas_zoom_meetings.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
import re
import sys
from datetime import datetime
from typing import Dict, Optional, List
from typing import Dict, List, Optional, Sequence, Union

import canvasapi
import pandas as pd
Expand Down Expand Up @@ -160,16 +160,31 @@ def get_zoom_course(self, course: canvasapi.course.Course) -> None:
self.get_zoom_details(posturl, formdata, course.id)
return None

def zoom_course_report(self, canvas_account: int = 1, enrollment_term_id: int = 0,
published: bool = True, add_course_ids: list = None) -> None:
def zoom_course_report(
self,
canvas_account: int = 1,
enrollment_term_ids: Union[Sequence[int], None] = None,
published: bool = True,
add_course_ids: list = None
) -> None:

account = CANVAS.get_account(canvas_account)
# Canvas has a limit of 100 per page on this API
per_page = 100
# Get all published courses from the defined enrollment term

# Get all published courses from the defined enrollment terms
courses = []
if enrollment_term_id:
courses = account.get_courses(enrollment_term_id=enrollment_term_id, published=published, per_page=per_page)
if enrollment_term_ids is not None:
for enrollment_term_id in enrollment_term_ids:
logger.info(f'Fetching published course data for term {enrollment_term_id}')
courses_list = list(
account.get_courses(
enrollment_term_id=enrollment_term_id,
published=published,
per_page=per_page
)
)
courses += courses_list

course_count = 0
for course in courses:
Expand All @@ -191,7 +206,7 @@ def zoom_course_report(self, canvas_account: int = 1, enrollment_term_id: int =
start_time = datetime.now()
logger.info(f"Script started at {start_time}")
zoom_placements = ZoomPlacements()
zoom_placements.zoom_course_report(ENV.get("CANVAS_ACCOUNT_ID", 1), ENV.get("CANVAS_TERM_ID", 0),
zoom_placements.zoom_course_report(ENV.get("CANVAS_ACCOUNT_ID", 1), ENV.get("CANVAS_TERM_IDS", []),
True, ENV.get("ADD_COURSE_IDS", []))

zoom_courses_df = pd.DataFrame(zoom_placements.zoom_courses)
Expand Down