Skip to content

Commit

Permalink
Merge pull request #34 from proversity-org/proversity/PE-929
Browse files Browse the repository at this point in the history
PE-929 - Add time spent to the enrollment per site report.
  • Loading branch information
Squirrel18 authored Feb 17, 2020
2 parents cb1048c + 25cd22f commit 8b42d9b
Show file tree
Hide file tree
Showing 4 changed files with 174 additions and 9 deletions.
Empty file.
103 changes: 103 additions & 0 deletions openedx_proversity_reports/google_services/bigquery_module.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
"""
This module contains some Google BigQuery API abstract functions.
"""
import logging

from django.conf import settings
from google.api_core.exceptions import GoogleAPIError
from google.cloud import bigquery
from google.cloud.bigquery.client import Client
from google.oauth2 import service_account

# OAuth 2.0 scopes requested when the service-account credentials are built
# for the BigQuery client (passed as ``scopes=`` in
# get_google_bigquery_api_client).
BIGQUERY_API_SCOPES = (
    'https://www.googleapis.com/auth/bigquery',
    'https://www.googleapis.com/auth/cloud-platform',
    'https://www.googleapis.com/auth/bigquery.readonly',
)
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)


def get_google_bigquery_data(query_string):
    """
    Run a query on Google BigQuery and return its result.

    Args:
        query_string: SQL text of the query to execute.
    Returns:
        The value of google.cloud.bigquery.job.QueryJob.result() when the job
        finished without errors. An empty list when the API call or the query
        failed; every failure is written to the module logger.
    Raises:
        GoogleBigQueryInformationError: Propagated from
            get_google_bigquery_api_client when the client cannot be built.
    """
    bigquery_client = get_google_bigquery_api_client()
    query_job = bigquery_client.query(
        query_string,
        job_config=get_google_bigquery_job_config(),
    )

    try:
        # Called for its side effect: API-level failures surface here as
        # GoogleAPIError. Query-level failures are inspected below through
        # query_job.errors.
        query_job.exception()
    except GoogleAPIError as api_error:
        # Only GoogleAPICallError subclasses expose ``errors``; other
        # GoogleAPIError subclasses (e.g. RetryError) do not, so read it
        # defensively instead of raising AttributeError inside the handler.
        error_items = getattr(api_error, 'errors', None) or []

        if not error_items:
            logger.error('Google BigQuery API error: %s', api_error)

        for error_item in error_items:
            # Entries are usually dicts with a 'message' key, but that is not
            # guaranteed for every transport; log the raw entry otherwise.
            if isinstance(error_item, dict):
                error_message = error_item.get('message', '')
            else:
                error_message = error_item
            logger.error('Google BigQuery API error: %s', error_message)

        return []

    if query_job.errors:
        for error_item in query_job.errors:
            logger.error('Google BigQuery query error: %s', error_item.get('message', ''))
        return []

    return query_job.result()


def get_google_bigquery_api_client():
    """
    Return the Google BigQuery API client.

    Reads the service account information from the
    OPR_GOOGLE_SERVICE_ACCOUNT_CREDENTIALS setting and the project from the
    OPR_GOOGLE_CLOUD_PROJECT_ID setting.

    Returns:
        google_bigquery_client: google.cloud.bigquery.client.Client instance.
    Raises:
        GoogleBigQueryInformationError: If the service account credentials or
            the project ID setting is missing or empty.
    """
    service_account_credentials = getattr(settings, 'OPR_GOOGLE_SERVICE_ACCOUNT_CREDENTIALS', {})
    google_project_id = getattr(settings, 'OPR_GOOGLE_CLOUD_PROJECT_ID', '')

    # Both values are required: fail when EITHER one is missing, not only
    # when both are.
    if not service_account_credentials or not google_project_id:
        error_message = 'Google Service Account credentials or project ID were not provided.'
        logger.error(error_message)
        raise GoogleBigQueryInformationError(error_message)

    credentials = service_account.Credentials.from_service_account_info(
        service_account_credentials,
        scopes=BIGQUERY_API_SCOPES,
    )

    return Client(
        project=google_project_id,
        credentials=credentials,
    )


def get_google_bigquery_job_config():
    """
    Build the job configuration used for Google BigQuery queries.

    The billing cap is read from OPR_GOOGLE_BIGQUERY_MAX_PROCESS_BYTES
    (default None) and the query-cache flag from OPR_GOOGLE_BIGQUERY_USE_CACHE
    (default False).

    Returns:
        google.cloud.bigquery.job.QueryJobConfig instance.
    """
    max_billed_bytes = getattr(settings, 'OPR_GOOGLE_BIGQUERY_MAX_PROCESS_BYTES', None)
    cache_enabled = getattr(settings, 'OPR_GOOGLE_BIGQUERY_USE_CACHE', False)

    query_config = bigquery.QueryJobConfig()
    query_config.maximum_bytes_billed = max_billed_bytes
    query_config.use_query_cache = cache_enabled

    return query_config


def get_google_bigquery_course_id(course_key):
    """
    Return the course id in the form used for the Google BigQuery dataset name.

    Takes the deprecated string form of the course key and replaces every
    forward slash '/' with an underscore '_',
    e.g. 'edX/DemoX/Demo_Course' -> 'edX_DemoX_Demo_Course'.

    Args:
        course_key: Object exposing _to_deprecated_string().
    Returns:
        The deprecated course key string with '/' replaced by '_'.
    """
    slash_separated_id = course_key._to_deprecated_string()  # pylint: disable=protected-access
    id_parts = slash_separated_id.split('/')

    return '_'.join(id_parts)


class GoogleBigQueryInformationError(Exception):
    """
    Raised when information required to use Google BigQuery
    was not provided.
    """
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,15 @@
"""
from importlib import import_module

from django.conf import settings
from opaque_keys.edx.keys import CourseKey
from rest_framework import status

from openedx_proversity_reports.google_services.bigquery_module import (
get_google_bigquery_course_id,
get_google_bigquery_data,
GoogleBigQueryInformationError,
)
from openedx_proversity_reports.reports.backend.base import BaseReportBackend
from openedx_proversity_reports.edxapp_wrapper.get_student_library import user_attribute, user_signup_source
from openedx_proversity_reports.utils import get_course_enrollment, get_user_role
Expand Down Expand Up @@ -43,10 +49,11 @@ def process_request(self, request, extra_data={}): # pylint: disable=dangerous-
HTTP_400_BAD_REQUEST object.
"""
site_name = extra_data.get('site_name', '')
date_string = extra_data.get('date', '')

if not site_name:
if not site_name or not date_string:
return {
'site_name': 'This field is required.',
'message': 'missing site_name or date fields.',
'success': False,
'status': status.HTTP_400_BAD_REQUEST,
}
Expand All @@ -64,12 +71,12 @@ def process_request(self, request, extra_data={}): # pylint: disable=dangerous-
return super(EnrollmentReportPerSiteBackend, self).process_request(request, extra_data)


def generate_enrollment_per_site_report(course_key, enrolled_users):
def generate_enrollment_per_site_report(course_key, enrolled_users, date):
"""
Return the report data.
Args:
course_key: Course id string.
course_key: Opaque course key object.
enrolled_users: List that contains information about the enrolled users.
Returns:
List of dicts: [{
Expand All @@ -81,24 +88,77 @@ def generate_enrollment_per_site_report(course_key, enrolled_users):
role: User's course role.
}]
"""
opaque_course_key = CourseKey.from_string(course_key)
report_data = []
bigquery_data = list(
get_google_bigquery_data(
query_string=get_google_bigquery_query(
course_dataset_name=get_google_bigquery_course_id(course_key),
date=date,
course_id=str(course_key),
),
),
)

for user in enrolled_users:
enrollment = get_course_enrollment().objects.filter(
user__email=user.get('email', ''),
course_id=opaque_course_key,
course_id=course_key,
)

if not enrollment:
continue

time_spent_per_user = 0

try:
time_spent_per_user = sum(row_data[1] for row_data in bigquery_data if row_data[0] == user.get('username', ''))
except IndexError:
pass

report_data.append({
'username': user.get('username', ''),
'email': user.get('email', ''),
'date_of_enrollment': str(enrollment[0].created),
'date_of_registration': user.get('date_joined', ''),
'role': get_user_role(enrollment[0].user, opaque_course_key),
'role': get_user_role(enrollment[0].user, course_key),
'time_spent': time_spent_per_user,
})

return report_data


def get_google_bigquery_query(course_dataset_name, date, course_id):
    """
    Return the Google BigQuery query for the time_on_asset_daily table.

    Args:
        course_dataset_name: Dataset name where the table is stored.
        date: Date to filter the query. Date format: '%Y-%m-%d' e.g. '2019-01-01'
        course_id: Course id string.
    Returns:
        query_string: The query string to make the query.
    Raises:
        GoogleBigQueryInformationError: If OPR_GOOGLE_CLOUD_PROJECT_ID or course_dataset_name
            were not provided or are None, or if date is not a valid '%Y-%m-%d' date.
    """
    from datetime import datetime

    google_project_id = getattr(settings, 'OPR_GOOGLE_CLOUD_PROJECT_ID', '')
    query_max_result_number = getattr(settings, 'OPR_GOOGLE_BIGQUERY_MAX_NUMBER_RESULTS_PER_QUERY', 1000)

    if not google_project_id or not course_dataset_name:
        raise GoogleBigQueryInformationError('Google cloud project id or course_dataset_name are missing.')

    # The date is interpolated into the SQL text, so it must never be used
    # verbatim: parse it with the documented format and render it back, which
    # guarantees that only a canonical YYYY-MM-DD value reaches the query.
    try:
        query_date = datetime.strptime(date, '%Y-%m-%d').date().isoformat()
    except (TypeError, ValueError):
        raise GoogleBigQueryInformationError(
            "The date must be provided with the format '%Y-%m-%d' e.g. '2019-01-01'."
        )

    # NOTE(review): course_id is still interpolated as-is; callers are
    # expected to pass the string form of an already parsed course key.
    # Moving to BigQuery query parameters would remove this assumption but
    # requires changing how the query is submitted.
    query_string = """
        SELECT username, time_umid30
        FROM `{google_project_id}.{bigquery_dataset}.time_on_asset_daily`
        WHERE course_id = '{course_id}'
        AND time_umid30 IS NOT NULL
        AND PARSE_DATETIME('%Y-%m-%d', date) = '{query_date}' LIMIT {max_result_number}
    """.format(
        google_project_id=google_project_id,
        bigquery_dataset=course_dataset_name,
        course_id=course_id,
        query_date=query_date,
        max_result_number=query_max_result_number,
    )

    return query_string
6 changes: 4 additions & 2 deletions openedx_proversity_reports/tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -251,12 +251,14 @@ def enrollment_per_site_report_task(*args, **kwargs):
}
"""
extra_data = kwargs.pop('extra_data', {})
course_key = CourseKey.from_string(kwargs.get('course_key', ''))
report_data = generate_enrollment_per_site_report(
course_key=kwargs.get('course_key', ''),
course_key=course_key,
enrolled_users=kwargs.pop('enrolled_users', []),
date=extra_data.get('date', ''),
)
course_object = course_overview().get_from_id_if_exists(
course_id=CourseKey.from_string(kwargs.get('course_key', '')),
course_id=course_key,
)

return {
Expand Down

0 comments on commit 8b42d9b

Please sign in to comment.