Skip to content

Commit

Permalink
feat: add management command for SFMC Course Catalog POC (#4409)
Browse files Browse the repository at this point in the history
  • Loading branch information
AfaqShuaib09 authored Aug 28, 2024
1 parent 5c8ceb1 commit 83681de
Show file tree
Hide file tree
Showing 6 changed files with 535 additions and 0 deletions.
91 changes: 91 additions & 0 deletions course_discovery/apps/course_metadata/gspread_client.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import logging
from string import ascii_uppercase

import gspread
from django.conf import settings
Expand Down Expand Up @@ -37,6 +38,96 @@ def read_data(self, config):
logger.exception('[Spread Sheet Read Error]: Exception occurred while reading sheet data')
return None

def _get_or_create_worksheet(self, spread_sheet, tab_id, cols, rows):
    """
    Return the worksheet titled ``tab_id``, creating it when it is missing.

    Args:
        spread_sheet: The spread sheet object to look the tab up in
        tab_id: The title of the worksheet tab
        cols: The number of columns to allocate if the tab has to be created
        rows: The number of rows to allocate if the tab has to be created

    Returns:
        The existing worksheet, or the newly added one.
    """
    try:
        worksheet = spread_sheet.worksheet(tab_id)
    except gspread.exceptions.WorksheetNotFound:
        # The tab does not exist yet, so add it with the requested dimensions.
        worksheet = spread_sheet.add_worksheet(title=tab_id, rows=rows, cols=cols)
    return worksheet

def _write_headers(self, sheet_tab, headers):
    """
    Append the header row to the worksheet and format row 1 in bold.

    Args:
        sheet_tab: The worksheet object
        headers: The headers of the worksheet

    Does nothing when ``headers`` is empty.
    """
    if not headers:
        # No header row to write; indexing with len(headers) - 1 == -1 would
        # otherwise produce a bogus "A1:Z1" range.
        return

    sheet_tab.append_row(headers)

    # Turn the 1-based column count into an A1-notation column label using
    # bijective base-26 (1 -> "A", 26 -> "Z", 27 -> "AA"), so sheets with more
    # than 26 headers no longer raise IndexError.
    end_column = ''
    remaining = len(headers)
    while remaining:
        remaining, offset = divmod(remaining - 1, len(ascii_uppercase))
        end_column = ascii_uppercase[offset] + end_column

    sheet_tab.format(f"A1:{end_column}1", {'textFormat': {'bold': True}})

def _write_rows(self, sheet_tab, headers, csv_data):
    """
    Write the data rows to the worksheet in a single batched request.

    Args:
        sheet_tab: The worksheet object
        headers: The headers of the worksheet; they define the column order
        csv_data: The data to be written in the worksheet, as a list of dictionaries, where
            each dictionary represents a row

    Keys missing from a row dictionary are written as ``None``.
    """
    def _escape(value):
        # double quote escape to preserve " in values
        return value.replace('"', '""') if isinstance(value, str) else value

    rows = [
        [_escape(row.get(header)) for header in headers]
        for row in csv_data
    ]

    if rows:
        # One append_rows call instead of one append_row call per row: a single
        # API request rather than one per product.
        sheet_tab.append_rows(rows)

def write_data(self, config, csv_headers, csv_data, overwrite):
    """
    Write data to the google spread sheet

    Args:
        config: The configuration for the google spread sheet; the keys
            "SHEET_ID" and "OUTPUT_TAB_ID" are read from it
        csv_headers: The headers of the data to be written in the worksheet
        csv_data: The data to be written in the worksheet, as a list of dictionaries, where
            each dictionary represents a row
        overwrite: Whether to overwrite the existing data in the worksheet

    Returns:
        bool: True when the data was written, False when an error occurred.
        Errors are logged and not re-raised, so callers that need to react
        to a failure should check the return value.
    """
    try:
        spread_sheet = self.get_spread_sheet_by_key(config["SHEET_ID"])
        # Dimensions are only used when the tab has to be created; one extra
        # row/column is requested on top of the data size.
        sheet_tab = self._get_or_create_worksheet(
            spread_sheet,
            config["OUTPUT_TAB_ID"],
            len(csv_headers) + 1,
            len(csv_data) + 1,
        )

        if overwrite:
            sheet_tab.clear()

        if csv_headers:
            self._write_headers(sheet_tab, csv_headers)

        self._write_rows(sheet_tab, csv_headers, csv_data)
    except gspread.exceptions.GSpreadException as e:
        logger.exception(f"[Spread Sheet Write Error]: GSpreadException occurred while writing sheet data: {e}")
        return False
    except Exception as e:  # pylint: disable=broad-except
        logger.exception(f"[Spread Sheet Write Error]: Exception occurred while writing sheet data: {e}")
        return False

    # Single-line message: the previous triple-quoted f-string embedded
    # newlines and source indentation into the log record.
    logger.info(
        f'[Spread Sheet Write Success]: Successfully written data to '
        f'sheet {config["SHEET_ID"]} tab {config["OUTPUT_TAB_ID"]}'
    )
    return True

@staticmethod
def get_worksheet_data_by_tab_id(spread_sheet, tab_id):
try:
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,184 @@
import csv
import datetime
import logging

from django.conf import settings
from django.core.management import BaseCommand, CommandError
from django.db.models import Prefetch

from course_discovery.apps.course_metadata.gspread_client import GspreadClient
from course_discovery.apps.course_metadata.models import Course, CourseType, SubjectTranslation

logger = logging.getLogger(__name__)


class Command(BaseCommand):
    """
    Management command that exports the product catalog either to a CSV file
    or to a Google Sheet tab (through GspreadClient).
    """

    help = """
    Populates Product Catalog for Salesforce Marketing Cloud Catalog
    Example usage:
    python manage.py populate_product_catalog --product_type={product_type} --output_csv=/path/to/output.csv --product_source={product_source}
    python manage.py populate_product_catalog --product_type={product_type} --product_source={product_source} --use_gspread_client=True --overwrite=True
    """

    # Column order of the exported catalog; the keys returned by
    # get_transformed_data must match these names.
    CATALOG_CSV_HEADERS = [
        'UUID', 'Title', 'Organizations Name', 'Organizations Logo', 'Organizations Abbr', 'Languages',
        'Subjects', 'Subjects Spanish', 'Marketing URL', 'Marketing Image'
    ]

    @staticmethod
    def _str_to_bool(value):
        """
        Parse a command-line boolean such as "True", "false", "1" or "no".

        ``type=bool`` must not be used with argparse: bool("False") is True
        because any non-empty string is truthy.

        Raises:
            ValueError: if the value is not a recognised boolean spelling
                (argparse reports this as an invalid argument).
        """
        if isinstance(value, bool):
            return value
        normalized = str(value).strip().lower()
        if normalized in ('true', 't', 'yes', 'y', 'on', '1'):
            return True
        if normalized in ('false', 'f', 'no', 'n', 'off', '0', ''):
            return False
        raise ValueError(f'Expected a boolean value, got {value!r}')

    def add_arguments(self, parser):
        """
        Register the command-line options of the command.
        """
        parser.add_argument(
            '--product_type',
            dest='product_type',
            type=str,
            required=False,
            help='Product Type to populate in the catalog'
        )
        parser.add_argument(
            '--output_csv',
            dest='output_csv',
            type=str,
            required=False,
            help='Path of the output CSV'
        )
        parser.add_argument(
            '--product_source',
            dest='product_source',
            type=str,
            required=False,
            help='The product source to filter the products'
        )
        parser.add_argument(
            '--use_gspread_client',
            dest='gspread_client_flag',
            type=self._str_to_bool,
            required=False,
            help='Flag to use Gspread Client for writing data to Google Sheets'
        )
        parser.add_argument(
            '--overwrite',
            dest='overwrite_flag',
            type=self._str_to_bool,
            default=True,
            required=False,
            help='Flag to overwrite the existing data in Google Sheet tab'
        )

    def get_products(self, product_type, product_source):
        """
        Extract products from the DB for product catalog

        Args:
            product_type: One of 'executive_education', 'bootcamp' or 'ocm_course'
                (case-insensitive). Any other value, including None, yields an
                empty queryset.
            product_source: Optional product source slug to filter on.

        Returns:
            A Course queryset with authoring organizations, subjects and the
            Spanish subject translations prefetched.
        """
        ocm_course_catalog_types = [
            CourseType.AUDIT, CourseType.VERIFIED_AUDIT, CourseType.PROFESSIONAL, CourseType.CREDIT_VERIFIED_AUDIT,
            'verified', 'spoc-verified-audit'
        ]

        # --product_type is optional, so guard against None before lower-casing.
        product_type = (product_type or '').lower()

        if product_type not in ['executive_education', 'bootcamp', 'ocm_course']:
            # Return empty queryset if invalid product type specified
            return Course.objects.none()

        queryset = Course.objects.available()

        if product_type == 'ocm_course':
            queryset = queryset.filter(type__slug__in=ocm_course_catalog_types)
        elif product_type == 'executive_education':
            queryset = queryset.filter(type__slug=CourseType.EXECUTIVE_EDUCATION_2U)
        elif product_type == 'bootcamp':
            queryset = queryset.filter(type__slug=CourseType.BOOTCAMP_2U)

        if product_source:
            queryset = queryset.filter(product_source__slug=product_source)

        # Prefetch Spanish translations of subjects
        subject_translations = Prefetch(
            'subjects__translations',
            queryset=SubjectTranslation.objects.filter(language_code='es'),
            to_attr='spanish_translations'
        )

        return queryset.prefetch_related(
            'authoring_organizations',
            'subjects',
            subject_translations
        )

    def write_csv_header(self, output_csv):
        """
        Write the header of output CSV in the file.

        Args:
            output_csv: An open, writable text file object.

        Returns:
            The csv.DictWriter to use for the data rows.
        """
        writer = csv.DictWriter(output_csv, fieldnames=self.CATALOG_CSV_HEADERS)
        writer.writeheader()
        return writer

    def get_transformed_data(self, product):
        """
        Transforms the product data for product's catalog

        Returns:
            A dict keyed by the CATALOG_CSV_HEADERS names. Multi-valued fields
            (organizations, subjects) are joined with ", ".
        """
        authoring_orgs = product.authoring_organizations.all()
        subjects = product.subjects.all()
        return {
            "UUID": str(product.uuid),
            "Title": product.title,
            "Organizations Name": ", ".join(org.name for org in authoring_orgs),
            "Organizations Logo": ", ".join(
                org.logo_image.url for org in authoring_orgs if org.logo_image
            ),
            "Organizations Abbr": ", ".join(org.key for org in authoring_orgs),
            "Languages": product.languages_codes,
            "Subjects": ", ".join(subject.name for subject in subjects),
            # spanish_translations is the to_attr populated by the Prefetch in get_products
            "Subjects Spanish": ", ".join(
                translation.name for subject in subjects
                for translation in subject.spanish_translations
            ),
            "Marketing URL": product.marketing_url,
            "Marketing Image": (product.image.url if product.image else ""),
        }

    def handle(self, *args, **options):
        """
        Fetch the matching products and write them to the CSV file when
        --output_csv is given, otherwise to Google Sheets when
        --use_gspread_client is set.

        Raises:
            CommandError: if no products match or any error occurs while exporting.
        """
        product_type = options.get('product_type')
        output_csv = options.get('output_csv')
        product_source = options.get('product_source')
        gspread_client_flag = options.get('gspread_client_flag')
        overwrite = options.get('overwrite_flag')

        try:
            products = self.get_products(product_type, product_source)
            # A single COUNT query answers both "any products?" and "how many?".
            products_count = products.count()
            if not products_count:
                raise CommandError('No products found for the given criteria.')

            logger.info(f'Fetched {products_count} courses from the database')
            if output_csv:
                with open(output_csv, 'w', newline='', encoding='utf-8') as output_file:
                    output_writer = self.write_csv_header(output_file)
                    for product in products:
                        try:
                            output_writer.writerow(self.get_transformed_data(product))
                        except Exception as e:  # pylint: disable=broad-exception-caught
                            # Best effort: skip the failing product and keep exporting the rest.
                            logger.error(f"Error writing product {product.uuid} to CSV: {str(e)}")
                            continue

                logger.info(f'Populated {products_count} {product_type}s to {output_csv}')

            elif gspread_client_flag:
                # The Sheets config and client are only built when they are
                # actually needed, so CSV-only runs do not depend on the
                # PRODUCT_CATALOG_SHEET_ID setting or on the client's setup.
                if product_type:
                    # Without overwrite the data goes to a date-stamped tab.
                    date_suffix = '' if overwrite else '_' + datetime.datetime.now().strftime("%Y%m%d")
                    output_tab_id = product_type.upper() + date_suffix
                else:
                    output_tab_id = 'All'
                product_catalog_config = {
                    'SHEET_ID': settings.PRODUCT_CATALOG_SHEET_ID,
                    'OUTPUT_TAB_ID': output_tab_id,
                }

                csv_data = [self.get_transformed_data(product) for product in products]
                GspreadClient().write_data(
                    product_catalog_config,
                    self.CATALOG_CSV_HEADERS,
                    csv_data,
                    overwrite=overwrite,
                )
                logger.info(f'Populated {products_count} {product_type}s to Google Sheets')

            else:
                logger.warning(
                    'No output target specified: pass --output_csv or --use_gspread_client=True. '
                    'Nothing was written.'
                )

        except Exception as e:
            raise CommandError(f'Error while populating product catalog: {str(e)}') from e
Loading

0 comments on commit 83681de

Please sign in to comment.