diff --git a/dataeng/jobs/AnalyticsEmailOptin.groovy b/dataeng/jobs/AnalyticsEmailOptin.groovy
index 478c4509f..9037f69a6 100644
--- a/dataeng/jobs/AnalyticsEmailOptin.groovy
+++ b/dataeng/jobs/AnalyticsEmailOptin.groovy
@@ -40,6 +40,10 @@ job ('analytics-email-optin-worker') {
         stringParam('PLATFORM_VENV')
     }
 
+    logRotator {
+        daysToKeep(30)
+    }
+
     throttleConcurrentBuilds {
         maxPerNode(5)
         maxTotal(5)
@@ -141,6 +145,10 @@ job ('analytics-email-optin-master') {
         }
     }
 
+    triggers{
+        cron('# Saturdays around 4 a.m. UTC\nH 4 * * 6')
+    }
+
     wrappers {
         timestamps()
     }
@@ -154,8 +162,9 @@ job ('analytics-email-optin-master') {
         }
         virtualenv {
             nature("shell")
+            name("analytics-exporter")
             command(
-                readFileFromWorkspace("dataeng/resources/setup-exporter-properties.sh")
+                readFileFromWorkspace("dataeng/resources/setup-exporter-email-optin.sh")
             )
         }
         downstreamParameterized {
diff --git a/dataeng/jobs/AnalyticsExporter.groovy b/dataeng/jobs/AnalyticsExporter.groovy
new file mode 100644
index 000000000..c2b740006
--- /dev/null
+++ b/dataeng/jobs/AnalyticsExporter.groovy
@@ -0,0 +1,289 @@
+import org.yaml.snakeyaml.Yaml
+import org.yaml.snakeyaml.error.YAMLException
+
+
+Map config = [:]
+Binding bindings = getBinding()
+config.putAll(bindings.getVariables())
+PrintStream out = config['out']
+
+Map globals = binding.variables
+String commonVarsDir = globals.get('COMMON_VARS_DIR')
+String commonVarsFilePath = commonVarsDir + 'common.yaml'
+Map commonConfigMap = [:]
+
+// Load the shared settings from common.yaml. The jobs below read SECURE_REPO_URL, BAKED_CONFIG_SECURE_REPO_URL,
+// DATA_CZAR_KEYS_REPO_URL, EXPORTER_OUTPUT_BUCKET and EXTENDED_NOTIFY_LIST from the resulting map.
+try {
+    out.println('Parsing secret YAML file')
+    String commonConfigContents = readFileFromWorkspace(commonVarsFilePath)
+    Yaml yaml = new Yaml()
+    commonConfigMap = yaml.load(commonConfigContents)
+    out.println('Successfully parsed secret YAML file')
+
+} catch (YAMLException e) {
+    // Chain the YAMLException as the cause so its stack trace is not lost.
+    throw new IllegalArgumentException("Unable to parse ${commonVarsFilePath}: ${e.message}", e)
+}
+
+// Per-course export: builds the edx-platform virtualenv, then runs run-course-exporter.sh
+// in the "analytics-exporter" virtualenv. This job defines no triggers block.
+job ('analytics-exporter-course') {
+    parameters {
+        stringParam('COURSES', '', 'Space separated list of courses to process. E.g. --course=course-v1:BerkleeX+BMPR365_3x+1T2015')
+        stringParam('EXPORTER_BRANCH', 'environment/production', 'Branch from the analytics-exporter repository. For tags use tags/[tag-name].')
+        stringParam('PLATFORM_BRANCH', 'origin/zafft/analytics-exporter-settings-hotfix', 'Branch from the edx-platform repository. For tags use tags/[tag-name].')
+        stringParam('SECURE_BRANCH', 'release', 'Branch from the analytics-secure repository, where the configuration settings reside. For tags use tags/[tag-name]')
+        stringParam('CONFIG_FILENAME', 'course_exporter.yaml', 'Name of configuration file in analytics-secure/analytics-exporter.')
+        stringParam('OUTPUT_BUCKET', '', 'Name of the bucket for the destination of the export data. Can use a path. (eg. export-data/test).')
+        stringParam('NOTIFICATION_EMAILS', '', 'Space separated list of emails to notify in case of failure.')
+        stringParam('DATE_MODIFIER', '', 'Used to set the date of the CWSM dump. Leave blank to use today\'s date. Set to "-d 201x-0x-0x" if that is when the CWSM dump took place. (Leave off quotes.)')
+        stringParam('TASKS', '', 'Space separated list of tasks to process. Leave this blank to use the task list specified in the config file. Specify here only if you are running tests of a specific task.')
+    }
+
+    multiscm{
+        git {
+            remote {
+                url('git@github.com:edx/edx-platform.git')
+                branch('$PLATFORM_BRANCH')
+                credentials('1')
+            }
+            extensions {
+                pruneBranches()
+                relativeTargetDirectory('edx-platform')
+            }
+        }
+        git {
+            remote {
+                url('git@github.com:edx/edx-analytics-exporter.git')
+                branch('$EXPORTER_BRANCH')
+                credentials('1')
+            }
+            extensions {
+                pruneBranches()
+                relativeTargetDirectory('analytics-exporter')
+            }
+        }
+        git {
+            remote {
+                url(commonConfigMap.get('SECURE_REPO_URL'))
+                branch('$SECURE_BRANCH')
+                credentials('1')
+            }
+            extensions {
+                pruneBranches()
+                relativeTargetDirectory('analytics-secure')
+            }
+        }
+        git {
+            remote {
+                url(commonConfigMap.get('BAKED_CONFIG_SECURE_REPO_URL'))
+                branch('*/master')
+                credentials('1')
+            }
+            extensions {
+                relativeTargetDirectory('config/baked-config-secure')
+            }
+        }
+    }
+
+    wrappers {
+        timestamps()
+    }
+
+    steps {
+        virtualenv {
+            nature("shell")
+            command(
+                readFileFromWorkspace("dataeng/resources/setup-platform-venv.sh")
+            )
+        }
+        virtualenv {
+            nature("shell")
+            name("analytics-exporter")
+            command(
+                readFileFromWorkspace("dataeng/resources/run-course-exporter.sh")
+            )
+        }
+    }
+}
+
+// Per-organization worker: runs org-exporter-worker.sh. Triggered by the
+// analytics-exporter-master job below; throttled to 4 concurrent builds.
+job ('analytics-exporter-worker') {
+
+    parameters {
+        stringParam('NOTIFICATION_EMAILS')
+        stringParam('MASTER_WORKSPACE')
+        stringParam('ORG_CONFIG_PATH')
+        stringParam('GPG_KEYS_PATH')
+        stringParam('DATE')
+        stringParam('CONFIG_PATH')
+        stringParam('OUTPUT_BUCKET')
+        stringParam('EXPORTER_VENV')
+        stringParam('ORG')
+        stringParam('PLATFORM_VENV')
+        stringParam('EXTRA_OPTIONS')
+        stringParam('SECURE_BRANCH')
+    }
+
+    logRotator {
+        daysToKeep(30)
+    }
+
+    throttleConcurrentBuilds {
+        maxPerNode(4)
+        maxTotal(4)
+    }
+
+    concurrentBuild()
+
+    multiscm {
+        git {
+            remote {
+                url(commonConfigMap.get('BAKED_CONFIG_SECURE_REPO_URL'))
+                branch('*/master')
+                credentials('1')
+            }
+            extensions {
+                relativeTargetDirectory('config/baked-config-secure')
+            }
+        }
+        git {
+            remote {
+                url(commonConfigMap.get('SECURE_REPO_URL'))
+                branch('$SECURE_BRANCH')
+                credentials('1')
+            }
+            extensions {
+                pruneBranches()
+                relativeTargetDirectory('analytics-secure')
+            }
+        }
+    }
+
+    wrappers {
+        timestamps()
+        buildName('#${BUILD_NUMBER} ${ENV,var="ORG"}')
+    }
+
+    steps {
+        shell(readFileFromWorkspace("dataeng/resources/org-exporter-worker.sh"))
+    }
+
+    publishers {
+        // Mark the build as 'unstable' if the text is found in 'console log'.
+        textFinder("\\[WARNING\\]", '', true, false, true)
+    }
+
+}
+
+// Weekly master job (cron 'H 10 * * 0'): runs setup-exporter.sh, then triggers
+// analytics-exporter-worker using the parameter files matching organizations/*.
+job ('analytics-exporter-master') {
+
+    parameters {
+        stringParam('ORGS', '*', 'Space separated list of organizations to process. Can use wildcards. e.g.: idbx HarvardX')
+        stringParam('EXPORTER_BRANCH', 'environment/production', 'Branch from the edx-analytics-exporter repository. For tags use tags/[tag-name].')
+        stringParam('PLATFORM_BRANCH', 'aed/analytics-exporter-settings-hotfix', 'Branch from the edx-platform repository. For tags use tags/[tag-name].')
+        stringParam('SECURE_BRANCH', 'release', 'Branch from the analytics-secure repository, where the configuration settings reside. For tags use tags/[tag-name]')
+        stringParam('CONFIG_FILENAME', 'default.yaml', 'Name of configuration file in analytics-secure/analytics-exporter.')
+        stringParam('OUTPUT_BUCKET', commonConfigMap.get('EXPORTER_OUTPUT_BUCKET'), 'Name of the bucket for the destination of the export data. Can use a path. (eg. export-data/test).')
+        stringParam('NOTIFICATION_EMAILS', commonConfigMap.get('EXTENDED_NOTIFY_LIST'), 'Space separated list of emails to notify in case of failure.')
+        stringParam('DATE_MODIFIER', '', 'Used to set the date of the CWSM dump. Leave blank to use today\'s date. Set to "-d 201x-0x-0x" if that is when the CWSM dump took place. (Leave off quotes.)')
+        stringParam('EXTRA_OPTIONS', '--exclude-task=OrgEmailOptInTask', 'e.g. --exclude-task=OrgEmailOptInTask')
+        stringParam('ORG_CONFIG', 'data-czar-keys/config.yaml', 'Path to the data-czar organization config file.')
+        stringParam('DATA_CZAR_KEYS_BRANCH', 'master', 'Branch to use for the data-czar-keys repository.')
+    }
+
+    multiscm{
+        git {
+            remote {
+                url('git@github.com:edx/edx-platform.git')
+                branch('$PLATFORM_BRANCH')
+                credentials('1')
+            }
+            extensions {
+                pruneBranches()
+                relativeTargetDirectory('edx-platform')
+            }
+        }
+        git {
+            remote {
+                url('git@github.com:edx/edx-analytics-exporter.git')
+                branch('$EXPORTER_BRANCH')
+                credentials('1')
+            }
+            extensions {
+                pruneBranches()
+                relativeTargetDirectory('analytics-exporter')
+            }
+        }
+        git {
+            remote {
+                url(commonConfigMap.get('SECURE_REPO_URL'))
+                branch('$SECURE_BRANCH')
+                credentials('1')
+            }
+            extensions {
+                pruneBranches()
+                relativeTargetDirectory('analytics-secure')
+            }
+        }
+        git {
+            remote {
+                url(commonConfigMap.get('DATA_CZAR_KEYS_REPO_URL'))
+                branch('$DATA_CZAR_KEYS_BRANCH')
+                credentials('1')
+            }
+            extensions {
+                relativeTargetDirectory('data-czar-keys')
+            }
+        }
+    }
+
+    triggers{
+        cron('# Sundays around 10 a.m. UTC\nH 10 * * 0')
+    }
+
+    wrappers {
+        timestamps()
+    }
+
+    steps {
+        virtualenv {
+            nature("shell")
+            command(
+                readFileFromWorkspace("dataeng/resources/setup-platform-venv-legacy.sh")
+            )
+        }
+        virtualenv {
+            nature("shell")
+            name("analytics-exporter")
+            command(
+                readFileFromWorkspace("dataeng/resources/setup-exporter.sh")
+            )
+        }
+
+        downstreamParameterized {
+            trigger('analytics-exporter-worker') {
+                block {
+                    buildStepFailure('FAILURE')
+                    failure('FAILURE')
+                    unstable('UNSTABLE')
+                }
+                parameters {
+                    predefinedProp('MASTER_WORKSPACE', '${WORKSPACE}')
+                    predefinedProp('NOTIFICATION_EMAILS', '${NOTIFICATION_EMAILS}')
+                }
+                parameterFactories {
+                    fileBuildParameterFactory {
+                        filePattern('organizations/*')
+                        encoding('UTF-8')
+                        noFilesFoundAction('SKIP')
+                    }
+                }
+            }
+        }
+    }
+}
diff --git a/dataeng/resources/org-exporter-worker.sh b/dataeng/resources/org-exporter-worker.sh
new file mode 100644
index 000000000..be3edadaf
--- /dev/null
+++ b/dataeng/resources/org-exporter-worker.sh
@@ -0,0 +1,14 @@
+# Shell step of the analytics-exporter-worker job: runs the exporter for one organization (${ORG}).
+# The external prefix uses ${DATE}, falling back to today's day of month when DATE is empty.
+TODAY=$(date +%d)
+
+env | sort
+
+${EXPORTER_VENV}/bin/exporter \
+    --org=${ORG} \
+    --output-bucket=${OUTPUT_BUCKET} \
+    --external-prefix=databases/${DATE:-$TODAY} \
+    --django-admin=${PLATFORM_VENV}/bin/django-admin.py \
+    --django-pythonpath=${PLATFORM_VENV}/edx-platform \
+    --gpg-keys=${GPG_KEYS_PATH} \
+    ${EXTRA_OPTIONS} ${CONFIG_PATH} ${ORG_CONFIG_PATH}
diff --git a/dataeng/resources/run-course-exporter.sh b/dataeng/resources/run-course-exporter.sh
new file mode 100644
index 000000000..756b2de30
--- /dev/null
+++ b/dataeng/resources/run-course-exporter.sh
@@ -0,0 +1,32 @@
+# Create destination directory
+WORKING_DIRECTORY=/var/lib/jenkins/tmp/analytics-course-exporter
+mkdir -p ${WORKING_DIRECTORY}/course-data
+
+# Install requirements into this (exporter) virtual environment
+pushd analytics-exporter/
+pip install -r github_requirements.txt
+pip install mysql-connector-python -e .
+popd
+
+# Get name of other (platform) virtual environment
+source platform_venv
+
+# Configuration paths in analytics-secure
+SECURE_ROOT=${WORKSPACE}/analytics-secure/analytics-exporter
+CONFIG_PATH=${SECURE_ROOT}/${CONFIG_FILENAME}
+
+DATE=$(date +%d ${DATE_MODIFIER})
+TODAY=$(date +%d)
+
+env | sort
+
+# Run the course exporter for the requested courses and tasks
+course-exporter \
+    ${COURSES} \
+    ${TASKS} \
+    --work-dir=${WORKING_DIRECTORY} \
+    --output-bucket=${OUTPUT_BUCKET} \
+    --external-prefix=databases/${DATE:-$TODAY} \
+    --django-admin=${PLATFORM_VENV}/bin/django-admin.py \
+    --django-pythonpath=${PLATFORM_VENV}/edx-platform \
+    ${CONFIG_PATH}
diff --git a/dataeng/resources/setup-exporter-properties.sh b/dataeng/resources/setup-exporter-email-optin.sh
similarity index 100%
rename from dataeng/resources/setup-exporter-properties.sh
rename to dataeng/resources/setup-exporter-email-optin.sh
diff --git a/dataeng/resources/setup-exporter.sh b/dataeng/resources/setup-exporter.sh
new file mode 100644
index 000000000..d3bfb1bda
--- /dev/null
+++ b/dataeng/resources/setup-exporter.sh
@@ -0,0 +1,47 @@
+# Check that the DATE_MODIFIER is set to a Sunday, or if blank the current day is a Sunday
+# DAYOFWEEK is quoted in the test so an empty value (date failed) aborts instead of being a test syntax error.
+DAYOFWEEK=$(date +%u ${DATE_MODIFIER})
+if [ "$DAYOFWEEK" != 7 ]; then
+    echo "Export date must be a Sunday (date +%u gave '${DAYOFWEEK}', expected 7); aborting." >&2
+    exit 1
+fi
+
+# Create destination directory
+mkdir -p /var/lib/jenkins/tmp/analytics-exporter/course-data
+
+# Install requirements into this (exporter) virtual environment
+pushd analytics-exporter/
+pip install -r github_requirements.txt
+# NOTE(review): --allow-external is not accepted by newer pip releases, and run-course-exporter.sh
+# installs mysql-connector-python without it -- confirm the pip version used by this virtualenv.
+pip install --allow-external mysql-connector-python -e .
+popd
+
+# Configuration paths in analytics-secure
+SECURE_ROOT=${WORKSPACE}/analytics-secure/analytics-exporter
+CONFIG_PATH=${SECURE_ROOT}/${CONFIG_FILENAME}
+GPG_KEYS_PATH=${WORKSPACE}/data-czar-keys
+
+# Save virtualenv location and configuration paths
+echo "
+EXPORTER_VENV=${VIRTUAL_ENV}
+CONFIG_PATH=${CONFIG_PATH}
+GPG_KEYS_PATH=${GPG_KEYS_PATH}
+DATE=$(date +%d ${DATE_MODIFIER})
+EXTRA_OPTIONS=${EXTRA_OPTIONS}
+ORG_CONFIG_PATH=${WORKSPACE}/${ORG_CONFIG}
+SECURE_BRANCH=${SECURE_BRANCH}
+" > exporter_vars
+
+env | sort
+
+
+# Export job configuration files
+exporter-properties \
+    --output-bucket=${OUTPUT_BUCKET} \
+    --orgs="${ORGS}" \
+    --include=platform_venv \
+    --include=exporter_vars \
+    ${CONFIG_PATH} \
+    ${WORKSPACE}/${ORG_CONFIG} \
+    organizations
diff --git a/dataeng/resources/setup-platform-venv-legacy.sh b/dataeng/resources/setup-platform-venv-legacy.sh
new file mode 100755
index 000000000..dd01210bf
--- /dev/null
+++ b/dataeng/resources/setup-platform-venv-legacy.sh
@@ -0,0 +1,19 @@
+#!/usr/bin/env bash
+# Some recent changes in edx-platform break the exporter.
+# We are currently using edx-platform's aed/analytics-exporter-settings-hotfix(Nov 2017) which follows an old
+# requirements installation strategy. This file would go away in favor of 'setup-platform-venv.sh' once we figure out the
+# underlying issue.
+
+set -e  # Abort on the first failed install, also when this file is executed directly
+
+# Install requirements
+pushd edx-platform
+pip install --exists-action w -r requirements/edx/pre.txt
+pip install --exists-action w -r requirements/edx/django.txt
+pip install --exists-action w -r requirements/edx/base.txt
+pip install --exists-action w -r requirements/edx/github.txt
+pip install --exists-action w -r requirements/edx/local.txt
+popd
+
+# Save virtualenv location
+echo "PLATFORM_VENV=${VIRTUAL_ENV}" > platform_venv