From 9e0dad38ae4d6f5ba92d7af5898811e6f0c9bf77 Mon Sep 17 00:00:00 2001
From: Kenneth Hendricks
Date: Fri, 8 Dec 2017 14:52:33 +1100
Subject: [PATCH] Add course log fetch CLI script

---
 classes/client/s3_client.php               |  29 +++
 classes/log_fetcher/course_log_fetcher.php | 235 +++++++++++++++++++++
 cli/fetch_course_logs.php                  |  81 +++++++
 3 files changed, 345 insertions(+)
 create mode 100644 classes/log_fetcher/course_log_fetcher.php
 create mode 100644 cli/fetch_course_logs.php

diff --git a/classes/client/s3_client.php b/classes/client/s3_client.php
index 0e46230..e6061a8 100644
--- a/classes/client/s3_client.php
+++ b/classes/client/s3_client.php
@@ -97,4 +97,33 @@ public function upload_file($filepath, $keyname) {
         return $s3url;
     }
 
+    /**
+     * Downloads an S3 object from the configured bucket straight to a local file.
+     *
+     * @param string $filepath Local file path the object is saved to ('SaveAs').
+     * @param string $keyname S3 keyname of the object to download.
+     * @return void
+     */
+    public function download_file($filepath, $keyname) {
+        $this->client->getObject(array(
+            'Bucket' => $this->bucket,
+            'Key' => $keyname,
+            'SaveAs' => $filepath
+        ));
+    }
+
+    /**
+     * Returns the keys of all objects in the configured bucket.
+     *
+     * @return array List of object keys, in the order the iterator yields them.
+     */
+    public function get_all_keys() {
+        // Use the iterator so listings of more than 1000 objects are paged through completely.
+        $keys = array();
+        $objects = $this->client->getIterator('ListObjects', array('Bucket' => $this->bucket));
+        foreach ($objects as $object) {
+            $keys[] = $object['Key'];
+        }
+        return $keys;
+    }
 }
\ No newline at end of file
diff --git a/classes/log_fetcher/course_log_fetcher.php b/classes/log_fetcher/course_log_fetcher.php
new file mode 100644
index 0000000..7e33b6c
--- /dev/null
+++ b/classes/log_fetcher/course_log_fetcher.php
@@ -0,0 +1,235 @@
+<?php
+// This file is part of Moodle - http://moodle.org/
+//
+// Moodle is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// Moodle is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with Moodle.  If not, see <http://www.gnu.org/licenses/>.
+
+/**
+ * Course log fetcher
+ *
+ * @package tool_s3logs
+ * @author Kenneth Hendricks
+ * @copyright Catalyst IT
+ * @license http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
+ */
+
+namespace tool_s3logs\log_fetcher;
+
+use tool_s3logs\client\s3_client;
+
+class course_log_fetcher {
+
+    /**
+     * Fetches logs for multiple courses at once and writes to separate files.
+     *
+     * One CSV file per course is created in the log folder. Each file gets a
+     * header row, then the matching rows from the S3 log files, then the
+     * matching rows still present in the database. Progress is echoed.
+     *
+     * @param array $courseids Course ids.
+     * @param string $logfolder Log folder path.
+     * @return void
+     */
+    public static function fetch_logs($courseids, $logfolder) {
+
+        echo "Setting up course log files ... \n";
+        $courseloghandles = self::get_course_log_file_handles($courseids, $logfolder);
+
+        // Whatever happens while pulling logs, the output files opened above
+        // must be closed again so buffered rows are flushed to disk.
+        try {
+            self::write_headers_to_course_log_files($courseloghandles);
+
+            echo "Getting S3 log file keys ... \n";
+            $s3logkeys = self::get_s3_log_keys();
+
+            echo "Fetching logs from S3 \n";
+            self::write_s3_logs_to_course_logs($courseids, $courseloghandles, $s3logkeys);
+
+            echo "Fetching logs from DB \n";
+            self::write_current_database_logs_to_course_logs($courseids, $courseloghandles);
+        } finally {
+            self::close_course_log_file_handles($courseloghandles);
+        }
+
+        echo "Finished pulling logs \n";
+    }
+
+    /**
+     * Pulls s3 logs, parses them and writes relevant course log entries to
+     * respective files.
+     *
+     * @param array $courseids Course ids.
+     * @param array $courseloghandles Course log file handles.
+     * @param array $s3logkeys S3 log keys
+     * @return void
+     */
+    private static function write_s3_logs_to_course_logs($courseids, $courseloghandles, $s3logkeys) {
+        $currentlogcount = 0;
+        $totallogs = count($s3logkeys);
+        $totalcourses = count($courseids);
+
+        // Tallied across every log file: delete events for different courses
+        // may sit in different files, so this must not be reset per file.
+        $completedlogs = 0;
+
+        foreach ($s3logkeys as $s3logkey) {
+            $currentlogcount++;
+
+            echo "\nDownloading $s3logkey ($currentlogcount/$totallogs) \n";
+            $loghandle = self::download_s3_log_file($s3logkey);
+            if (!is_resource($loghandle)) {
+                // fgetcsv() does not return false for an invalid handle (null before
+                // PHP 8, TypeError from PHP 8), so the parse loop must not be entered.
+                throw new \RuntimeException("Unable to open downloaded log file for {$s3logkey}");
+            }
+
+            // The download helper only hands back the handle, so recover the temp
+            // file path from the stream in order to delete the file once parsed.
+            $streammeta = stream_get_meta_data($loghandle);
+            $tempfilepath = isset($streammeta['uri']) ? $streammeta['uri'] : null;
+
+            $finished = false;
+
+            echo "Parsing $s3logkey \n";
+            while (($row = fgetcsv($loghandle)) !== false) {
+                if (!self::is_course_log_entry($courseids, $row)) {
+                    continue;
+                }
+
+                $entrycourseid = $row[11]; // Holds contextinstanceid.
+                fputcsv($courseloghandles[$entrycourseid], $row);
+
+                echo "Found log entry for course $entrycourseid \n";
+
+                // Column 1 holds the event name.
+                if ($row[1] === '\core\event\course_deleted') {
+                    $completedlogs++;
+
+                    // We've parsed all logs needed.
+                    if ($completedlogs >= $totalcourses) {
+                        echo "Finishing early, {$completedlogs} course delete events found \n";
+                        $finished = true;
+                        break;
+                    }
+                }
+            }
+
+            fclose($loghandle);
+
+            // Remove the temporary copy so a long run does not fill the temp dir.
+            if ($tempfilepath !== null && is_file($tempfilepath)) {
+                unlink($tempfilepath);
+            }
+
+            if ($finished) {
+                return;
+            }
+        }
+    }
+
+    /**
+     * Determines if log row is a course log entry we want.
+     * Matches courseid = contextinstanceid and contextlevel = course
+     *
+     * Rows that do not contain both context columns (for example the single
+     * null field fgetcsv() returns for a blank line) never match.
+     *
+     * @param array $courseids Course ids.
+     * @param array $row Log row; index 10 is contextlevel, index 11 is contextinstanceid.
+     * @return boolean
+     */
+    private static function is_course_log_entry($courseids, $row) {
+        if (!is_array($row) || !isset($row[10], $row[11])) {
+            return false;
+        }
+
+        if ((int) $row[10] !== CONTEXT_COURSE) {
+            return false;
+        }
+
+        // Compare as strings so integer and string course ids both match the CSV
+        // value exactly; the matched value is later used to look up the file handle.
+        return in_array((string) $row[11], array_map('strval', $courseids), true);
+    }
+
+    /**
+     * Downloads an s3 log file to the temp dir and opens a handle for it.
+     *
+     * The temporary file is not removed here; only the open read handle is
+     * returned to the caller.
+     *
+     * @param string $s3logkey S3 key of the log file to download.
+     * @return resource Read handle on the downloaded temporary file.
+     * @throws \RuntimeException If the temporary file cannot be created or opened.
+     */
+    private static function download_s3_log_file($s3logkey) {
+        $s3client = new s3_client();
+        $tempdir = make_temp_directory('s3logs_download');
+
+        $tempfilepath = tempnam($tempdir, 's3logs_');
+        if ($tempfilepath === false) {
+            throw new \RuntimeException("Unable to create a temporary file in {$tempdir}");
+        }
+
+        $s3client->download_file($tempfilepath, $s3logkey);
+
+        $loghandle = fopen($tempfilepath, 'r');
+        if ($loghandle === false) {
+            // Nothing will be able to read the download, so do not leave it behind.
+            if (is_file($tempfilepath)) {
+                unlink($tempfilepath);
+            }
+            throw new \RuntimeException("Unable to open downloaded log file {$tempfilepath}");
+        }
+
+        return $loghandle;
+    }
+
+    /**
+     * Gets oldest -> newest array of S3 log keys
+     *
+     * Only keys ending in "_<14 digits>_<digits>_<digits>.csv" are returned;
+     * every other object in the bucket is ignored.
+     *
+     * @return array Full S3 keys, indexed and sorted by the part after the prefix.
+     */
+    private static function get_s3_log_keys() {
+        $s3client = new s3_client();
+        $potentialkeys = $s3client->get_all_keys();
+
+        $keys = array();
+        foreach ($potentialkeys as $potentialkey) {
+            // Don't include keys that aren't in the expected format. The pattern is
+            // anchored to the end of the key and the dot is escaped, so look-alikes
+            // such as "..._1_2xcsv" or "..._1_2.csv.bak" are rejected. The captured
+            // group is the sortable part, which stays correct even when the prefix
+            // itself contains underscores.
+            if (!preg_match('/_(\d{14}_\d*_\d*\.csv)$/', $potentialkey, $matches)) {
+                continue;
+            }
+
+            $keys[$matches[1]] = $potentialkey;
+        }
+
+        // Sort so ordered oldest -> newest (the sortable part starts with the
+        // fixed-width 14 digit stamp, presumably YmdHis - confirm against the uploader).
+        ksort($keys, SORT_STRING);
+
+        return $keys;
+    }
+
+    /**
+     * Writes current log contents for courses that are in the DB to log files.
+     *
+     * @param array $courseids Course ids.
+     * @param array $logfilehandles Log file handles, indexed by courseid.
+     * @return void
+     */
+    private static function write_current_database_logs_to_course_logs($courseids, $logfilehandles) {
+        global $DB;
+
+        foreach ($courseids as $courseid) {
+            // A recordset streams rows instead of loading a whole course log into memory.
+            $records = $DB->get_recordset(
+                'logstore_standard_log',
+                array(
+                    'contextlevel' => CONTEXT_COURSE,
+                    'contextinstanceid' => $courseid
+                ),
+                'id ASC'
+            );
+
+            try {
+                foreach ($records as $record) {
+                    fputcsv($logfilehandles[$courseid], (array) $record);
+                }
+            } finally {
+                // Recordsets hold a database cursor and must always be closed.
+                $records->close();
+            }
+        }
+    }
+
+    /**
+     * Creates file handles to pull logs to for each supplied course.
+     *
+     * @param array $courseids Course ids.
+     * @param string $logfolder Log folder path.
+     * @return array Write handles indexed by course id.
+     * @throws \RuntimeException If one of the files cannot be opened for writing.
+     */
+    private static function get_course_log_file_handles($courseids, $logfolder) {
+        $now = time();
+        $handles = array();
+
+        foreach ($courseids as $courseid) {
+            $filename = "{$logfolder}/course_{$courseid}_retreived_{$now}.csv";
+            $fp = fopen($filename, 'w');
+
+            if ($fp === false) {
+                // Do not leave earlier files open when a later one cannot be created.
+                foreach ($handles as $openhandle) {
+                    fclose($openhandle);
+                }
+                throw new \RuntimeException("Unable to open course log file for writing: {$filename}");
+            }
+
+            $handles[$courseid] = $fp;
+        }
+
+        return $handles;
+    }
+
+    /**
+     * Closes array of file handles
+     *
+     * @param array $filehandles File handles to close; the array itself is not modified.
+     * @return void
+     */
+    private static function close_course_log_file_handles(&$filehandles) {
+        foreach ($filehandles as $handle) {
+            // Skip entries that never opened or were already closed.
+            if (is_resource($handle)) {
+                fclose($handle);
+            }
+        }
+    }
+
+    /**
+     * Writes logstore_standard_log CSV headers to file handles.
+     *
+     * The header row is the list of column names of the logstore_standard_log
+     * table as reported by the database layer.
+     *
+     * @param array $filehandles File handles that each receive one header row.
+     * @return void
+     */
+    private static function write_headers_to_course_log_files(&$filehandles) {
+        global $DB;
+
+        // get_columns() is keyed by column name; only the names are needed.
+        $headers = array_keys($DB->get_columns('logstore_standard_log'));
+
+        foreach ($filehandles as $handle) {
+            fputcsv($handle, $headers);
+        }
+    }
+}
diff --git a/cli/fetch_course_logs.php b/cli/fetch_course_logs.php
new file mode 100644
index 0000000..d122c80
--- /dev/null
+++ b/cli/fetch_course_logs.php
@@ -0,0 +1,81 @@
+<?php
+// This file is part of Moodle - http://moodle.org/
+//
+// Moodle is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// Moodle is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with Moodle.  If not, see <http://www.gnu.org/licenses/>.
+
+/**
+ * Fetch course logs CLI script
+ *
+ * @package tool_s3logs
+ * @author Kenneth Hendricks
+ * @copyright Catalyst IT
+ * @license http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
+ */
+
+define('CLI_SCRIPT', true);
+
+require(__DIR__ . '/../../../../config.php');
+require_once($CFG->libdir.'/clilib.php');
+
+$help =
+    "Retrive logs from S3 for courses.
+
+Options:
+--courses=courseid1,courseid2 Comma seperated list of courseids.
+--logfolder=/tmp/course_logs Path to folder where pulled logs will be stored.
+-h, --help Print out this help.
+
+Example:
+\$ sudo -u www-data /usr/bin/php admin/tool/s3logs/cli/fetch_course_logs.php --courses=101,102,103 --logfolder=/tmp/course_logs
+";
+
+list($options, $unrecognized) = cli_get_params(
+    array(
+        'courses' => null,
+        'logfolder' => null,
+        'help' => false,
+    ),
+    array(
+        'h' => 'help',
+    )
+);
+
+// Fail on mistyped options instead of silently ignoring them.
+if ($unrecognized) {
+    $unrecognized = implode("\n  ", $unrecognized);
+    cli_error(get_string('cliunknowoption', 'admin', $unrecognized));
+}
+
+if ($options['help']) {
+    echo $help;
+    exit(0);
+}
+
+// Both options need a value. A bare "--courses" is parsed as boolean true,
+// which would otherwise be read as course id "1".
+if (!is_string($options['courses']) || !is_string($options['logfolder'])) {
+    echo $help;
+    exit(1);
+}
+
+// Tolerate spaces around the commas and drop repeated ids so each course
+// gets exactly one output file.
+$courseids = array_values(array_unique(array_map('trim', explode(',', $options['courses']))));
+
+// Check course ids are positive integers.
+foreach ($courseids as $courseid) {
+    if (!ctype_digit($courseid)) {
+        cli_error("Invalid course id: '{$courseid}'");
+    }
+}
+
+$logfolder = $options['logfolder'];
+
+if (!is_dir($logfolder)) {
+    cli_error("Supplied path is not a directory");
+}
+
+if (!is_writable($logfolder)) {
+    cli_error("Supplied folder is not writable");
+}
+
+tool_s3logs\log_fetcher\course_log_fetcher::fetch_logs($courseids, $logfolder);
\ No newline at end of file