From b07aa22edca0c861c84eaa97f362f57219150b17 Mon Sep 17 00:00:00 2001 From: Peter Dulacka Date: Mon, 21 Feb 2022 13:21:07 +0100 Subject: [PATCH] Explicitly casting user ids to string This is to improve query performance. We used user_ids as array keys in the implementations and PHP converted them to integers. When an integer is passed as a query parameter to a string column, the index is not used and query performance degrades drastically. We needed to explicitly cast these external IDs to strings so the index is used correctly. remp/remp#1088 --- .../Commands/ComputeAuthorsSegments.php | 24 +++++++++++++------ .../Commands/ComputeSectionSegments.php | 21 +++++++++++----- CHANGELOG.md | 3 ++- 3 files changed, 34 insertions(+), 14 deletions(-) diff --git a/Beam/app/Console/Commands/ComputeAuthorsSegments.php b/Beam/app/Console/Commands/ComputeAuthorsSegments.php index e855d7924..b31a22993 100644 --- a/Beam/app/Console/Commands/ComputeAuthorsSegments.php +++ b/Beam/app/Console/Commands/ComputeAuthorsSegments.php @@ -45,6 +45,7 @@ class ComputeAuthorsSegments extends Command public function handle() { + ini_set('memory_limit', -1); // Using Cursor on large number of results causing memory issues // https://github.com/laravel/framework/issues/14919 DB::connection()->getPdo()->setAttribute(PDO::MYSQL_ATTR_USE_BUFFERED_QUERY, false); @@ -253,15 +254,21 @@ private function aggregatedPageviewsFor($groupParameter) private function groupDataFor($groupParameter) { - $this->line("Computing total pageviews for parameter '$groupParameter'"); + $this->getOutput()->write("Computing total pageviews for parameter '$groupParameter': "); $totalPageviews = $this->aggregatedPageviewsFor($groupParameter); - $this->line("Done"); + $this->line(count($totalPageviews)); $segments = []; - $this->line("Computing segment items for parameter '$groupParameter'"); - foreach (array_chunk($totalPageviews, 500, true) as $totalPageviewsChunk) { - $forItems = array_keys($totalPageviewsChunk); + $processed = 
0; + $step = 500; + + $bar = $this->output->createProgressBar(count($totalPageviews)); + $bar->setFormat('%message%: %current%/%max% [%bar%] %percent:1s%% %elapsed:6s%/%estimated:-6s% %memory:6s%'); + $bar->setMessage("Computing segment items for parameter '$groupParameter'"); + + foreach (array_chunk($totalPageviews, $step, true) as $totalPageviewsChunk) { + $forItems = array_map('strval', array_keys($totalPageviewsChunk)); $queryItems = DB::table(ArticleAggregatedView::getTableName())->select( $groupParameter, @@ -289,10 +296,13 @@ private function groupDataFor($groupParameter) $segments[$item->author_id][] = $item->$groupParameter; } } - } - $this->line("Done"); + $processed += $step; + $bar->setProgress($processed); + } + $bar->finish(); + $this->line(''); return $segments; } diff --git a/Beam/app/Console/Commands/ComputeSectionSegments.php b/Beam/app/Console/Commands/ComputeSectionSegments.php index 7e8f0665b..60cf670a9 100644 --- a/Beam/app/Console/Commands/ComputeSectionSegments.php +++ b/Beam/app/Console/Commands/ComputeSectionSegments.php @@ -253,15 +253,21 @@ private function aggregatedPageviewsFor($groupParameter) private function groupDataFor($groupParameter) { - $this->line("Computing total pageviews for parameter '$groupParameter'"); + $this->getOutput()->write("Computing total pageviews for parameter '$groupParameter': "); $totalPageviews = $this->aggregatedPageviewsFor($groupParameter); - $this->line("Done"); + $this->line(count($totalPageviews)); $segments = []; - $this->line("Computing segment items for parameter '$groupParameter'"); + + $processed = 0; + $step = 500; + + $bar = $this->output->createProgressBar(count($totalPageviews)); + $bar->setFormat('%message%: %current%/%max% [%bar%] %percent:1s%% %elapsed:6s%/%estimated:-6s% %memory:6s%'); + $bar->setMessage("Computing segment items for parameter '$groupParameter'"); foreach (array_chunk($totalPageviews, 500, true) as $totalPageviewsChunk) { - $forItems = array_keys($totalPageviewsChunk); + 
$forItems = array_map('strval', array_keys($totalPageviewsChunk)); $queryItems = DB::table(ArticleAggregatedView::getTableName())->select( $groupParameter, @@ -289,10 +295,13 @@ private function groupDataFor($groupParameter) $segments[$item->section_id][] = $item->$groupParameter; } } - } - $this->line("Done"); + $processed += $step; + $bar->setProgress($processed); + } + $bar->finish(); + $this->line(''); return $segments; } diff --git a/CHANGELOG.md b/CHANGELOG.md index 9e5abbaaf..215afe4c9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,7 +11,8 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/) and this p - Fixed retrieval of browser_id in `conversions:aggregate-events` command which leads to more thorough definition of user's conversion path. remp/remp#1049 - Previously some events (mainly pageviews) could have been not matched correctly and missing in the aggregated data. - Fixed occasional incorrect page_progress parameter being tracked causing progress update not to be tracked at all. - - Due to JS floating points being JS floating points sometimes the page_progress was >1 which server refused to accept. + - Due to JS floating points being JS floating points sometimes the page_progress was >1 which server refused to accept. +- Fixed issues with very slow author/section segment recalculation for instances with larger amounts of data. remp/remp#1088 ## [0.30.0] - 2022-02-10