Skip to content
This repository has been archived by the owner on Oct 19, 2024. It is now read-only.

Commit

Permalink
Optimize analytics queries (#781)
Browse files Browse the repository at this point in the history
* Optimize analytics queries

* fix clippy
  • Loading branch information
Geometrically authored Dec 5, 2023
1 parent 27055b9 commit 4630d17
Show file tree
Hide file tree
Showing 11 changed files with 83 additions and 1,095 deletions.
177 changes: 57 additions & 120 deletions src/clickhouse/fetch.rs
Original file line number Diff line number Diff line change
@@ -1,9 +1,6 @@
use std::sync::Arc;

use crate::{
models::ids::{ProjectId, VersionId},
routes::ApiError,
};
use crate::{models::ids::ProjectId, routes::ApiError};
use chrono::{DateTime, Utc};
use serde::{Deserialize, Serialize};

Expand Down Expand Up @@ -39,201 +36,141 @@ pub struct ReturnDownloads {
// Only one of project_id or version_id should be used
// Fetches playtimes as a Vec of ReturnPlaytimes
pub async fn fetch_playtimes(
projects: Option<Vec<ProjectId>>,
versions: Option<Vec<VersionId>>,
projects: Vec<ProjectId>,
start_date: DateTime<Utc>,
end_date: DateTime<Utc>,
resolution_minute: u32,
client: Arc<clickhouse::Client>,
) -> Result<Vec<ReturnPlaytimes>, ApiError> {
let project_or_version = if projects.is_some() && versions.is_none() {
"project_id"
} else if versions.is_some() {
"version_id"
} else {
return Err(ApiError::InvalidInput(
"Only one of 'project_id' or 'version_id' should be used.".to_string(),
));
};

let mut query = client
.query(&format!(
let query = client
.query(
"
SELECT
toUnixTimestamp(toStartOfInterval(recorded, toIntervalMinute(?))) AS time,
{project_or_version} AS id,
SUM(seconds) AS total_seconds
FROM playtime
WHERE recorded BETWEEN ? AND ?
AND {project_or_version} IN ?
GROUP BY
time,
{project_or_version}
"
))
SELECT
toUnixTimestamp(toStartOfInterval(recorded, toIntervalMinute(?))) AS time,
project_id AS id,
SUM(seconds) AS total_seconds
FROM playtime
WHERE recorded BETWEEN ? AND ?
AND project_id IN ?
GROUP BY
time,
project_id
",
)
.bind(resolution_minute)
.bind(start_date.timestamp())
.bind(end_date.timestamp());

if let Some(projects) = projects {
query = query.bind(projects.iter().map(|x| x.0).collect::<Vec<_>>());
} else if let Some(versions) = versions {
query = query.bind(versions.iter().map(|x| x.0).collect::<Vec<_>>());
}
.bind(end_date.timestamp())
.bind(projects.iter().map(|x| x.0).collect::<Vec<_>>());

Ok(query.fetch_all().await?)
}

// Fetches views as a Vec of ReturnViews
pub async fn fetch_views(
projects: Option<Vec<ProjectId>>,
versions: Option<Vec<VersionId>>,
projects: Vec<ProjectId>,
start_date: DateTime<Utc>,
end_date: DateTime<Utc>,
resolution_minutes: u32,
client: Arc<clickhouse::Client>,
) -> Result<Vec<ReturnViews>, ApiError> {
let project_or_version = if projects.is_some() && versions.is_none() {
"project_id"
} else if versions.is_some() {
"version_id"
} else {
return Err(ApiError::InvalidInput(
"Only one of 'project_id' or 'version_id' should be used.".to_string(),
));
};

let mut query = client
.query(&format!(
let query = client
.query(
"
SELECT
toUnixTimestamp(toStartOfInterval(recorded, toIntervalMinute(?))) AS time,
{project_or_version} AS id,
count(views.id) AS total_views
project_id AS id,
count(1) AS total_views
FROM views
WHERE recorded BETWEEN ? AND ?
AND {project_or_version} IN ?
AND project_id IN ?
GROUP BY
time, {project_or_version}
"
))
time, project_id
",
)
.bind(resolution_minutes)
.bind(start_date.timestamp())
.bind(end_date.timestamp());

if let Some(projects) = projects {
query = query.bind(projects.iter().map(|x| x.0).collect::<Vec<_>>());
} else if let Some(versions) = versions {
query = query.bind(versions.iter().map(|x| x.0).collect::<Vec<_>>());
}
.bind(end_date.timestamp())
.bind(projects.iter().map(|x| x.0).collect::<Vec<_>>());

Ok(query.fetch_all().await?)
}

// Fetches downloads as a Vec of ReturnDownloads
pub async fn fetch_downloads(
projects: Option<Vec<ProjectId>>,
versions: Option<Vec<VersionId>>,
projects: Vec<ProjectId>,
start_date: DateTime<Utc>,
end_date: DateTime<Utc>,
resolution_minutes: u32,
client: Arc<clickhouse::Client>,
) -> Result<Vec<ReturnDownloads>, ApiError> {
let project_or_version = if projects.is_some() && versions.is_none() {
"project_id"
} else if versions.is_some() {
"version_id"
} else {
return Err(ApiError::InvalidInput(
"Only one of 'project_id' or 'version_id' should be used.".to_string(),
));
};

let mut query = client
.query(&format!(
let query = client
.query(
"
SELECT
toUnixTimestamp(toStartOfInterval(recorded, toIntervalMinute(?))) AS time,
{project_or_version} as id,
count(downloads.id) AS total_downloads
project_id as id,
count(1) AS total_downloads
FROM downloads
WHERE recorded BETWEEN ? AND ?
AND {project_or_version} IN ?
GROUP BY time, {project_or_version}
"
))
AND project_id IN ?
GROUP BY time, project_id
",
)
.bind(resolution_minutes)
.bind(start_date.timestamp())
.bind(end_date.timestamp());

if let Some(projects) = projects {
query = query.bind(projects.iter().map(|x| x.0).collect::<Vec<_>>());
} else if let Some(versions) = versions {
query = query.bind(versions.iter().map(|x| x.0).collect::<Vec<_>>());
}
.bind(end_date.timestamp())
.bind(projects.iter().map(|x| x.0).collect::<Vec<_>>());

Ok(query.fetch_all().await?)
}

// Fetches countries as a Vec of ReturnCountry
pub async fn fetch_countries(
projects: Option<Vec<ProjectId>>,
versions: Option<Vec<VersionId>>,
projects: Vec<ProjectId>,
start_date: DateTime<Utc>,
end_date: DateTime<Utc>,
client: Arc<clickhouse::Client>,
) -> Result<Vec<ReturnCountry>, ApiError> {
let project_or_version = if projects.is_some() && versions.is_none() {
"project_id"
} else if versions.is_some() {
"version_id"
} else {
return Err(ApiError::InvalidInput(
"Only one of 'project_id' or 'version_id' should be used.".to_string(),
));
};

let mut query = client.query(&format!(
let query = client.query(
"
WITH view_grouping AS (
SELECT
country,
{project_or_version},
count(id) AS total_views
project_id,
count(1) AS total_views
FROM views
WHERE recorded BETWEEN ? AND ?
GROUP BY
country,
{project_or_version}
project_id
),
download_grouping AS (
SELECT
country,
{project_or_version},
count(id) AS total_downloads
project_id,
count(1) AS total_downloads
FROM downloads
WHERE recorded BETWEEN ? AND ?
GROUP BY
country,
{project_or_version}
project_id
)
SELECT
v.country,
v.{project_or_version},
v.project_id,
v.total_views,
d.total_downloads
FROM view_grouping AS v
LEFT JOIN download_grouping AS d ON (v.country = d.country) AND (v.{project_or_version} = d.{project_or_version})
WHERE {project_or_version} IN ?
LEFT JOIN download_grouping AS d ON (v.country = d.country) AND (v.project_id = d.project_id)
WHERE project_id IN ?
"
)).bind(start_date.timestamp()).bind(end_date.timestamp()).bind(start_date.timestamp()).bind(end_date.timestamp());

if let Some(projects) = projects {
query = query.bind(projects.iter().map(|x| x.0).collect::<Vec<_>>());
} else if let Some(versions) = versions {
query = query.bind(versions.iter().map(|x| x.0).collect::<Vec<_>>());
}
)
.bind(start_date.timestamp())
.bind(end_date.timestamp())
.bind(start_date.timestamp())
.bind(end_date.timestamp())
.bind(projects.iter().map(|x| x.0).collect::<Vec<_>>());

Ok(query.fetch_all().await?)
}
9 changes: 3 additions & 6 deletions src/clickhouse/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,6 @@ pub async fn init_client_with_database(
"
CREATE TABLE IF NOT EXISTS {database}.views
(
id UUID,
recorded DateTime64(4),
domain String,
site_path String,
Expand All @@ -50,7 +49,7 @@ pub async fn init_client_with_database(
headers Array(Tuple(String, String)),
)
ENGINE = MergeTree()
PRIMARY KEY (id, recorded)
PRIMARY KEY (project_id, recorded)
"
))
.execute()
Expand All @@ -61,7 +60,6 @@ pub async fn init_client_with_database(
"
CREATE TABLE IF NOT EXISTS {database}.downloads
(
id UUID,
recorded DateTime64(4),
domain String,
site_path String,
Expand All @@ -76,7 +74,7 @@ pub async fn init_client_with_database(
headers Array(Tuple(String, String)),
)
ENGINE = MergeTree()
PRIMARY KEY (id, recorded)
PRIMARY KEY (project_id, recorded)
"
))
.execute()
Expand All @@ -87,7 +85,6 @@ pub async fn init_client_with_database(
"
CREATE TABLE IF NOT EXISTS {database}.playtime
(
id UUID,
recorded DateTime64(4),
seconds UInt64,
Expand All @@ -100,7 +97,7 @@ pub async fn init_client_with_database(
parent UInt64,
)
ENGINE = MergeTree()
PRIMARY KEY (id, recorded)
PRIMARY KEY (project_id, recorded)
"
))
.execute()
Expand Down
Loading

0 comments on commit 4630d17

Please sign in to comment.