From 06c5c8bdb3223ba11670d1dcdab3fcb5808b42c7 Mon Sep 17 00:00:00 2001 From: aecsocket Date: Fri, 22 May 2026 17:51:44 +0100 Subject: [PATCH 1/8] fix uri too long --- apps/labrinth/src/routes/v3/analytics_get.rs | 180 +++++++++++++++---- 1 file changed, 145 insertions(+), 35 deletions(-) diff --git a/apps/labrinth/src/routes/v3/analytics_get.rs b/apps/labrinth/src/routes/v3/analytics_get.rs index 64a80418d7..474a1fc9e2 100644 --- a/apps/labrinth/src/routes/v3/analytics_get.rs +++ b/apps/labrinth/src/routes/v3/analytics_get.rs @@ -9,7 +9,7 @@ mod old; -use std::{num::NonZeroU64, sync::LazyLock}; +use std::{collections::HashMap, num::NonZeroU64, sync::LazyLock}; use crate::database::PgPool; use actix_web::{HttpRequest, post, web}; @@ -576,6 +576,7 @@ mod query { pub struct PlaytimeRow { pub bucket: u64, pub project_id: DBProjectId, + pub parent_version_id: DBVersionId, pub version_id: DBVersionId, pub loader: String, pub game_version: String, @@ -590,13 +591,12 @@ mod query { const USE_GAME_VERSION: &str = "{use_game_version: Bool}"; const USE_COUNTRY: &str = "{use_country: Bool}"; const PARENT_VERSION_IDS: &str = "{parent_version_ids: Array(UInt64)}"; - const PARENT_VERSION_PROJECT_IDS: &str = - "{parent_version_project_ids: Array(UInt64)}"; formatcp!( "SELECT bucket, if({USE_PROJECT_ID}, source_project_id, 0) AS project_id, + parent_version_id, version_id, loader, game_version, @@ -606,6 +606,7 @@ mod query { SELECT widthBucket(toUnixTimestamp(recorded), {TIME_RANGE_START}, {TIME_RANGE_END}, {TIME_SLICES}) AS bucket, project_id AS source_project_id, + 0 AS parent_version_id, if({USE_VERSION_ID}, version_id, 0) AS version_id, if({USE_LOADER}, loader, '') AS loader, if({USE_GAME_VERSION}, game_version, '') AS game_version, @@ -620,7 +621,8 @@ mod query { SELECT widthBucket(toUnixTimestamp(recorded), {TIME_RANGE_START}, {TIME_RANGE_END}, {TIME_SLICES}) AS bucket, - transform(parent, {PARENT_VERSION_IDS}, {PARENT_VERSION_PROJECT_IDS}) AS source_project_id, + 0 AS 
source_project_id, + parent AS parent_version_id, if({USE_VERSION_ID}, version_id, 0) AS version_id, if({USE_LOADER}, loader, '') AS loader, if({USE_GAME_VERSION}, game_version, '') AS game_version, @@ -631,7 +633,7 @@ mod query { recorded BETWEEN {TIME_RANGE_START} AND {TIME_RANGE_END} AND parent IN {PARENT_VERSION_IDS} ) - GROUP BY bucket, project_id, version_id, loader, game_version, country" + GROUP BY bucket, project_id, parent_version_id, version_id, loader, game_version, country" ) }; @@ -764,10 +766,10 @@ pub async fn fetch_analytics( .iter() .map(|version| DBVersionId(version.id)) .collect::>(); - let parent_version_project_ids = parent_versions + let parent_version_projects = parent_versions .iter() - .map(|version| DBProjectId(version.mod_id)) - .collect::>(); + .map(|version| (DBVersionId(version.id), DBProjectId(version.mod_id))) + .collect::>(); let affiliate_code_ids = DBAffiliateCode::get_by_affiliate(user.id.into(), &**pool) @@ -782,7 +784,6 @@ pub async fn fetch_analytics( time_slices: &mut time_slices, project_ids: &project_ids, parent_version_ids: &parent_version_ids, - parent_version_project_ids: &parent_version_project_ids, affiliate_code_ids: &affiliate_code_ids, }; @@ -793,6 +794,7 @@ pub async fn fetch_analytics( query_clickhouse::( &mut query_clickhouse_cx, query::VIEWS, + ClickhouseQueryParams::PROJECT_IDS, &[ ("use_project_id", uses(F::ProjectId)), ("use_domain", uses(F::Domain)), @@ -833,6 +835,7 @@ pub async fn fetch_analytics( query_clickhouse::( &mut query_clickhouse_cx, query::DOWNLOADS, + ClickhouseQueryParams::PROJECT_IDS, &[ ("use_project_id", uses(F::ProjectId)), ("use_domain", uses(F::Domain)), @@ -883,9 +886,9 @@ pub async fn fetch_analytics( use ProjectPlaytimeField as F; let uses = |field| metrics.bucket_by.contains(&field); - query_clickhouse::( + query_clickhouse_playtime( &mut query_clickhouse_cx, - query::PLAYTIME, + &parent_version_projects, &[ ("use_project_id", uses(F::ProjectId)), ("use_version_id", 
uses(F::VersionId)), @@ -893,24 +896,6 @@ pub async fn fetch_analytics( ("use_game_version", uses(F::GameVersion)), ("use_country", uses(F::Country)), ], - |row| row.bucket, - |row| { - let country = if uses(F::Country) { - Some(condense_country(row.country, row.seconds)) - } else { - None - }; - AnalyticsData::Project(ProjectAnalytics { - source_project: row.project_id.into(), - metrics: ProjectMetrics::Playtime(ProjectPlaytime { - version_id: none_if_zero_version_id(row.version_id), - loader: none_if_empty(row.loader), - game_version: none_if_empty(row.game_version), - country, - seconds: row.seconds, - }), - }) - }, ) .await?; } @@ -924,6 +909,7 @@ pub async fn fetch_analytics( query_clickhouse::( &mut query_clickhouse_cx, query::AFFILIATE_CODE_CLICKS, + ClickhouseQueryParams::empty(), &[("use_affiliate_code_id", uses(F::AffiliateCodeId))], |row| row.bucket, |row| { @@ -1201,13 +1187,132 @@ struct QueryClickhouseContext<'a> { time_slices: &'a mut [TimeSlice], project_ids: &'a [DBProjectId], parent_version_ids: &'a [DBVersionId], - parent_version_project_ids: &'a [DBProjectId], affiliate_code_ids: &'a [DBAffiliateCodeId], } +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +struct PlaytimeBucket { + bucket: u64, + project_id: DBProjectId, + version_id: Option, + loader: Option, + game_version: Option, + country: Option, +} + +#[derive(Debug, Clone, Copy, Default)] +struct ClickhouseQueryParams { + project_ids: bool, + parent_version_ids: bool, + affiliate_code_ids: bool, +} + +impl ClickhouseQueryParams { + const PROJECT_IDS: Self = Self { + project_ids: true, + parent_version_ids: false, + affiliate_code_ids: false, + }; + + const fn empty() -> Self { + Self { + project_ids: false, + parent_version_ids: false, + affiliate_code_ids: false, + } + } +} + +impl std::ops::BitOr for ClickhouseQueryParams { + type Output = Self; + + fn bitor(self, rhs: Self) -> Self::Output { + Self { + project_ids: self.project_ids || rhs.project_ids, + parent_version_ids: 
self.parent_version_ids + || rhs.parent_version_ids, + affiliate_code_ids: self.affiliate_code_ids + || rhs.affiliate_code_ids, + } + } +} + +async fn query_clickhouse_playtime( + cx: &mut QueryClickhouseContext<'_>, + parent_version_projects: &HashMap, + use_columns: &[(&str, bool)], +) -> Result<(), ApiError> { + let uses = |name| { + use_columns + .iter() + .any(|(column_name, used)| *column_name == name && *used) + }; + let mut query = cx + .clickhouse + .query(query::PLAYTIME) + .param("time_range_start", cx.req.time_range.start.timestamp()) + .param("time_range_end", cx.req.time_range.end.timestamp()) + .param("time_slices", cx.time_slices.len()) + .param("project_ids", cx.project_ids) + .param("parent_version_ids", cx.parent_version_ids); + for (param_name, used) in use_columns { + query = query.param(param_name, used) + } + + let mut cursor = query.fetch::()?; + let mut buckets = HashMap::::new(); + + while let Some(row) = cursor.next().await? { + let project_id = if uses("use_project_id") && row.project_id.0 == 0 { + parent_version_projects + .get(&row.parent_version_id) + .copied() + .unwrap_or(row.project_id) + } else { + row.project_id + }; + let key = PlaytimeBucket { + bucket: row.bucket, + project_id, + version_id: uses("use_version_id").then_some(row.version_id), + loader: uses("use_loader").then(|| row.loader.clone()), + game_version: uses("use_game_version") + .then(|| row.game_version.clone()), + country: uses("use_country").then(|| row.country.clone()), + }; + + *buckets.entry(key).or_default() += row.seconds; + } + + for (key, seconds) in buckets { + let bucket = key.bucket as usize; + add_to_time_slice( + cx.time_slices, + bucket, + AnalyticsData::Project(ProjectAnalytics { + source_project: key.project_id.into(), + metrics: ProjectMetrics::Playtime(ProjectPlaytime { + version_id: key + .version_id + .and_then(none_if_zero_version_id), + loader: key.loader.and_then(none_if_empty), + game_version: key.game_version.and_then(none_if_empty), + 
country: key + .country + .map(|country| condense_country(country, seconds)), + seconds, + }), + }), + )?; + } + + Ok(()) +} + async fn query_clickhouse( cx: &mut QueryClickhouseContext<'_>, query: &str, + params: ClickhouseQueryParams, use_columns: &[(&str, bool)], // I hate using the hidden type Row::Value here, but it's what next() returns, so I see no other option row_get_bucket: impl Fn(&Row::Value<'_>) -> u64, @@ -1221,11 +1326,16 @@ where .query(query) .param("time_range_start", cx.req.time_range.start.timestamp()) .param("time_range_end", cx.req.time_range.end.timestamp()) - .param("time_slices", cx.time_slices.len()) - .param("project_ids", cx.project_ids) - .param("parent_version_ids", cx.parent_version_ids) - .param("parent_version_project_ids", cx.parent_version_project_ids) - .param("affiliate_code_ids", cx.affiliate_code_ids); + .param("time_slices", cx.time_slices.len()); + if params.project_ids { + query = query.param("project_ids", cx.project_ids); + } + if params.parent_version_ids { + query = query.param("parent_version_ids", cx.parent_version_ids); + } + if params.affiliate_code_ids { + query = query.param("affiliate_code_ids", cx.affiliate_code_ids); + } for (param_name, used) in use_columns { query = query.param(param_name, used) } From 2c1172248474d64add2da3eb6ccb8f511d351bda Mon Sep 17 00:00:00 2001 From: aecsocket Date: Fri, 22 May 2026 18:01:39 +0100 Subject: [PATCH 2/8] all projects route for user --- ...e83b71b766d64e05cecbfab58194eff89ec08.json | 22 +++ apps/labrinth/src/routes/v3/users.rs | 131 +++++++++++++++++- 2 files changed, 151 insertions(+), 2 deletions(-) create mode 100644 apps/labrinth/.sqlx/query-9cabb8fd373e6ebf76e6d6a6711e83b71b766d64e05cecbfab58194eff89ec08.json diff --git a/apps/labrinth/.sqlx/query-9cabb8fd373e6ebf76e6d6a6711e83b71b766d64e05cecbfab58194eff89ec08.json b/apps/labrinth/.sqlx/query-9cabb8fd373e6ebf76e6d6a6711e83b71b766d64e05cecbfab58194eff89ec08.json new file mode 100644 index 0000000000..899516555d --- 
/dev/null +++ b/apps/labrinth/.sqlx/query-9cabb8fd373e6ebf76e6d6a6711e83b71b766d64e05cecbfab58194eff89ec08.json @@ -0,0 +1,22 @@ +{ + "db_name": "PostgreSQL", + "query": "\n SELECT m.id\n FROM mods m\n WHERE m.organization_id = ANY($1)\n ", + "describe": { + "columns": [ + { + "ordinal": 0, + "name": "id", + "type_info": "Int8" + } + ], + "parameters": { + "Left": [ + "Int8Array" + ] + }, + "nullable": [ + false + ] + }, + "hash": "9cabb8fd373e6ebf76e6d6a6711e83b71b766d64e05cecbfab58194eff89ec08" +} diff --git a/apps/labrinth/src/routes/v3/users.rs b/apps/labrinth/src/routes/v3/users.rs index c93d1b5336..5c0329f002 100644 --- a/apps/labrinth/src/routes/v3/users.rs +++ b/apps/labrinth/src/routes/v3/users.rs @@ -1,4 +1,7 @@ -use std::{collections::HashMap, sync::Arc}; +use std::{ + collections::{HashMap, HashSet}, + sync::Arc, +}; use super::{ApiError, oauth_clients::get_user_clients}; use crate::database::PgPool; @@ -10,12 +13,14 @@ use crate::{ get_user_from_headers, }, database::{ - models::{DBModerationNote, DBUser}, + models::{DBModerationNote, DBOrganization, DBProjectId, DBUser}, redis::RedisPool, }, file_hosting::{FileHost, FileHostPublicity}, models::{ + ids::OrganizationId, notifications::Notification, + organizations::Organization, pats::Scopes, projects::Project, users::{Badges, Role}, @@ -35,6 +40,7 @@ pub fn config(cfg: &mut web::ServiceConfig) { cfg.route("user", web::get().to(user_auth_get)); cfg.route("users", web::get().to(users_get)); cfg.route("user_email", web::get().to(admin_user_email)); + cfg.route("all-projects", web::get().to(all_projects)); cfg.service( web::scope("user") @@ -53,11 +59,132 @@ pub fn config(cfg: &mut web::ServiceConfig) { ); } +#[derive(Serialize)] +pub struct AllProjectsResponse { + pub projects: Vec, + pub organizations: HashMap, +} + #[derive(Deserialize)] pub struct UserEmailQuery { pub email: String, } +pub async fn all_projects( + req: HttpRequest, + pool: web::Data, + redis: web::Data, + session_queue: web::Data, +) 
-> Result, ApiError> { + let user = get_user_from_headers( + &req, + &**pool, + &redis, + &session_queue, + Scopes::PROJECT_READ | Scopes::ORGANIZATION_READ, + ) + .await? + .1; + + let user_project_ids = + DBUser::get_projects(user.id.into(), &**pool, &redis).await?; + let organization_ids = DBUser::get_organizations(user.id.into(), &**pool) + .await?; + let organizations_data = + DBOrganization::get_many_ids(&organization_ids, &**pool, &redis) + .await?; + + let team_ids = organizations_data + .iter() + .map(|organization| organization.team_id) + .collect::>(); + let teams_data = + crate::database::models::DBTeamMember::get_from_team_full_many( + &team_ids, &**pool, &redis, + ) + .await?; + let users = DBUser::get_many_ids( + &teams_data + .iter() + .map(|member| member.user_id) + .collect::>(), + &**pool, + &redis, + ) + .await?; + + let mut team_groups = HashMap::new(); + for member in teams_data { + team_groups.entry(member.team_id).or_insert(vec![]).push(member); + } + + let mut organizations = HashMap::new(); + let mut visible_organization_ids = Vec::new(); + for data in organizations_data { + if !is_visible_organization(&data, &Some(user.clone()), &pool, &redis) + .await? + { + continue; + } + + visible_organization_ids.push(data.id); + let members_data = team_groups.remove(&data.team_id).unwrap_or(vec![]); + let team_members = members_data + .into_iter() + .filter_map(|data| { + users.iter().find(|x| x.id == data.user_id).map(|member| { + crate::models::teams::TeamMember::from( + data, + member.clone(), + false, + ) + }) + }) + .collect(); + + organizations.insert( + OrganizationId::from(data.id), + Organization::from(data, team_members), + ); + } + + let organization_id_values = visible_organization_ids + .iter() + .map(|id| id.0) + .collect::>(); + let organization_project_ids = sqlx::query!( + " + SELECT m.id + FROM mods m + WHERE m.organization_id = ANY($1) + ", + &organization_id_values, + ) + .fetch_all(&**pool) + .await? 
+ .into_iter() + .map(|row| DBProjectId(row.id)) + .collect::>(); + + let project_ids = user_project_ids + .into_iter() + .chain(organization_project_ids) + .collect::>() + .into_iter() + .collect::>(); + let projects_data = + crate::database::DBProject::get_many_ids(&project_ids, &**pool, &redis) + .await?; + let projects = + filter_visible_projects(projects_data, &Some(user), &pool, true) + .await?; + + Ok(web::Json(AllProjectsResponse { + projects, + organizations, + })) +} + pub async fn admin_user_email( req: HttpRequest, pool: web::Data, From 5bba94c709bae94e1a8447c1092536b963f89663 Mon Sep 17 00:00:00 2001 From: aecsocket Date: Fri, 22 May 2026 18:24:13 +0100 Subject: [PATCH 3/8] analytics facet fetching --- .../src/routes/v3/analytics_get/facets.rs | 422 ++++++++++++++++++ .../mod.rs} | 4 +- 2 files changed, 425 insertions(+), 1 deletion(-) create mode 100644 apps/labrinth/src/routes/v3/analytics_get/facets.rs rename apps/labrinth/src/routes/v3/{analytics_get.rs => analytics_get/mod.rs} (99%) diff --git a/apps/labrinth/src/routes/v3/analytics_get/facets.rs b/apps/labrinth/src/routes/v3/analytics_get/facets.rs new file mode 100644 index 0000000000..8028048687 --- /dev/null +++ b/apps/labrinth/src/routes/v3/analytics_get/facets.rs @@ -0,0 +1,422 @@ +use std::collections::HashSet; + +use actix_web::{HttpRequest, post, web}; +use serde::Serialize; + +use super::{DownloadSource, GetRequest, normalize_download_source}; +use crate::{ + auth::get_user_from_headers, + database::{ + PgPool, + models::{DBProjectId, DBUser, DBVersionId}, + redis::RedisPool, + }, + models::{ + ids::VersionId, + pats::Scopes, + v3::analytics::DownloadReason, + }, + queue::session::AuthQueue, + routes::ApiError, +}; + +pub fn config(cfg: &mut utoipa_actix_web::service_config::ServiceConfig) { + cfg.service(fetch_facets); +} + +#[derive(Debug, Serialize, utoipa::ToSchema)] +pub struct FacetsResponse { + pub facets: AnalyticsFacets, +} + +#[derive(Debug, Default, Serialize, 
utoipa::ToSchema)] +pub struct AnalyticsFacets { + pub project_views: ProjectViewsFacets, + pub project_downloads: ProjectDownloadsFacets, + pub project_playtime: ProjectPlaytimeFacets, +} + +#[derive(Debug, Default, Serialize, utoipa::ToSchema)] +pub struct ProjectViewsFacets { + pub domain: Vec, + pub site_path: Vec, + pub monetized: Vec, + pub country: Vec, +} + +#[derive(Debug, Default, Serialize, utoipa::ToSchema)] +pub struct ProjectDownloadsFacets { + pub domain: Vec, + pub user_agent: Vec, + pub version_id: Vec, + pub monetized: Vec, + pub country: Vec, + pub reason: Vec, + pub game_version: Vec, + pub loader: Vec, +} + +#[derive(Debug, Default, Serialize, utoipa::ToSchema)] +pub struct ProjectPlaytimeFacets { + pub version_id: Vec, + pub loader: Vec, + pub game_version: Vec, + pub country: Vec, +} + +#[derive(Debug, clickhouse::Row, serde::Deserialize)] +struct StringFacetRow { + value: String, +} + +#[derive(Debug, clickhouse::Row, serde::Deserialize)] +struct VersionFacetRow { + value: DBVersionId, +} + +#[derive(Debug, clickhouse::Row, serde::Deserialize)] +struct BoolFacetRow { + value: bool, +} + +#[utoipa::path( + responses((status = OK, body = inline(FacetsResponse))), +)] +#[post("/facets")] +pub async fn fetch_facets( + http_req: HttpRequest, + req: web::Json, + pool: web::Data, + redis: web::Data, + session_queue: web::Data, + clickhouse: web::Data, +) -> Result, ApiError> { + let user = get_user_from_headers( + &http_req, + &**pool, + &redis, + &session_queue, + Scopes::ANALYTICS, + ) + .await? + .1; + + let project_ids = if req.project_ids.is_empty() { + DBUser::get_projects(user.id.into(), &**pool, &redis).await? 
+ } else { + req.project_ids + .iter() + .map(|id| DBProjectId::from(*id)) + .collect::>() + }; + let project_ids = + super::filter_allowed_project_ids(&project_ids, &user, &pool, &redis) + .await?; + + let parent_version_ids = + fetch_project_version_ids(&project_ids, &pool).await?; + + Ok(web::Json(FacetsResponse { + facets: AnalyticsFacets { + project_views: fetch_project_views_facets( + &clickhouse, + &project_ids, + ) + .await?, + project_downloads: fetch_project_downloads_facets( + &clickhouse, + &project_ids, + ) + .await?, + project_playtime: fetch_project_playtime_facets( + &clickhouse, + &project_ids, + &parent_version_ids, + ) + .await?, + }, + })) +} + +async fn fetch_project_version_ids( + project_ids: &[DBProjectId], + pool: &PgPool, +) -> Result, ApiError> { + let project_id_values = + project_ids.iter().map(|id| id.0).collect::>(); + Ok(sqlx::query!( + " + SELECT id + FROM versions + WHERE mod_id = ANY($1) + ", + &project_id_values, + ) + .fetch_all(pool) + .await? + .into_iter() + .map(|row| DBVersionId(row.id)) + .collect()) +} + +async fn fetch_project_views_facets( + clickhouse: &clickhouse::Client, + project_ids: &[DBProjectId], +) -> Result { + Ok(ProjectViewsFacets { + domain: fetch_string_facet( + clickhouse, + "SELECT DISTINCT domain AS value FROM views WHERE project_id IN {project_ids: Array(UInt64)} AND domain != '' ORDER BY value", + project_ids, + ) + .await?, + site_path: fetch_string_facet( + clickhouse, + "SELECT DISTINCT site_path AS value FROM views WHERE project_id IN {project_ids: Array(UInt64)} AND site_path != '' ORDER BY value", + project_ids, + ) + .await?, + monetized: fetch_bool_facet( + clickhouse, + "SELECT DISTINCT monetized AS value FROM views WHERE project_id IN {project_ids: Array(UInt64)} ORDER BY value", + project_ids, + ) + .await?, + country: fetch_string_facet( + clickhouse, + "SELECT DISTINCT country AS value FROM views WHERE project_id IN {project_ids: Array(UInt64)} AND country != '' ORDER BY value", + 
project_ids, + ) + .await?, + }) +} + +async fn fetch_project_downloads_facets( + clickhouse: &clickhouse::Client, + project_ids: &[DBProjectId], +) -> Result { + let user_agents = fetch_string_facet( + clickhouse, + "SELECT DISTINCT user_agent AS value FROM downloads WHERE project_id IN {project_ids: Array(UInt64)} AND user_agent != ''", + project_ids, + ) + .await?; + let user_agent = normalize_download_source_facets(&user_agents); + + Ok(ProjectDownloadsFacets { + domain: fetch_string_facet( + clickhouse, + "SELECT DISTINCT domain AS value FROM downloads WHERE project_id IN {project_ids: Array(UInt64)} AND domain != '' ORDER BY value", + project_ids, + ) + .await?, + user_agent, + version_id: fetch_version_facet( + clickhouse, + "SELECT DISTINCT version_id AS value FROM downloads WHERE project_id IN {project_ids: Array(UInt64)} AND version_id != 0 ORDER BY value", + project_ids, + ) + .await?, + monetized: fetch_bool_facet( + clickhouse, + "SELECT DISTINCT user_id != 0 AS value FROM downloads WHERE project_id IN {project_ids: Array(UInt64)} ORDER BY value", + project_ids, + ) + .await?, + country: fetch_string_facet( + clickhouse, + "SELECT DISTINCT country AS value FROM downloads WHERE project_id IN {project_ids: Array(UInt64)} AND country != '' ORDER BY value", + project_ids, + ) + .await?, + reason: fetch_string_facet( + clickhouse, + "SELECT DISTINCT reason AS value FROM downloads WHERE project_id IN {project_ids: Array(UInt64)} AND reason != '' ORDER BY value", + project_ids, + ) + .await? 
+ .into_iter() + .filter_map(|reason| reason.parse().ok()) + .collect(), + game_version: fetch_string_facet( + clickhouse, + "SELECT DISTINCT game_version AS value FROM downloads WHERE project_id IN {project_ids: Array(UInt64)} AND game_version != '' ORDER BY value", + project_ids, + ) + .await?, + loader: fetch_string_facet( + clickhouse, + "SELECT DISTINCT loader AS value FROM downloads WHERE project_id IN {project_ids: Array(UInt64)} AND loader != '' ORDER BY value", + project_ids, + ) + .await?, + }) +} + +fn normalize_download_source_facets( + user_agents: &[String], +) -> Vec { + user_agents + .iter() + .filter_map(|user_agent| normalize_download_source(user_agent)) + .collect::>() + .into_iter() + .collect() +} + +async fn fetch_project_playtime_facets( + clickhouse: &clickhouse::Client, + project_ids: &[DBProjectId], + parent_version_ids: &[DBVersionId], +) -> Result { + Ok(ProjectPlaytimeFacets { + version_id: fetch_playtime_version_facet( + clickhouse, + project_ids, + parent_version_ids, + ) + .await?, + loader: fetch_playtime_string_facet( + clickhouse, + "loader", + project_ids, + parent_version_ids, + ) + .await?, + game_version: fetch_playtime_string_facet( + clickhouse, + "game_version", + project_ids, + parent_version_ids, + ) + .await?, + country: fetch_playtime_string_facet( + clickhouse, + "country", + project_ids, + parent_version_ids, + ) + .await?, + }) +} + +async fn fetch_string_facet( + clickhouse: &clickhouse::Client, + query: &str, + project_ids: &[DBProjectId], +) -> Result, ApiError> { + let mut rows = clickhouse + .query(query) + .param("project_ids", project_ids) + .fetch::()?; + let mut values = Vec::new(); + while let Some(row) = rows.next().await? 
{ + values.push(row.value); + } + Ok(values) +} + +async fn fetch_version_facet( + clickhouse: &clickhouse::Client, + query: &str, + project_ids: &[DBProjectId], +) -> Result, ApiError> { + let mut rows = clickhouse + .query(query) + .param("project_ids", project_ids) + .fetch::()?; + let mut values = Vec::new(); + while let Some(row) = rows.next().await? { + values.push(row.value.into()); + } + Ok(values) +} + +async fn fetch_bool_facet( + clickhouse: &clickhouse::Client, + query: &str, + project_ids: &[DBProjectId], +) -> Result, ApiError> { + let mut rows = clickhouse + .query(query) + .param("project_ids", project_ids) + .fetch::()?; + let mut values = Vec::new(); + while let Some(row) = rows.next().await? { + values.push(row.value); + } + Ok(values) +} + +async fn fetch_playtime_string_facet( + clickhouse: &clickhouse::Client, + column: &str, + project_ids: &[DBProjectId], + parent_version_ids: &[DBVersionId], +) -> Result, ApiError> { + let query = format!( + "SELECT DISTINCT {column} AS value + FROM playtime + WHERE (project_id IN {{project_ids: Array(UInt64)}} OR parent IN {{parent_version_ids: Array(UInt64)}}) + AND {column} != '' + ORDER BY value" + ); + let mut rows = clickhouse + .query(&query) + .param("project_ids", project_ids) + .param("parent_version_ids", parent_version_ids) + .fetch::()?; + let mut values = Vec::new(); + while let Some(row) = rows.next().await? 
{ + values.push(row.value); + } + Ok(values) +} + +async fn fetch_playtime_version_facet( + clickhouse: &clickhouse::Client, + project_ids: &[DBProjectId], + parent_version_ids: &[DBVersionId], +) -> Result, ApiError> { + let mut rows = clickhouse + .query( + "SELECT DISTINCT version_id AS value + FROM playtime + WHERE (project_id IN {project_ids: Array(UInt64)} OR parent IN {parent_version_ids: Array(UInt64)}) + AND version_id != 0 + ORDER BY value", + ) + .param("project_ids", project_ids) + .param("parent_version_ids", parent_version_ids) + .fetch::()?; + let mut values = Vec::new(); + while let Some(row) = rows.next().await? { + values.push(row.value.into()); + } + Ok(values) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn user_agent_facets_use_normalized_sources() { + let user_agents = vec![ + "MultiMC/5.0".to_string(), + "MultiMC/6.0".to_string(), + "PrismLauncher/6.1".to_string(), + "curl/8.7.1".to_string(), + "Mozilla/5.0 AppleWebKit/537.36".to_string(), + ]; + + assert_eq!( + normalize_download_source_facets(&user_agents), + vec![ + DownloadSource::Named("MultiMC".into()), + DownloadSource::Named("Prism Launcher".into()), + DownloadSource::Website, + ], + ); + } +} diff --git a/apps/labrinth/src/routes/v3/analytics_get.rs b/apps/labrinth/src/routes/v3/analytics_get/mod.rs similarity index 99% rename from apps/labrinth/src/routes/v3/analytics_get.rs rename to apps/labrinth/src/routes/v3/analytics_get/mod.rs index 474a1fc9e2..1321fd7b1b 100644 --- a/apps/labrinth/src/routes/v3/analytics_get.rs +++ b/apps/labrinth/src/routes/v3/analytics_get/mod.rs @@ -7,6 +7,7 @@ //! requests, you have to zip together M arrays of N elements //! 
- this makes it inconvenient to have separate endpoints +mod facets; mod old; use std::{collections::HashMap, num::NonZeroU64, sync::LazyLock}; @@ -43,6 +44,7 @@ use crate::{ pub fn config(cfg: &mut utoipa_actix_web::service_config::ServiceConfig) { cfg.service(fetch_analytics); + cfg.configure(facets::config); cfg.configure(old::config); } @@ -351,7 +353,7 @@ pub struct ProjectDownloads { downloads: u64, } -#[derive(Debug, Clone, PartialEq, Eq, utoipa::ToSchema)] +#[derive(Debug, Clone, PartialEq, Eq, Hash, utoipa::ToSchema)] pub enum DownloadSource { Website, ModrinthApp, From 2827b14026fad259d59960e8f3b38ff794ceb3c0 Mon Sep 17 00:00:00 2001 From: aecsocket Date: Fri, 22 May 2026 18:31:37 +0100 Subject: [PATCH 4/8] cache download source regexes --- .../src/routes/v3/analytics_get/mod.rs | 46 ++++++++++++++++++- 1 file changed, 45 insertions(+), 1 deletion(-) diff --git a/apps/labrinth/src/routes/v3/analytics_get/mod.rs b/apps/labrinth/src/routes/v3/analytics_get/mod.rs index 1321fd7b1b..594cbc49ad 100644 --- a/apps/labrinth/src/routes/v3/analytics_get/mod.rs +++ b/apps/labrinth/src/routes/v3/analytics_get/mod.rs @@ -10,11 +10,19 @@ mod facets; mod old; -use std::{collections::HashMap, num::NonZeroU64, sync::LazyLock}; +use std::{ + collections::HashMap, + num::NonZeroU64, + sync::{ + LazyLock, + atomic::{AtomicUsize, Ordering}, + }, +}; use crate::database::PgPool; use actix_web::{HttpRequest, post, web}; use chrono::{DateTime, TimeDelta, Utc}; +use dashmap::DashMap; use eyre::eyre; use futures::StreamExt; use regex::Regex; @@ -1168,7 +1176,43 @@ static DOWNLOAD_SOURCE_PATTERNS: LazyLock> = .collect() }); +// Put a cap of 100MB on the download source cache. We can adjust this as needed, +// if we find we're getting too few cache hits, or too much memory usage. 
+const MAX_DOWNLOAD_SOURCE_CACHE_BYTES: usize = 100 * 1024 * 1024; + +static DOWNLOAD_SOURCE_CACHE: LazyLock< + DashMap>, +> = LazyLock::new(DashMap::new); + +static DOWNLOAD_SOURCE_CACHE_BYTES: AtomicUsize = AtomicUsize::new(0); + fn normalize_download_source(user_agent: &str) -> Option { + if let Some(source) = DOWNLOAD_SOURCE_CACHE.get(user_agent) { + return source.clone(); + } + + let source = normalize_download_source_uncached(user_agent); + + // This is intentionally a simple bounded cache. Reads are the hot path, + // and the distinct UA set should settle after common launchers are seen. + // If this becomes lock-contentious, ArcSwap plus an immutable map would + // avoid DashMap's shard locks while keeping misses cheaper than cloning a + // std HashMap. + let key_bytes = user_agent.len(); + let previous_bytes = + DOWNLOAD_SOURCE_CACHE_BYTES.fetch_add(key_bytes, Ordering::Relaxed); + if previous_bytes + key_bytes <= MAX_DOWNLOAD_SOURCE_CACHE_BYTES { + DOWNLOAD_SOURCE_CACHE.insert(user_agent.to_owned(), source.clone()); + } else { + DOWNLOAD_SOURCE_CACHE_BYTES.fetch_sub(key_bytes, Ordering::Relaxed); + } + + source +} + +fn normalize_download_source_uncached( + user_agent: &str, +) -> Option { DOWNLOAD_SOURCE_PATTERNS.iter().find_map(|(regex, source)| { regex.is_match(user_agent).then(|| source.into_source()) }) From 2587cc2f114ebf3704936cc72db910b4c530346a Mon Sep 17 00:00:00 2001 From: aecsocket Date: Fri, 22 May 2026 19:06:19 +0100 Subject: [PATCH 5/8] filtering --- .../src/routes/v3/analytics_get/facets.rs | 6 +- .../src/routes/v3/analytics_get/mod.rs | 460 ++++++++++++++++-- apps/labrinth/src/routes/v3/users.rs | 9 +- 3 files changed, 422 insertions(+), 53 deletions(-) diff --git a/apps/labrinth/src/routes/v3/analytics_get/facets.rs b/apps/labrinth/src/routes/v3/analytics_get/facets.rs index 8028048687..c40a6ffb4a 100644 --- a/apps/labrinth/src/routes/v3/analytics_get/facets.rs +++ b/apps/labrinth/src/routes/v3/analytics_get/facets.rs @@ -11,11 +11,7 @@ 
use crate::{ models::{DBProjectId, DBUser, DBVersionId}, redis::RedisPool, }, - models::{ - ids::VersionId, - pats::Scopes, - v3::analytics::DownloadReason, - }, + models::{ids::VersionId, pats::Scopes, v3::analytics::DownloadReason}, queue::session::AuthQueue, routes::ApiError, }; diff --git a/apps/labrinth/src/routes/v3/analytics_get/mod.rs b/apps/labrinth/src/routes/v3/analytics_get/mod.rs index 594cbc49ad..63986d65f1 100644 --- a/apps/labrinth/src/routes/v3/analytics_get/mod.rs +++ b/apps/labrinth/src/routes/v3/analytics_get/mod.rs @@ -110,25 +110,31 @@ pub enum TimeRangeResolution { #[derive(Debug, Default, Serialize, Deserialize, utoipa::ToSchema)] pub struct ReturnMetrics { /// How many times a project page has been viewed. - pub project_views: Option>, + pub project_views: Option>, /// How many times a project has been downloaded. - pub project_downloads: Option>, + pub project_downloads: + Option>, /// How long users have been playing a project. - pub project_playtime: Option>, + pub project_playtime: + Option>, /// How much payout revenue a project has generated. - pub project_revenue: Option>, + pub project_revenue: + Option>, /// How many times an affiliate code has been clicked. - pub affiliate_code_clicks: Option>, + pub affiliate_code_clicks: + Option>, /// How many times a product has been purchased with an affiliate code. - pub affiliate_code_conversions: - Option>, + pub affiliate_code_conversions: Option< + Metrics, + >, /// How much payout revenue an affiliate code has generated. - pub affiliate_code_revenue: Option>, + pub affiliate_code_revenue: + Option>, } /// See [`ReturnMetrics`]. #[derive(Debug, Serialize, Deserialize, utoipa::ToSchema)] -pub struct Metrics { +pub struct Metrics { /// When collecting metrics, what fields do we want to group the results by? 
/// /// For example, if we have two views entries: @@ -143,7 +149,13 @@ pub struct Metrics { /// aggregate of the two rows: /// - `{ "project_id": "abcdefgh", "count": 8 }` #[serde(default = "Vec::default")] - pub bucket_by: Vec, + pub bucket_by: Vec, + /// Filters to apply before aggregating this metric. + /// + /// Values within one field are ORed together. Different fields are ANDed + /// together. An empty list means that field is not filtered. + #[serde(default)] + pub filter_by: FilterBy, } /// Fields for [`ReturnMetrics::project_views`]. @@ -166,6 +178,23 @@ pub enum ProjectViewsField { Country, } +/// Filters for [`ReturnMetrics::project_views`]. +#[derive(Debug, Clone, Default, Serialize, Deserialize, utoipa::ToSchema)] +pub struct ProjectViewsFilters { + /// Referrer domains to include. + #[serde(default)] + pub domain: Vec, + /// Modrinth site paths to include. + #[serde(default)] + pub site_path: Vec, + /// Monetization states to include. + #[serde(default)] + pub monetized: Vec, + /// Country codes to include. + #[serde(default)] + pub country: Vec, +} + /// Fields for [`ReturnMetrics::project_downloads`]. #[derive( Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, utoipa::ToSchema, @@ -194,6 +223,35 @@ pub enum ProjectDownloadsField { Loader, } +/// Filters for [`ReturnMetrics::project_downloads`]. +#[derive(Debug, Clone, Default, Serialize, Deserialize, utoipa::ToSchema)] +pub struct ProjectDownloadsFilters { + /// Version IDs to include. + #[serde(default)] + pub version_id: Vec, + /// Referrer domains to include. + #[serde(default)] + pub domain: Vec, + /// Normalized download sources to include. + #[serde(default)] + pub user_agent: Vec, + /// Monetization states to include. + #[serde(default)] + pub monetized: Vec, + /// Country codes to include. + #[serde(default)] + pub country: Vec, + /// Download reasons to include. + #[serde(default)] + pub reason: Vec, + /// Game versions to include. 
+ #[serde(default)] + pub game_version: Vec, + /// Loaders to include. + #[serde(default)] + pub loader: Vec, +} + /// Fields for [`ReturnMetrics::project_playtime`]. #[derive( Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, utoipa::ToSchema, @@ -214,6 +272,23 @@ pub enum ProjectPlaytimeField { Country, } +/// Filters for [`ReturnMetrics::project_playtime`]. +#[derive(Debug, Clone, Default, Serialize, Deserialize, utoipa::ToSchema)] +pub struct ProjectPlaytimeFilters { + /// Version IDs to include. + #[serde(default)] + pub version_id: Vec, + /// Loaders to include. + #[serde(default)] + pub loader: Vec, + /// Game versions to include. + #[serde(default)] + pub game_version: Vec, + /// Country codes to include. + #[serde(default)] + pub country: Vec, +} + /// Fields for [`ReturnMetrics::project_revenue`]. #[derive( Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, utoipa::ToSchema, @@ -224,6 +299,10 @@ pub enum ProjectRevenueField { ProjectId, } +/// Filters for [`ReturnMetrics::project_revenue`]. +#[derive(Debug, Clone, Default, Serialize, Deserialize, utoipa::ToSchema)] +pub struct ProjectRevenueFilters {} + /// Fields for [`ReturnMetrics::affiliate_code_clicks`]. #[derive( Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, utoipa::ToSchema, @@ -234,6 +313,14 @@ pub enum AffiliateCodeClicksField { AffiliateCodeId, } +/// Filters for [`ReturnMetrics::affiliate_code_clicks`]. +#[derive(Debug, Clone, Default, Serialize, Deserialize, utoipa::ToSchema)] +pub struct AffiliateCodeClicksFilters { + /// Affiliate code IDs to include. + #[serde(default)] + pub affiliate_code_id: Vec, +} + /// Fields for [`ReturnMetrics::affiliate_code_conversions`]. #[derive( Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, utoipa::ToSchema, @@ -244,6 +331,14 @@ pub enum AffiliateCodeConversionsField { AffiliateCodeId, } +/// Filters for [`ReturnMetrics::affiliate_code_conversions`]. 
+#[derive(Debug, Clone, Default, Serialize, Deserialize, utoipa::ToSchema)] +pub struct AffiliateCodeConversionsFilters { + /// Affiliate code IDs to include. + #[serde(default)] + pub affiliate_code_id: Vec, +} + /// Fields for [`ReturnMetrics::affiliate_code_revenue`]. #[derive( Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, utoipa::ToSchema, @@ -254,6 +349,14 @@ pub enum AffiliateCodeRevenueField { AffiliateCodeId, } +/// Filters for [`ReturnMetrics::affiliate_code_revenue`]. +#[derive(Debug, Clone, Default, Serialize, Deserialize, utoipa::ToSchema)] +pub struct AffiliateCodeRevenueFilters { + /// Affiliate code IDs to include. + #[serde(default)] + pub affiliate_code_id: Vec, +} + /// Minimum width of a [`TimeSlice`], controlled by [`TimeRange::resolution`]. pub const MIN_RESOLUTION: TimeDelta = TimeDelta::minutes(60); @@ -510,6 +613,10 @@ mod query { const USE_SITE_PATH: &str = "{use_site_path: Bool}"; const USE_MONETIZED: &str = "{use_monetized: Bool}"; const USE_COUNTRY: &str = "{use_country: Bool}"; + const FILTER_DOMAIN: &str = "{filter_domain: Array(String)}"; + const FILTER_SITE_PATH: &str = "{filter_site_path: Array(String)}"; + const FILTER_MONETIZED: &str = "{filter_monetized: UInt8}"; + const FILTER_COUNTRY: &str = "{filter_country: Array(String)}"; formatcp!( "SELECT @@ -527,6 +634,10 @@ mod query { -- not the possibly-zero one, -- by using `views.project_id` instead of `project_id` AND views.project_id IN {PROJECT_IDS} + AND (empty({FILTER_DOMAIN}) OR views.domain IN {FILTER_DOMAIN}) + AND (empty({FILTER_SITE_PATH}) OR views.site_path IN {FILTER_SITE_PATH}) + AND ({FILTER_MONETIZED} = 2 OR CAST(views.monetized AS UInt8) = {FILTER_MONETIZED}) + AND (empty({FILTER_COUNTRY}) OR views.country IN {FILTER_COUNTRY}) GROUP BY bucket, project_id, domain, site_path, monetized, country " ) @@ -557,6 +668,14 @@ mod query { const USE_REASON: &str = "{use_reason: Bool}"; const USE_GAME_VERSION: &str = "{use_game_version: Bool}"; const USE_LOADER: 
&str = "{use_loader: Bool}"; + const FILTER_DOMAIN: &str = "{filter_domain: Array(String)}"; + const FILTER_VERSION_ID: &str = "{filter_version_id: Array(UInt64)}"; + const FILTER_MONETIZED: &str = "{filter_monetized: UInt8}"; + const FILTER_COUNTRY: &str = "{filter_country: Array(String)}"; + const FILTER_REASON: &str = "{filter_reason: Array(String)}"; + const FILTER_GAME_VERSION: &str = + "{filter_game_version: Array(String)}"; + const FILTER_LOADER: &str = "{filter_loader: Array(String)}"; formatcp!( "SELECT @@ -578,6 +697,13 @@ mod query { -- not the possibly-zero one, -- by using `downloads.project_id` instead of `project_id` AND downloads.project_id IN {PROJECT_IDS} + AND (empty({FILTER_DOMAIN}) OR downloads.domain IN {FILTER_DOMAIN}) + AND (empty({FILTER_VERSION_ID}) OR downloads.version_id IN {FILTER_VERSION_ID}) + AND ({FILTER_MONETIZED} = 2 OR CAST(downloads.user_id != 0 AS UInt8) = {FILTER_MONETIZED}) + AND (empty({FILTER_COUNTRY}) OR downloads.country IN {FILTER_COUNTRY}) + AND (empty({FILTER_REASON}) OR downloads.reason IN {FILTER_REASON}) + AND (empty({FILTER_GAME_VERSION}) OR downloads.game_version IN {FILTER_GAME_VERSION}) + AND (empty({FILTER_LOADER}) OR downloads.loader IN {FILTER_LOADER}) GROUP BY bucket, project_id, domain, user_agent, version_id, monetized, country, reason, game_version, loader" ) }; @@ -601,6 +727,11 @@ mod query { const USE_GAME_VERSION: &str = "{use_game_version: Bool}"; const USE_COUNTRY: &str = "{use_country: Bool}"; const PARENT_VERSION_IDS: &str = "{parent_version_ids: Array(UInt64)}"; + const FILTER_VERSION_ID: &str = "{filter_version_id: Array(UInt64)}"; + const FILTER_LOADER: &str = "{filter_loader: Array(String)}"; + const FILTER_GAME_VERSION: &str = + "{filter_game_version: Array(String)}"; + const FILTER_COUNTRY: &str = "{filter_country: Array(String)}"; formatcp!( "SELECT @@ -626,6 +757,10 @@ mod query { WHERE recorded BETWEEN {TIME_RANGE_START} AND {TIME_RANGE_END} AND playtime.project_id IN {PROJECT_IDS} + AND 
(empty({FILTER_VERSION_ID}) OR playtime.version_id IN {FILTER_VERSION_ID}) + AND (empty({FILTER_LOADER}) OR playtime.loader IN {FILTER_LOADER}) + AND (empty({FILTER_GAME_VERSION}) OR playtime.game_version IN {FILTER_GAME_VERSION}) + AND (empty({FILTER_COUNTRY}) OR playtime.country IN {FILTER_COUNTRY}) UNION ALL @@ -642,6 +777,10 @@ mod query { WHERE recorded BETWEEN {TIME_RANGE_START} AND {TIME_RANGE_END} AND parent IN {PARENT_VERSION_IDS} + AND (empty({FILTER_VERSION_ID}) OR playtime.version_id IN {FILTER_VERSION_ID}) + AND (empty({FILTER_LOADER}) OR playtime.loader IN {FILTER_LOADER}) + AND (empty({FILTER_GAME_VERSION}) OR playtime.game_version IN {FILTER_GAME_VERSION}) + AND (empty({FILTER_COUNTRY}) OR playtime.country IN {FILTER_COUNTRY}) ) GROUP BY bucket, project_id, parent_version_id, version_id, loader, game_version, country" ) @@ -657,6 +796,8 @@ mod query { pub const AFFILIATE_CODE_CLICKS: &str = { const USE_AFFILIATE_CODE_ID: &str = "{use_affiliate_code_id: Bool}"; const AFFILIATE_CODE_IDS: &str = "{affiliate_code_ids: Array(UInt64)}"; + const FILTER_AFFILIATE_CODE_ID: &str = + "{filter_affiliate_code_id: Array(UInt64)}"; formatcp!( "SELECT @@ -670,6 +811,7 @@ mod query { -- not the possibly-zero one, -- by using `affiliate_code_clicks.affiliate_code_id` instead of `project_id` -- AND affiliate_code_clicks.affiliate_code_id IN {AFFILIATE_CODE_IDS} + AND (empty({FILTER_AFFILIATE_CODE_ID}) OR affiliate_code_id IN {FILTER_AFFILIATE_CODE_ID}) GROUP BY bucket, affiliate_code_id" ) }; @@ -812,6 +954,25 @@ pub async fn fetch_analytics( ("use_monetized", uses(F::Monetized)), ("use_country", uses(F::Country)), ], + vec![ + ClickhouseFilterParam::String( + "filter_domain", + &metrics.filter_by.domain, + ), + ClickhouseFilterParam::String( + "filter_site_path", + &metrics.filter_by.site_path, + ), + ClickhouseFilterParam::Bool( + "filter_monetized", + &metrics.filter_by.monetized, + ), + ClickhouseFilterParam::String( + "filter_country", + 
&metrics.filter_by.country, + ), + ], + |_| true, |row| row.bucket, |row| { let country = if uses(F::Country) { @@ -842,14 +1003,16 @@ pub async fn fetch_analytics( use ProjectDownloadsField as F; let uses = |field| metrics.bucket_by.contains(&field); - query_clickhouse::( + query_clickhouse_downloads( &mut query_clickhouse_cx, - query::DOWNLOADS, - ClickhouseQueryParams::PROJECT_IDS, &[ ("use_project_id", uses(F::ProjectId)), ("use_domain", uses(F::Domain)), - ("use_user_agent", uses(F::UserAgent)), + ( + "use_user_agent", + uses(F::UserAgent) + || !metrics.filter_by.user_agent.is_empty(), + ), ("use_version_id", uses(F::VersionId)), ("use_monetized", uses(F::Monetized)), ("use_country", uses(F::Country)), @@ -857,37 +1020,38 @@ pub async fn fetch_analytics( ("use_game_version", uses(F::GameVersion)), ("use_loader", uses(F::Loader)), ], - |row| row.bucket, - |row| { - let country = if uses(F::Country) { - Some(condense_country(row.country, row.downloads)) - } else { - None - }; - AnalyticsData::Project(ProjectAnalytics { - source_project: row.project_id.into(), - metrics: ProjectMetrics::Downloads(ProjectDownloads { - domain: none_if_empty(row.domain), - user_agent: if uses(F::UserAgent) { - normalize_download_source(&row.user_agent) - } else { - None - }, - version_id: none_if_zero_version_id(row.version_id), - monetized: match row.monetized { - 0 => Some(false), - 1 => Some(true), - _ => None, - }, - country, - reason: none_if_empty(row.reason) - .and_then(|s| s.parse().ok()), - game_version: none_if_empty(row.game_version), - loader: none_if_empty(row.loader), - downloads: row.downloads, - }), - }) - }, + &metrics.filter_by, + uses(F::UserAgent), + vec![ + ClickhouseFilterParam::String( + "filter_domain", + &metrics.filter_by.domain, + ), + ClickhouseFilterParam::VersionId( + "filter_version_id", + &metrics.filter_by.version_id, + ), + ClickhouseFilterParam::Bool( + "filter_monetized", + &metrics.filter_by.monetized, + ), + ClickhouseFilterParam::String( + 
"filter_country", + &metrics.filter_by.country, + ), + ClickhouseFilterParam::DownloadReason( + "filter_reason", + &metrics.filter_by.reason, + ), + ClickhouseFilterParam::String( + "filter_game_version", + &metrics.filter_by.game_version, + ), + ClickhouseFilterParam::String( + "filter_loader", + &metrics.filter_by.loader, + ), + ], ) .await?; } @@ -906,6 +1070,24 @@ pub async fn fetch_analytics( ("use_game_version", uses(F::GameVersion)), ("use_country", uses(F::Country)), ], + vec![ + ClickhouseFilterParam::VersionId( + "filter_version_id", + &metrics.filter_by.version_id, + ), + ClickhouseFilterParam::String( + "filter_loader", + &metrics.filter_by.loader, + ), + ClickhouseFilterParam::String( + "filter_game_version", + &metrics.filter_by.game_version, + ), + ClickhouseFilterParam::String( + "filter_country", + &metrics.filter_by.country, + ), + ], ) .await?; } @@ -921,6 +1103,11 @@ pub async fn fetch_analytics( query::AFFILIATE_CODE_CLICKS, ClickhouseQueryParams::empty(), &[("use_affiliate_code_id", uses(F::AffiliateCodeId))], + vec![ClickhouseFilterParam::AffiliateCodeId( + "filter_affiliate_code_id", + &metrics.filter_by.affiliate_code_id, + )], + |_| true, |row| row.bucket, |row| { AnalyticsData::AffiliateCode(AffiliateCodeAnalytics { @@ -990,6 +1177,12 @@ pub async fn fetch_analytics( } if let Some(metrics) = &req.return_metrics.affiliate_code_conversions { + let filter_affiliate_code_ids = metrics + .filter_by + .affiliate_code_id + .iter() + .map(|id| DBAffiliateCodeId::from(*id).0) + .collect::>(); let mut rows = sqlx::query!( "SELECT WIDTH_BUCKET( @@ -1008,12 +1201,14 @@ pub async fn fetch_analytics( ac.affiliate = $4 AND usa.created_at BETWEEN $1 AND $2 AND c.status = 'succeeded' + AND (cardinality($6::bigint[]) = 0 OR affiliate_code = ANY($6)) GROUP BY bucket, affiliate_code", req.time_range.start, req.time_range.end, num_time_slices as i64, DBUserId::from(user.id) as DBUserId, 
metrics.bucket_by.contains(&AffiliateCodeConversionsField::AffiliateCodeId), + &filter_affiliate_code_ids, ) .fetch(&**pool); while let Some(row) = rows.next().await.transpose()? { @@ -1049,6 +1244,12 @@ pub async fn fetch_analytics( return Err(AuthenticationError::InvalidCredentials.into()); } + let filter_affiliate_code_ids = metrics + .filter_by + .affiliate_code_id + .iter() + .map(|id| DBAffiliateCodeId::from(*id).0) + .collect::>(); let mut rows = sqlx::query!( "SELECT WIDTH_BUCKET( @@ -1064,12 +1265,14 @@ pub async fn fetch_analytics( user_id = $4 AND payouts_values.affiliate_code_source IS NOT NULL AND created BETWEEN $1 AND $2 + AND (cardinality($6::bigint[]) = 0 OR affiliate_code_source = ANY($6)) GROUP BY bucket, affiliate_code_source", req.time_range.start, req.time_range.end, num_time_slices as i64, DBUserId::from(user.id) as DBUserId, metrics.bucket_by.contains(&AffiliateCodeRevenueField::AffiliateCodeId), + &filter_affiliate_code_ids, ) .fetch(&**pool); while let Some(row) = rows.next().await.transpose()? 
{ @@ -1246,6 +1449,20 @@ struct PlaytimeBucket { country: Option, } +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +struct DownloadBucket { + bucket: u64, + project_id: DBProjectId, + domain: Option, + user_agent: Option, + version_id: Option, + monetized: Option, + country: Option, + reason: Option, + game_version: Option, + loader: Option, +} + #[derive(Debug, Clone, Copy, Default)] struct ClickhouseQueryParams { project_ids: bool, @@ -1253,6 +1470,49 @@ struct ClickhouseQueryParams { affiliate_code_ids: bool, } +enum ClickhouseFilterParam<'a> { + String(&'static str, &'a [String]), + Bool(&'static str, &'a [bool]), + VersionId(&'static str, &'a [VersionId]), + AffiliateCodeId(&'static str, &'a [AffiliateCodeId]), + DownloadReason(&'static str, &'a [DownloadReason]), +} + +impl ClickhouseFilterParam<'_> { + fn bind(self, query: clickhouse::query::Query) -> clickhouse::query::Query { + match self { + Self::String(name, values) => query.param(name, values), + Self::Bool(name, values) => { + let value = match values { + [false] => 0, + [true] => 1, + _ => 2, + }; + query.param(name, value) + } + Self::VersionId(name, values) => { + let values = values + .iter() + .map(|id| DBVersionId::from(*id)) + .collect::>(); + query.param(name, values) + } + Self::AffiliateCodeId(name, values) => { + let values = values + .iter() + .map(|id| DBAffiliateCodeId::from(*id)) + .collect::>(); + query.param(name, values) + } + Self::DownloadReason(name, values) => { + let values = + values.iter().map(ToString::to_string).collect::>(); + query.param(name, values) + } + } + } +} + impl ClickhouseQueryParams { const PROJECT_IDS: Self = Self { project_ids: true, @@ -1287,6 +1547,7 @@ async fn query_clickhouse_playtime( cx: &mut QueryClickhouseContext<'_>, parent_version_projects: &HashMap, use_columns: &[(&str, bool)], + filter_params: Vec>, ) -> Result<(), ApiError> { let uses = |name| { use_columns @@ -1304,6 +1565,9 @@ async fn query_clickhouse_playtime( for (param_name, used) in 
use_columns { query = query.param(param_name, used) } + for filter_param in filter_params { + query = filter_param.bind(query); + } let mut cursor = query.fetch::()?; let mut buckets = HashMap::::new(); @@ -1355,11 +1619,111 @@ async fn query_clickhouse_playtime( Ok(()) } +async fn query_clickhouse_downloads( + cx: &mut QueryClickhouseContext<'_>, + use_columns: &[(&str, bool)], + filters: &ProjectDownloadsFilters, + bucket_by_user_agent: bool, + filter_params: Vec>, +) -> Result<(), ApiError> { + let uses = |name| { + use_columns + .iter() + .any(|(column_name, used)| *column_name == name && *used) + }; + let mut query = cx + .clickhouse + .query(query::DOWNLOADS) + .param("time_range_start", cx.req.time_range.start.timestamp()) + .param("time_range_end", cx.req.time_range.end.timestamp()) + .param("time_slices", cx.time_slices.len()) + .param("project_ids", cx.project_ids); + for (param_name, used) in use_columns { + query = query.param(param_name, used) + } + for filter_param in filter_params { + query = filter_param.bind(query); + } + + let mut cursor = query.fetch::()?; + let mut buckets = HashMap::::new(); + + while let Some(row) = cursor.next().await? 
{ + let normalized_source = normalize_download_source(&row.user_agent); + if !filters.user_agent.is_empty() + && !normalized_source + .as_ref() + .is_some_and(|source| filters.user_agent.contains(source)) + { + continue; + } + + let key = DownloadBucket { + bucket: row.bucket, + project_id: row.project_id, + domain: uses("use_domain").then(|| row.domain.clone()), + user_agent: bucket_by_user_agent + .then_some(normalized_source) + .flatten(), + version_id: uses("use_version_id").then_some(row.version_id), + monetized: if uses("use_monetized") { + match row.monetized { + 0 => Some(false), + 1 => Some(true), + _ => None, + } + } else { + None + }, + country: uses("use_country").then(|| row.country.clone()), + reason: if uses("use_reason") { + none_if_empty(row.reason.clone()).and_then(|s| s.parse().ok()) + } else { + None + }, + game_version: uses("use_game_version") + .then(|| row.game_version.clone()), + loader: uses("use_loader").then(|| row.loader.clone()), + }; + + *buckets.entry(key).or_default() += row.downloads; + } + + for (key, downloads) in buckets { + add_to_time_slice( + cx.time_slices, + key.bucket as usize, + AnalyticsData::Project(ProjectAnalytics { + source_project: key.project_id.into(), + metrics: ProjectMetrics::Downloads(ProjectDownloads { + domain: key.domain.and_then(none_if_empty), + user_agent: key.user_agent, + version_id: key + .version_id + .and_then(none_if_zero_version_id), + monetized: key.monetized, + country: key + .country + .map(|country| condense_country(country, downloads)), + reason: key.reason, + game_version: key.game_version.and_then(none_if_empty), + loader: key.loader.and_then(none_if_empty), + downloads, + }), + }), + )?; + } + + Ok(()) +} + async fn query_clickhouse( cx: &mut QueryClickhouseContext<'_>, query: &str, params: ClickhouseQueryParams, use_columns: &[(&str, bool)], + filter_params: Vec>, + row_filter: impl Fn(&Row::Value<'_>) -> bool, // I hate using the hidden type Row::Value here, but it's what next() returns, 
so I see no other option row_get_bucket: impl Fn(&Row::Value<'_>) -> u64, row_to_analytics: impl Fn(Row::Value<'_>) -> AnalyticsData, @@ -1385,9 +1749,15 @@ where for (param_name, used) in use_columns { query = query.param(param_name, used) } + for filter_param in filter_params { + query = filter_param.bind(query); + } let mut cursor = query.fetch::()?; while let Some(row) = cursor.next().await? { + if !row_filter(&row) { + continue; + } let bucket = row_get_bucket(&row) as usize; add_to_time_slice(cx.time_slices, bucket, row_to_analytics(row))?; } diff --git a/apps/labrinth/src/routes/v3/users.rs b/apps/labrinth/src/routes/v3/users.rs index 5c0329f002..bfb366a73a 100644 --- a/apps/labrinth/src/routes/v3/users.rs +++ b/apps/labrinth/src/routes/v3/users.rs @@ -88,8 +88,8 @@ pub async fn all_projects( let user_project_ids = DBUser::get_projects(user.id.into(), &**pool, &redis).await?; - let organization_ids = DBUser::get_organizations(user.id.into(), &**pool) - .await?; + let organization_ids = + DBUser::get_organizations(user.id.into(), &**pool).await?; let organizations_data = DBOrganization::get_many_ids(&organization_ids, &**pool, &redis) .await?; @@ -115,7 +115,10 @@ pub async fn all_projects( let mut team_groups = HashMap::new(); for member in teams_data { - team_groups.entry(member.team_id).or_insert(vec![]).push(member); + team_groups + .entry(member.team_id) + .or_insert(vec![]) + .push(member); } let mut organizations = HashMap::new(); From f32939b08224e81f56b0bf862b02c8f28eaa2d42 Mon Sep 17 00:00:00 2001 From: aecsocket Date: Fri, 22 May 2026 19:08:07 +0100 Subject: [PATCH 6/8] prepare --- ...a1371fb63be744f1d87c68229342ffadbe998.json | 22 +++++++++++++++++++ ...5efefeedfc0fe11eebe9c939725ea0dad677.json} | 7 +++--- ...4b1f2d7da89f733dce227911753c30238eea.json} | 7 +++--- 3 files changed, 30 insertions(+), 6 deletions(-) create mode 100644 apps/labrinth/.sqlx/query-12fa322e09465aab925ac33f8b2a1371fb63be744f1d87c68229342ffadbe998.json rename 
apps/labrinth/.sqlx/{query-9152c0d7e7f508491b601c16c6eed05e2333475e96007180acda6086ee2825c0.json => query-4b9d6d9533145f3d31eb6c0d15eb5efefeedfc0fe11eebe9c939725ea0dad677.json} (84%) rename apps/labrinth/.sqlx/{query-eeea6cad39d645d3f5a0a4115c8350e08b7850a09a86c62d0de371a1caed7c07.json => query-71eb9aca5ea309ddbb05986d47d04b1f2d7da89f733dce227911753c30238eea.json} (83%) diff --git a/apps/labrinth/.sqlx/query-12fa322e09465aab925ac33f8b2a1371fb63be744f1d87c68229342ffadbe998.json b/apps/labrinth/.sqlx/query-12fa322e09465aab925ac33f8b2a1371fb63be744f1d87c68229342ffadbe998.json new file mode 100644 index 0000000000..83a7b34b02 --- /dev/null +++ b/apps/labrinth/.sqlx/query-12fa322e09465aab925ac33f8b2a1371fb63be744f1d87c68229342ffadbe998.json @@ -0,0 +1,22 @@ +{ + "db_name": "PostgreSQL", + "query": "\n SELECT id\n FROM versions\n WHERE mod_id = ANY($1)\n ", + "describe": { + "columns": [ + { + "ordinal": 0, + "name": "id", + "type_info": "Int8" + } + ], + "parameters": { + "Left": [ + "Int8Array" + ] + }, + "nullable": [ + false + ] + }, + "hash": "12fa322e09465aab925ac33f8b2a1371fb63be744f1d87c68229342ffadbe998" +} diff --git a/apps/labrinth/.sqlx/query-9152c0d7e7f508491b601c16c6eed05e2333475e96007180acda6086ee2825c0.json b/apps/labrinth/.sqlx/query-4b9d6d9533145f3d31eb6c0d15eb5efefeedfc0fe11eebe9c939725ea0dad677.json similarity index 84% rename from apps/labrinth/.sqlx/query-9152c0d7e7f508491b601c16c6eed05e2333475e96007180acda6086ee2825c0.json rename to apps/labrinth/.sqlx/query-4b9d6d9533145f3d31eb6c0d15eb5efefeedfc0fe11eebe9c939725ea0dad677.json index fe4b626066..28e51da655 100644 --- a/apps/labrinth/.sqlx/query-9152c0d7e7f508491b601c16c6eed05e2333475e96007180acda6086ee2825c0.json +++ b/apps/labrinth/.sqlx/query-4b9d6d9533145f3d31eb6c0d15eb5efefeedfc0fe11eebe9c939725ea0dad677.json @@ -1,6 +1,6 @@ { "db_name": "PostgreSQL", - "query": "SELECT\n WIDTH_BUCKET(\n EXTRACT(EPOCH FROM usa.created_at)::bigint,\n EXTRACT(EPOCH FROM $1::timestamp with time zone AT TIME ZONE 
'UTC')::bigint,\n EXTRACT(EPOCH FROM $2::timestamp with time zone AT TIME ZONE 'UTC')::bigint,\n $3::integer\n ) AS bucket,\n CASE WHEN $5 THEN affiliate_code ELSE 0 END AS affiliate_code,\n COUNT(*) AS conversions\n FROM users_subscriptions_affiliations usa\n INNER JOIN affiliate_codes ac ON ac.id = usa.affiliate_code\n INNER JOIN users_subscriptions us ON us.id = usa.subscription_id\n INNER JOIN charges c ON c.subscription_id = us.id\n WHERE\n ac.affiliate = $4\n AND usa.created_at BETWEEN $1 AND $2\n AND c.status = 'succeeded'\n GROUP BY bucket, affiliate_code", + "query": "SELECT\n WIDTH_BUCKET(\n EXTRACT(EPOCH FROM usa.created_at)::bigint,\n EXTRACT(EPOCH FROM $1::timestamp with time zone AT TIME ZONE 'UTC')::bigint,\n EXTRACT(EPOCH FROM $2::timestamp with time zone AT TIME ZONE 'UTC')::bigint,\n $3::integer\n ) AS bucket,\n CASE WHEN $5 THEN affiliate_code ELSE 0 END AS affiliate_code,\n COUNT(*) AS conversions\n FROM users_subscriptions_affiliations usa\n INNER JOIN affiliate_codes ac ON ac.id = usa.affiliate_code\n INNER JOIN users_subscriptions us ON us.id = usa.subscription_id\n INNER JOIN charges c ON c.subscription_id = us.id\n WHERE\n ac.affiliate = $4\n AND usa.created_at BETWEEN $1 AND $2\n AND c.status = 'succeeded'\n AND (cardinality($6::bigint[]) = 0 OR affiliate_code = ANY($6))\n GROUP BY bucket, affiliate_code", "describe": { "columns": [ { @@ -25,7 +25,8 @@ "Timestamptz", "Int4", "Int8", - "Bool" + "Bool", + "Int8Array" ] }, "nullable": [ @@ -34,5 +35,5 @@ null ] }, - "hash": "9152c0d7e7f508491b601c16c6eed05e2333475e96007180acda6086ee2825c0" + "hash": "4b9d6d9533145f3d31eb6c0d15eb5efefeedfc0fe11eebe9c939725ea0dad677" } diff --git a/apps/labrinth/.sqlx/query-eeea6cad39d645d3f5a0a4115c8350e08b7850a09a86c62d0de371a1caed7c07.json b/apps/labrinth/.sqlx/query-71eb9aca5ea309ddbb05986d47d04b1f2d7da89f733dce227911753c30238eea.json similarity index 83% rename from 
apps/labrinth/.sqlx/query-eeea6cad39d645d3f5a0a4115c8350e08b7850a09a86c62d0de371a1caed7c07.json rename to apps/labrinth/.sqlx/query-71eb9aca5ea309ddbb05986d47d04b1f2d7da89f733dce227911753c30238eea.json index 41ee8d256a..f059dc932a 100644 --- a/apps/labrinth/.sqlx/query-eeea6cad39d645d3f5a0a4115c8350e08b7850a09a86c62d0de371a1caed7c07.json +++ b/apps/labrinth/.sqlx/query-71eb9aca5ea309ddbb05986d47d04b1f2d7da89f733dce227911753c30238eea.json @@ -1,6 +1,6 @@ { "db_name": "PostgreSQL", - "query": "SELECT\n WIDTH_BUCKET(\n EXTRACT(EPOCH FROM created)::bigint,\n EXTRACT(EPOCH FROM $1::timestamp with time zone AT TIME ZONE 'UTC')::bigint,\n EXTRACT(EPOCH FROM $2::timestamp with time zone AT TIME ZONE 'UTC')::bigint,\n $3::integer\n ) AS bucket,\n CASE WHEN $5 THEN affiliate_code_source ELSE 0 END AS affiliate_code_source,\n SUM(amount) amount_sum\n FROM payouts_values\n WHERE\n user_id = $4\n AND payouts_values.affiliate_code_source IS NOT NULL\n AND created BETWEEN $1 AND $2\n GROUP BY bucket, affiliate_code_source", + "query": "SELECT\n WIDTH_BUCKET(\n EXTRACT(EPOCH FROM created)::bigint,\n EXTRACT(EPOCH FROM $1::timestamp with time zone AT TIME ZONE 'UTC')::bigint,\n EXTRACT(EPOCH FROM $2::timestamp with time zone AT TIME ZONE 'UTC')::bigint,\n $3::integer\n ) AS bucket,\n CASE WHEN $5 THEN affiliate_code_source ELSE 0 END AS affiliate_code_source,\n SUM(amount) amount_sum\n FROM payouts_values\n WHERE\n user_id = $4\n AND payouts_values.affiliate_code_source IS NOT NULL\n AND created BETWEEN $1 AND $2\n AND (cardinality($6::bigint[]) = 0 OR affiliate_code_source = ANY($6))\n GROUP BY bucket, affiliate_code_source", "describe": { "columns": [ { @@ -25,7 +25,8 @@ "Timestamptz", "Int4", "Int8", - "Bool" + "Bool", + "Int8Array" ] }, "nullable": [ @@ -34,5 +35,5 @@ null ] }, - "hash": "eeea6cad39d645d3f5a0a4115c8350e08b7850a09a86c62d0de371a1caed7c07" + "hash": "71eb9aca5ea309ddbb05986d47d04b1f2d7da89f733dce227911753c30238eea" } From f048bdbca181aab1ca079b2b9b80bf4cec3ce1e9 
Mon Sep 17 00:00:00 2001 From: aecsocket Date: Sat, 23 May 2026 17:16:36 +0100 Subject: [PATCH 7/8] Split up analytics metrics into separate modules --- .../metrics/affiliate_code_clicks.rs | 102 ++ .../metrics/affiliate_code_conversions.rs | 125 ++ .../metrics/affiliate_code_revenue.rs | 121 ++ .../routes/v3/analytics_get/metrics/mod.rs | 152 ++ .../metrics/project_downloads.rs | 487 ++++++ .../analytics_get/metrics/project_playtime.rs | 266 ++++ .../analytics_get/metrics/project_revenue.rs | 98 ++ .../v3/analytics_get/metrics/project_views.rs | 181 +++ .../src/routes/v3/analytics_get/mod.rs | 1414 +---------------- 9 files changed, 1593 insertions(+), 1353 deletions(-) create mode 100644 apps/labrinth/src/routes/v3/analytics_get/metrics/affiliate_code_clicks.rs create mode 100644 apps/labrinth/src/routes/v3/analytics_get/metrics/affiliate_code_conversions.rs create mode 100644 apps/labrinth/src/routes/v3/analytics_get/metrics/affiliate_code_revenue.rs create mode 100644 apps/labrinth/src/routes/v3/analytics_get/metrics/mod.rs create mode 100644 apps/labrinth/src/routes/v3/analytics_get/metrics/project_downloads.rs create mode 100644 apps/labrinth/src/routes/v3/analytics_get/metrics/project_playtime.rs create mode 100644 apps/labrinth/src/routes/v3/analytics_get/metrics/project_revenue.rs create mode 100644 apps/labrinth/src/routes/v3/analytics_get/metrics/project_views.rs diff --git a/apps/labrinth/src/routes/v3/analytics_get/metrics/affiliate_code_clicks.rs b/apps/labrinth/src/routes/v3/analytics_get/metrics/affiliate_code_clicks.rs new file mode 100644 index 0000000000..257c7cb1dd --- /dev/null +++ b/apps/labrinth/src/routes/v3/analytics_get/metrics/affiliate_code_clicks.rs @@ -0,0 +1,102 @@ +use const_format::formatcp; +use serde::{Deserialize, Serialize}; + +use crate::{ + database::models::DBAffiliateCodeId, models::ids::AffiliateCodeId, + routes::ApiError, +}; + +use super::super::{ + ClickhouseFilterParam, ClickhouseQueryParams, QueryClickhouseContext, + 
query_clickhouse, +}; +use super::{ + AffiliateCodeAnalytics, AffiliateCodeMetrics, AnalyticsData, Metrics, +}; + +const TIME_RANGE_START: &str = "{time_range_start: UInt64}"; +const TIME_RANGE_END: &str = "{time_range_end: UInt64}"; +const TIME_SLICES: &str = "{time_slices: UInt64}"; + +/// Fields for [`super::ReturnMetrics::affiliate_code_clicks`]. +#[derive( + Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, utoipa::ToSchema, +)] +#[serde(rename_all = "snake_case")] +pub enum AffiliateCodeClicksField { + /// Affiliate code ID. + AffiliateCodeId, +} + +/// Filters for [`super::ReturnMetrics::affiliate_code_clicks`]. +#[derive(Debug, Clone, Default, Serialize, Deserialize, utoipa::ToSchema)] +pub struct AffiliateCodeClicksFilters { + /// Affiliate code IDs to include. + #[serde(default)] + pub affiliate_code_id: Vec, +} + +/// [`super::ReturnMetrics::affiliate_code_clicks`]. +#[derive(Debug, Clone, Default, Serialize, Deserialize, utoipa::ToSchema)] +pub struct AffiliateCodeClicks { + /// Total clicks for this bucket. 
+ pub clicks: u64, +} + +#[derive(Debug, clickhouse::Row, serde::Deserialize)] +struct AffiliateCodeClickRow { + bucket: u64, + affiliate_code_id: DBAffiliateCodeId, + clicks: u64, +} + +const AFFILIATE_CODE_CLICKS: &str = { + const USE_AFFILIATE_CODE_ID: &str = "{use_affiliate_code_id: Bool}"; + const FILTER_AFFILIATE_CODE_ID: &str = + "{filter_affiliate_code_id: Array(UInt64)}"; + + formatcp!( + "SELECT + widthBucket(toUnixTimestamp(recorded), {TIME_RANGE_START}, {TIME_RANGE_END}, {TIME_SLICES}) AS bucket, + if({USE_AFFILIATE_CODE_ID}, affiliate_code_id, 0) AS affiliate_code_id, + COUNT(*) AS clicks + FROM affiliate_code_clicks + WHERE + recorded BETWEEN {TIME_RANGE_START} AND {TIME_RANGE_END} + -- make sure that the REAL affiliate code id is included, + -- not the possibly-zero one, + -- by using `affiliate_code_clicks.affiliate_code_id` instead of `project_id` + AND (empty({FILTER_AFFILIATE_CODE_ID}) OR affiliate_code_id IN {FILTER_AFFILIATE_CODE_ID}) + GROUP BY bucket, affiliate_code_id" + ) +}; + +pub(crate) async fn fetch( + cx: &mut QueryClickhouseContext<'_>, + metrics: &Metrics, +) -> Result<(), ApiError> { + use AffiliateCodeClicksField as F; + let uses = |field| metrics.bucket_by.contains(&field); + + query_clickhouse::( + cx, + AFFILIATE_CODE_CLICKS, + ClickhouseQueryParams::empty(), + &[("use_affiliate_code_id", uses(F::AffiliateCodeId))], + vec![ClickhouseFilterParam::AffiliateCodeId( + "filter_affiliate_code_id", + &metrics.filter_by.affiliate_code_id, + )], + |_| true, + |row| row.bucket, + |row| { + AnalyticsData::AffiliateCode(AffiliateCodeAnalytics { + source_affiliate_code: row.affiliate_code_id.into(), + metrics: AffiliateCodeMetrics::Clicks(AffiliateCodeClicks { + clicks: row.clicks, + }), + }) + }, + ) + .await +} diff --git a/apps/labrinth/src/routes/v3/analytics_get/metrics/affiliate_code_conversions.rs b/apps/labrinth/src/routes/v3/analytics_get/metrics/affiliate_code_conversions.rs new file mode 100644 index 0000000000..33204d4133 --- 
/dev/null +++ b/apps/labrinth/src/routes/v3/analytics_get/metrics/affiliate_code_conversions.rs @@ -0,0 +1,125 @@ +use futures::StreamExt; +use serde::{Deserialize, Serialize}; +use sqlx::Row; + +use crate::{ + database::{ + PgPool, + models::{DBAffiliateCodeId, DBUserId}, + }, + models::ids::AffiliateCodeId, + routes::ApiError, + util::error::Context, +}; + +use super::super::{TimeSlice, add_to_time_slice}; +use super::{ + AffiliateCodeAnalytics, AffiliateCodeMetrics, AnalyticsData, Metrics, +}; + +/// Fields for [`super::ReturnMetrics::affiliate_code_conversions`]. +#[derive( + Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, utoipa::ToSchema, +)] +#[serde(rename_all = "snake_case")] +pub enum AffiliateCodeConversionsField { + /// Affiliate code ID. + AffiliateCodeId, +} + +/// Filters for [`super::ReturnMetrics::affiliate_code_conversions`]. +#[derive(Debug, Clone, Default, Serialize, Deserialize, utoipa::ToSchema)] +pub struct AffiliateCodeConversionsFilters { + /// Affiliate code IDs to include. + #[serde(default)] + pub affiliate_code_id: Vec, +} + +/// [`super::ReturnMetrics::affiliate_code_conversions`]. +#[derive(Debug, Clone, Default, Serialize, Deserialize, utoipa::ToSchema)] +pub struct AffiliateCodeConversions { + /// Total conversions for this bucket. 
+    pub conversions: u64,
+}
+
+pub(crate) async fn fetch(
+    pool: &PgPool,
+    time_slices: &mut [TimeSlice],
+    req: &super::super::GetRequest,
+    user_id: DBUserId,
+    num_time_slices: usize,
+    metrics: &Metrics<
+        AffiliateCodeConversionsField,
+        AffiliateCodeConversionsFilters,
+    >,
+) -> Result<(), ApiError> {
+    let filter_affiliate_code_ids = metrics
+        .filter_by
+        .affiliate_code_id
+        .iter()
+        .map(|id| DBAffiliateCodeId::from(*id).0)
+        .collect::<Vec<_>>();
+    let mut rows = sqlx::query(
+        "SELECT
+            WIDTH_BUCKET(
+                EXTRACT(EPOCH FROM usa.created_at)::bigint,
+                EXTRACT(EPOCH FROM $1::timestamp with time zone AT TIME ZONE 'UTC')::bigint,
+                EXTRACT(EPOCH FROM $2::timestamp with time zone AT TIME ZONE 'UTC')::bigint,
+                $3::integer
+            ) AS bucket,
+            CASE WHEN $5 THEN affiliate_code ELSE 0 END AS affiliate_code,
+            COUNT(*) AS conversions
+        FROM users_subscriptions_affiliations usa
+        INNER JOIN affiliate_codes ac ON ac.id = usa.affiliate_code
+        INNER JOIN users_subscriptions us ON us.id = usa.subscription_id
+        INNER JOIN charges c ON c.subscription_id = us.id
+        WHERE
+            ac.affiliate = $4
+            AND usa.created_at BETWEEN $1 AND $2
+            AND c.status = 'succeeded'
+            AND (cardinality($6::bigint[]) = 0 OR affiliate_code = ANY($6))
+        GROUP BY 1, 2 -- ordinals: the bare name `affiliate_code` binds to the input column, not the CASE alias",
+    )
+    .bind(req.time_range.start)
+    .bind(req.time_range.end)
+    .bind(num_time_slices as i64)
+    .bind(user_id as DBUserId)
+    .bind(
+        metrics
+            .bucket_by
+            .contains(&AffiliateCodeConversionsField::AffiliateCodeId),
+    )
+    .bind(&filter_affiliate_code_ids)
+    .fetch(pool);
+    while let Some(row) = rows.next().await.transpose()? {
+        let bucket = row
+            .try_get::<Option<i32>, _>("bucket")?
+            .wrap_internal_err("bucket should be non-null - query bug!")?;
+        let bucket = usize::try_from(bucket).wrap_internal_err_with(|| {
+            eyre::eyre!(
+                "bucket value {bucket} does not fit into `usize` - query bug!"
+            )
+        })?;
+
+        let affiliate_code = row.try_get::<Option<i64>, _>("affiliate_code")?;
+        let conversion_count = row.try_get::<Option<i64>, _>("conversions")?;
+        let source_affiliate_code = AffiliateCodeId::from(DBAffiliateCodeId(
+            affiliate_code.unwrap_or_default(),
+        ));
+        let conversions = u64::try_from(conversion_count.unwrap_or_default())
+            .unwrap_or(u64::MAX);
+
+        add_to_time_slice(
+            time_slices,
+            bucket,
+            AnalyticsData::AffiliateCode(AffiliateCodeAnalytics {
+                source_affiliate_code,
+                metrics: AffiliateCodeMetrics::Conversions(
+                    AffiliateCodeConversions { conversions },
+                ),
+            }),
+        )?;
+    }
+
+    Ok(())
+}
diff --git a/apps/labrinth/src/routes/v3/analytics_get/metrics/affiliate_code_revenue.rs b/apps/labrinth/src/routes/v3/analytics_get/metrics/affiliate_code_revenue.rs
new file mode 100644
index 0000000000..883396c194
--- /dev/null
+++ b/apps/labrinth/src/routes/v3/analytics_get/metrics/affiliate_code_revenue.rs
@@ -0,0 +1,121 @@
+use futures::StreamExt;
+use rust_decimal::Decimal;
+use serde::{Deserialize, Serialize};
+use sqlx::Row;
+
+use crate::{
+    database::{
+        PgPool,
+        models::{DBAffiliateCodeId, DBUserId},
+    },
+    models::ids::AffiliateCodeId,
+    routes::ApiError,
+    util::error::Context,
+};
+
+use super::super::{TimeSlice, add_to_time_slice};
+use super::{
+    AffiliateCodeAnalytics, AffiliateCodeMetrics, AnalyticsData, Metrics,
+};
+
+/// Fields for [`super::ReturnMetrics::affiliate_code_revenue`].
+#[derive(
+    Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, utoipa::ToSchema,
+)]
+#[serde(rename_all = "snake_case")]
+pub enum AffiliateCodeRevenueField {
+    /// Affiliate code ID.
+    AffiliateCodeId,
+}
+
+/// Filters for [`super::ReturnMetrics::affiliate_code_revenue`].
+#[derive(Debug, Clone, Default, Serialize, Deserialize, utoipa::ToSchema)]
+pub struct AffiliateCodeRevenueFilters {
+    /// Affiliate code IDs to include.
+    #[serde(default)]
+    pub affiliate_code_id: Vec<AffiliateCodeId>,
+}
+
+/// [`super::ReturnMetrics::affiliate_code_revenue`].
+#[derive(Debug, Clone, Default, Serialize, Deserialize, utoipa::ToSchema)]
+pub struct AffiliateCodeRevenue {
+    /// Total revenue for this bucket.
+    pub revenue: Decimal,
+}
+
+pub(crate) async fn fetch(
+    pool: &PgPool,
+    time_slices: &mut [TimeSlice],
+    req: &super::super::GetRequest,
+    user_id: DBUserId,
+    num_time_slices: usize,
+    metrics: &Metrics<AffiliateCodeRevenueField, AffiliateCodeRevenueFilters>,
+) -> Result<(), ApiError> {
+    let filter_affiliate_code_ids = metrics
+        .filter_by
+        .affiliate_code_id
+        .iter()
+        .map(|id| DBAffiliateCodeId::from(*id).0)
+        .collect::<Vec<_>>();
+    let mut rows = sqlx::query(
+        "SELECT
+            WIDTH_BUCKET(
+                EXTRACT(EPOCH FROM created)::bigint,
+                EXTRACT(EPOCH FROM $1::timestamp with time zone AT TIME ZONE 'UTC')::bigint,
+                EXTRACT(EPOCH FROM $2::timestamp with time zone AT TIME ZONE 'UTC')::bigint,
+                $3::integer
+            ) AS bucket,
+            CASE WHEN $5 THEN affiliate_code_source ELSE 0 END AS affiliate_code_source,
+            SUM(amount) amount_sum
+        FROM payouts_values
+        WHERE
+            user_id = $4
+            AND payouts_values.affiliate_code_source IS NOT NULL
+            AND created BETWEEN $1 AND $2
+            AND (cardinality($6::bigint[]) = 0 OR affiliate_code_source = ANY($6))
+        GROUP BY 1, 2 -- ordinals: the bare name `affiliate_code_source` binds to the input column, not the CASE alias",
+    )
+    .bind(req.time_range.start)
+    .bind(req.time_range.end)
+    .bind(num_time_slices as i64)
+    .bind(user_id as DBUserId)
+    .bind(
+        metrics
+            .bucket_by
+            .contains(&AffiliateCodeRevenueField::AffiliateCodeId),
+    )
+    .bind(&filter_affiliate_code_ids)
+    .fetch(pool);
+    while let Some(row) = rows.next().await.transpose()? {
+        let bucket = row
+            .try_get::<Option<i32>, _>("bucket")?
+            .wrap_internal_err("bucket should be non-null - query bug!")?;
+        let bucket = usize::try_from(bucket).wrap_internal_err_with(|| {
+            eyre::eyre!(
+                "bucket value {bucket} does not fit into `usize` - query bug!"
+            )
+        })?;
+
+        let affiliate_code_source =
+            row.try_get::<Option<i64>, _>("affiliate_code_source")?;
+        let source_affiliate_code = AffiliateCodeId::from(DBAffiliateCodeId(
+            affiliate_code_source.unwrap_or_default(),
+        ));
+        let revenue = row
+            .try_get::<Option<Decimal>, _>("amount_sum")?
+            .unwrap_or_default();
+
+        add_to_time_slice(
+            time_slices,
+            bucket,
+            AnalyticsData::AffiliateCode(AffiliateCodeAnalytics {
+                source_affiliate_code,
+                metrics: AffiliateCodeMetrics::Revenue(AffiliateCodeRevenue {
+                    revenue,
+                }),
+            }),
+        )?;
+    }
+
+    Ok(())
+}
diff --git a/apps/labrinth/src/routes/v3/analytics_get/metrics/mod.rs b/apps/labrinth/src/routes/v3/analytics_get/metrics/mod.rs
new file mode 100644
index 0000000000..4315088a80
--- /dev/null
+++ b/apps/labrinth/src/routes/v3/analytics_get/metrics/mod.rs
@@ -0,0 +1,152 @@
+mod affiliate_code_clicks;
+mod affiliate_code_conversions;
+mod affiliate_code_revenue;
+mod project_downloads;
+mod project_playtime;
+mod project_revenue;
+mod project_views;
+
+use serde::{Deserialize, Serialize};
+
+use crate::models::ids::{AffiliateCodeId, ProjectId};
+
+pub(crate) use affiliate_code_clicks::fetch as fetch_affiliate_code_clicks;
+pub use affiliate_code_clicks::{
+    AffiliateCodeClicks, AffiliateCodeClicksField, AffiliateCodeClicksFilters,
+};
+pub(crate) use affiliate_code_conversions::fetch as fetch_affiliate_code_conversions;
+pub use affiliate_code_conversions::{
+    AffiliateCodeConversions, AffiliateCodeConversionsField,
+    AffiliateCodeConversionsFilters,
+};
+pub(crate) use affiliate_code_revenue::fetch as fetch_affiliate_code_revenue;
+pub use affiliate_code_revenue::{
+    AffiliateCodeRevenue, AffiliateCodeRevenueField,
+    AffiliateCodeRevenueFilters,
+};
+pub use project_downloads::{
+    DownloadSource, ProjectDownloads, ProjectDownloadsField,
+    ProjectDownloadsFilters,
+};
+pub(crate) use project_downloads::{
+    fetch as fetch_project_downloads, normalize_download_source,
+};
+pub(crate) use project_playtime::fetch as fetch_project_playtime;
+pub use
project_playtime::{ + ProjectPlaytime, ProjectPlaytimeField, ProjectPlaytimeFilters, +}; +pub(crate) use project_revenue::fetch as fetch_project_revenue; +pub use project_revenue::{ + ProjectRevenue, ProjectRevenueField, ProjectRevenueFilters, +}; +pub(crate) use project_views::fetch as fetch_project_views; +pub use project_views::{ProjectViews, ProjectViewsField, ProjectViewsFilters}; + +/// What metrics the caller would like to receive from this analytics get +/// request. +#[derive(Debug, Default, Serialize, Deserialize, utoipa::ToSchema)] +pub struct ReturnMetrics { + /// How many times a project page has been viewed. + pub project_views: Option>, + /// How many times a project has been downloaded. + pub project_downloads: + Option>, + /// How long users have been playing a project. + pub project_playtime: + Option>, + /// How much payout revenue a project has generated. + pub project_revenue: + Option>, + /// How many times an affiliate code has been clicked. + pub affiliate_code_clicks: + Option>, + /// How many times a product has been purchased with an affiliate code. + pub affiliate_code_conversions: Option< + Metrics, + >, + /// How much payout revenue an affiliate code has generated. + pub affiliate_code_revenue: + Option>, +} + +/// See [`ReturnMetrics`]. +#[derive(Debug, Serialize, Deserialize, utoipa::ToSchema)] +pub struct Metrics { + /// When collecting metrics, what fields do we want to group the results by? 
+ /// + /// For example, if we have two views entries: + /// - `{ "project_id": "abcdefgh", "domain": "youtube.com", "count": 5 }` + /// - `{ "project_id": "abcdefgh", "domain": "discord.com", "count": 3 }` + /// + /// If we bucket by `domain`, then we will get two results: + /// - `{ "project_id": "abcdefgh", "domain": "youtube.com", "count": 5 }` + /// - `{ "project_id": "abcdefgh", "domain": "discord.com", "count": 3 }` + /// + /// If we do not bucket by `domain`, we will only get one, which is an + /// aggregate of the two rows: + /// - `{ "project_id": "abcdefgh", "count": 8 }` + #[serde(default = "Vec::default")] + pub bucket_by: Vec, + /// Filters to apply before aggregating this metric. + /// + /// Values within one field are ORed together. Different fields are ANDed + /// together. An empty list means that field is not filtered. + #[serde(default)] + pub filter_by: FilterBy, +} + +/// Metrics collected in a single time slice. +#[derive(Debug, Clone, Serialize, Deserialize, utoipa::ToSchema)] +#[serde(untagged)] // the presence of `source_project`, `source_affiliate_code` determines the kind +pub enum AnalyticsData { + /// Project metrics. + Project(ProjectAnalytics), + AffiliateCode(AffiliateCodeAnalytics), +} + +/// Project metrics. +#[derive(Debug, Clone, Serialize, Deserialize, utoipa::ToSchema)] +pub struct ProjectAnalytics { + /// What project these metrics are for. + pub source_project: ProjectId, + /// Metrics collected. + #[serde(flatten)] + pub metrics: ProjectMetrics, +} + +/// Project metrics of a specific kind. +/// +/// If a field is not included in [`Metrics::bucket_by`], it will be [`None`]. +#[derive(Debug, Clone, Serialize, Deserialize, utoipa::ToSchema)] +#[serde(rename_all = "snake_case", tag = "metric_kind")] +pub enum ProjectMetrics { + /// [`ReturnMetrics::project_views`]. + Views(ProjectViews), + /// [`ReturnMetrics::project_downloads`]. + Downloads(ProjectDownloads), + /// [`ReturnMetrics::project_playtime`]. 
+ Playtime(ProjectPlaytime), + /// [`ReturnMetrics::project_revenue`]. + Revenue(ProjectRevenue), +} + +/// Affiliate code metrics. +#[derive(Debug, Clone, Serialize, Deserialize, utoipa::ToSchema)] +pub struct AffiliateCodeAnalytics { + /// What affiliate code these metrics are for. + pub source_affiliate_code: AffiliateCodeId, + /// Metrics collected. + #[serde(flatten)] + pub metrics: AffiliateCodeMetrics, +} + +/// Affiliate code metrics of a specific kind. +/// +/// If a field is not included in [`Metrics::bucket_by`], it will be [`None`]. +#[derive(Debug, Clone, Serialize, Deserialize, utoipa::ToSchema)] +#[serde(rename_all = "snake_case", tag = "metric_kind")] +pub enum AffiliateCodeMetrics { + Clicks(AffiliateCodeClicks), + Conversions(AffiliateCodeConversions), + Revenue(AffiliateCodeRevenue), +} diff --git a/apps/labrinth/src/routes/v3/analytics_get/metrics/project_downloads.rs b/apps/labrinth/src/routes/v3/analytics_get/metrics/project_downloads.rs new file mode 100644 index 0000000000..606ddbd717 --- /dev/null +++ b/apps/labrinth/src/routes/v3/analytics_get/metrics/project_downloads.rs @@ -0,0 +1,487 @@ +use std::{ + collections::HashMap, + sync::{ + LazyLock, + atomic::{AtomicUsize, Ordering}, + }, +}; + +use const_format::formatcp; +use dashmap::DashMap; +use regex::Regex; +use serde::{Deserialize, Deserializer, Serialize, Serializer, de::Error as _}; + +use crate::{ + database::models::{DBProjectId, DBVersionId}, + models::{ids::VersionId, v3::analytics::DownloadReason}, + routes::ApiError, +}; + +use super::super::{ + ClickhouseFilterParam, QueryClickhouseContext, add_to_time_slice, + condense_country, none_if_empty, none_if_zero_version_id, +}; +use super::{AnalyticsData, Metrics, ProjectAnalytics, ProjectMetrics}; + +const TIME_RANGE_START: &str = "{time_range_start: UInt64}"; +const TIME_RANGE_END: &str = "{time_range_end: UInt64}"; +const TIME_SLICES: &str = "{time_slices: UInt64}"; +const PROJECT_IDS: &str = "{project_ids: Array(UInt64)}"; + 
+/// Fields for [`super::ReturnMetrics::project_downloads`]. +#[derive( + Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, utoipa::ToSchema, +)] +#[serde(rename_all = "snake_case")] +pub enum ProjectDownloadsField { + /// Project ID. + ProjectId, + /// Version ID of this project. + VersionId, + /// Referrer domain which linked to this project. + Domain, + /// Normalized user agent used to download this project. + UserAgent, + /// Whether these downloads were monetized or not. + Monetized, + /// What country these downloads came from. + /// + /// To anonymize the data, the country may be reported as `XX`. + Country, + /// Download reason. + Reason, + /// Game version used for this download. + GameVersion, + /// Mod loader used for this download. + Loader, +} + +/// Filters for [`super::ReturnMetrics::project_downloads`]. +#[derive(Debug, Clone, Default, Serialize, Deserialize, utoipa::ToSchema)] +pub struct ProjectDownloadsFilters { + /// Version IDs to include. + #[serde(default)] + pub version_id: Vec, + /// Referrer domains to include. + #[serde(default)] + pub domain: Vec, + /// Normalized download sources to include. + #[serde(default)] + pub user_agent: Vec, + /// Monetization states to include. + #[serde(default)] + pub monetized: Vec, + /// Country codes to include. + #[serde(default)] + pub country: Vec, + /// Download reasons to include. + #[serde(default)] + pub reason: Vec, + /// Game versions to include. + #[serde(default)] + pub game_version: Vec, + /// Loaders to include. + #[serde(default)] + pub loader: Vec, +} + +/// [`super::ReturnMetrics::project_downloads`]. +#[derive(Debug, Clone, Default, Serialize, Deserialize, utoipa::ToSchema)] +pub struct ProjectDownloads { + /// [`ProjectDownloadsField::Domain`]. + #[serde(skip_serializing_if = "Option::is_none")] + pub(crate) domain: Option, + /// [`ProjectDownloadsField::UserAgent`]. 
+ #[serde(skip_serializing_if = "Option::is_none")] + pub(crate) user_agent: Option, + /// [`ProjectDownloadsField::VersionId`]. + #[serde(skip_serializing_if = "Option::is_none")] + pub(crate) version_id: Option, + /// [`ProjectDownloadsField::Monetized`]. + #[serde(skip_serializing_if = "Option::is_none")] + pub(crate) monetized: Option, + /// [`ProjectDownloadsField::Country`]. + #[serde(skip_serializing_if = "Option::is_none")] + pub(crate) country: Option, + /// [`ProjectDownloadsField::Reason`]. + #[serde(skip_serializing_if = "Option::is_none")] + pub(crate) reason: Option, + /// [`ProjectDownloadsField::GameVersion`]. + #[serde(skip_serializing_if = "Option::is_none")] + pub(crate) game_version: Option, + /// [`ProjectDownloadsField::Loader`]. + #[serde(skip_serializing_if = "Option::is_none")] + pub(crate) loader: Option, + /// Total number of downloads for this bucket. + pub(crate) downloads: u64, +} + +#[derive(Debug, Clone, PartialEq, Eq, Hash, utoipa::ToSchema)] +pub enum DownloadSource { + Website, + ModrinthApp, + ModrinthHosting, + ModrinthMaven, + Other, + Named(String), +} + +impl Serialize for DownloadSource { + fn serialize(&self, serializer: S) -> Result + where + S: Serializer, + { + match self { + Self::Named(name) => serializer.serialize_str(name), + Self::Website => serializer.serialize_str("website"), + Self::ModrinthApp => serializer.serialize_str("modrinth_app"), + Self::ModrinthHosting => { + serializer.serialize_str("modrinth_hosting") + } + Self::ModrinthMaven => serializer.serialize_str("modrinth_maven"), + Self::Other => serializer.serialize_str("other"), + } + } +} + +impl<'de> Deserialize<'de> for DownloadSource { + fn deserialize(deserializer: D) -> Result + where + D: Deserializer<'de>, + { + let source = String::deserialize(deserializer)?; + Ok(match source.as_str() { + "website" => Self::Website, + "modrinth_app" => Self::ModrinthApp, + "modrinth_hosting" => Self::ModrinthHosting, + "modrinth_maven" => Self::ModrinthMaven, + 
"other" => Self::Other, + _ if !source.is_empty() => Self::Named(source), + _ => { + return Err(D::Error::custom( + "download source cannot be empty", + )); + } + }) + } +} + +#[derive(Debug, clickhouse::Row, serde::Deserialize)] +struct DownloadRow { + bucket: u64, + project_id: DBProjectId, + domain: String, + user_agent: String, + version_id: DBVersionId, + monetized: i8, + country: String, + reason: String, + game_version: String, + loader: String, + downloads: u64, +} + +const DOWNLOADS: &str = { + const USE_PROJECT_ID: &str = "{use_project_id: Bool}"; + const USE_DOMAIN: &str = "{use_domain: Bool}"; + const USE_USER_AGENT: &str = "{use_user_agent: Bool}"; + const USE_VERSION_ID: &str = "{use_version_id: Bool}"; + const USE_MONETIZED: &str = "{use_monetized: Bool}"; + const USE_COUNTRY: &str = "{use_country: Bool}"; + const USE_REASON: &str = "{use_reason: Bool}"; + const USE_GAME_VERSION: &str = "{use_game_version: Bool}"; + const USE_LOADER: &str = "{use_loader: Bool}"; + const FILTER_DOMAIN: &str = "{filter_domain: Array(String)}"; + const FILTER_VERSION_ID: &str = "{filter_version_id: Array(UInt64)}"; + const FILTER_MONETIZED: &str = "{filter_monetized: UInt8}"; + const FILTER_COUNTRY: &str = "{filter_country: Array(String)}"; + const FILTER_REASON: &str = "{filter_reason: Array(String)}"; + const FILTER_GAME_VERSION: &str = "{filter_game_version: Array(String)}"; + const FILTER_LOADER: &str = "{filter_loader: Array(String)}"; + + formatcp!( + "SELECT + widthBucket(toUnixTimestamp(recorded), {TIME_RANGE_START}, {TIME_RANGE_END}, {TIME_SLICES}) AS bucket, + if({USE_PROJECT_ID}, project_id, 0) AS project_id, + if({USE_DOMAIN}, domain, '') AS domain, + if({USE_USER_AGENT}, user_agent, '') AS user_agent, + if({USE_VERSION_ID}, version_id, 0) AS version_id, + if({USE_MONETIZED}, CAST(user_id != 0 AS Int8), -1) AS monetized, + if({USE_COUNTRY}, country, '') AS country, + if({USE_REASON}, reason, '') AS reason, + if({USE_GAME_VERSION}, game_version, '') AS 
game_version, + if({USE_LOADER}, loader, '') AS loader, + COUNT(*) AS downloads + FROM downloads + WHERE + recorded BETWEEN {TIME_RANGE_START} AND {TIME_RANGE_END} + -- make sure that the REAL project id is included, + -- not the possibly-zero one, + -- by using `downloads.project_id` instead of `project_id` + AND downloads.project_id IN {PROJECT_IDS} + AND (empty({FILTER_DOMAIN}) OR downloads.domain IN {FILTER_DOMAIN}) + AND (empty({FILTER_VERSION_ID}) OR downloads.version_id IN {FILTER_VERSION_ID}) + AND ({FILTER_MONETIZED} = 2 OR CAST(downloads.user_id != 0 AS UInt8) = {FILTER_MONETIZED}) + AND (empty({FILTER_COUNTRY}) OR downloads.country IN {FILTER_COUNTRY}) + AND (empty({FILTER_REASON}) OR downloads.reason IN {FILTER_REASON}) + AND (empty({FILTER_GAME_VERSION}) OR downloads.game_version IN {FILTER_GAME_VERSION}) + AND (empty({FILTER_LOADER}) OR downloads.loader IN {FILTER_LOADER}) + GROUP BY bucket, project_id, domain, user_agent, version_id, monetized, country, reason, game_version, loader" + ) +}; + +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +struct DownloadBucket { + bucket: u64, + project_id: DBProjectId, + domain: Option, + user_agent: Option, + version_id: Option, + monetized: Option, + country: Option, + reason: Option, + game_version: Option, + loader: Option, +} + +pub(crate) async fn fetch( + cx: &mut QueryClickhouseContext<'_>, + metrics: &Metrics, +) -> Result<(), ApiError> { + use ProjectDownloadsField as F; + let uses = |field| metrics.bucket_by.contains(&field); + let use_columns = &[ + ("use_project_id", uses(F::ProjectId)), + ("use_domain", uses(F::Domain)), + ( + "use_user_agent", + uses(F::UserAgent) || !metrics.filter_by.user_agent.is_empty(), + ), + ("use_version_id", uses(F::VersionId)), + ("use_monetized", uses(F::Monetized)), + ("use_country", uses(F::Country)), + ("use_reason", uses(F::Reason)), + ("use_game_version", uses(F::GameVersion)), + ("use_loader", uses(F::Loader)), + ]; + + let mut query = cx + .clickhouse + 
.query(DOWNLOADS) + .param("time_range_start", cx.req.time_range.start.timestamp()) + .param("time_range_end", cx.req.time_range.end.timestamp()) + .param("time_slices", cx.time_slices.len()) + .param("project_ids", cx.project_ids); + for (param_name, used) in use_columns { + query = query.param(param_name, used) + } + for filter_param in [ + ClickhouseFilterParam::String( + "filter_domain", + &metrics.filter_by.domain, + ), + ClickhouseFilterParam::VersionId( + "filter_version_id", + &metrics.filter_by.version_id, + ), + ClickhouseFilterParam::Bool( + "filter_monetized", + &metrics.filter_by.monetized, + ), + ClickhouseFilterParam::String( + "filter_country", + &metrics.filter_by.country, + ), + ClickhouseFilterParam::DownloadReason( + "filter_reason", + &metrics.filter_by.reason, + ), + ClickhouseFilterParam::String( + "filter_game_version", + &metrics.filter_by.game_version, + ), + ClickhouseFilterParam::String( + "filter_loader", + &metrics.filter_by.loader, + ), + ] { + query = filter_param.bind(query); + } + + let uses_column = |name| { + use_columns + .iter() + .any(|(column_name, used)| *column_name == name && *used) + }; + let mut cursor = query.fetch::()?; + let mut buckets = HashMap::::new(); + + while let Some(row) = cursor.next().await? 
{ + let normalized_source = normalize_download_source(&row.user_agent); + if !metrics.filter_by.user_agent.is_empty() + && !normalized_source.as_ref().is_some_and(|source| { + metrics.filter_by.user_agent.contains(source) + }) + { + continue; + } + + let key = DownloadBucket { + bucket: row.bucket, + project_id: row.project_id, + domain: uses_column("use_domain").then(|| row.domain.clone()), + user_agent: uses(F::UserAgent) + .then_some(normalized_source) + .flatten(), + version_id: uses_column("use_version_id").then_some(row.version_id), + monetized: if uses_column("use_monetized") { + match row.monetized { + 0 => Some(false), + 1 => Some(true), + _ => None, + } + } else { + None + }, + country: uses_column("use_country").then(|| row.country.clone()), + reason: if uses_column("use_reason") { + none_if_empty(row.reason.clone()).and_then(|s| s.parse().ok()) + } else { + None + }, + game_version: uses_column("use_game_version") + .then(|| row.game_version.clone()), + loader: uses_column("use_loader").then(|| row.loader.clone()), + }; + + *buckets.entry(key).or_default() += row.downloads; + } + + for (key, downloads) in buckets { + add_to_time_slice( + cx.time_slices, + key.bucket as usize, + AnalyticsData::Project(ProjectAnalytics { + source_project: key.project_id.into(), + metrics: ProjectMetrics::Downloads(ProjectDownloads { + domain: key.domain.and_then(none_if_empty), + user_agent: key.user_agent, + version_id: key + .version_id + .and_then(none_if_zero_version_id), + monetized: key.monetized, + country: key + .country + .map(|country| condense_country(country, downloads)), + reason: key.reason, + game_version: key.game_version.and_then(none_if_empty), + loader: key.loader.and_then(none_if_empty), + downloads, + }), + }), + )?; + } + + Ok(()) +} + +#[derive(Debug, Clone, Copy)] +enum DownloadSourcePattern { + Named(&'static str), + Website, + ModrinthApp, + ModrinthHosting, + ModrinthMaven, +} + +impl DownloadSourcePattern { + fn into_source(self) -> 
DownloadSource { + match self { + Self::Named(name) => DownloadSource::Named(name.into()), + Self::Website => DownloadSource::Website, + Self::ModrinthApp => DownloadSource::ModrinthApp, + Self::ModrinthHosting => DownloadSource::ModrinthHosting, + Self::ModrinthMaven => DownloadSource::ModrinthMaven, + } + } +} + +static DOWNLOAD_SOURCE_PATTERNS: LazyLock> = + LazyLock::new(|| { + use DownloadSourcePattern as P; + + [ + (r"^modrinth/kyros/", P::ModrinthHosting), + (r"^modrinth/theseus/", P::ModrinthApp), + (r"^(Gradle/|Apache-Maven/)", P::ModrinthMaven), + (r"^MultiMC/", P::Named("MultiMC")), + (r"^PrismLauncher/", P::Named("Prism Launcher")), + (r"^PolyMC/", P::Named("PolyMC")), + (r"^FCL/", P::Named("FCL")), + (r"^PCL2/", P::Named("PCL2")), + (r"^HMCL/", P::Named("HMCL")), + (r"^Lunar Client Launcher", P::Named("Lunar Client")), + (r"^PojavLauncher", P::Named("PojavLauncher")), + (r"^ATLauncher/", P::Named("ATLauncher")), + (r"FeatherLauncher/", P::Named("Feather Client")), + ( + r"^FeatherMC/Feather Client Rust Launcher/", + P::Named("Feather Client"), + ), + (r"Feather/[0-9A-Za-z]+", P::Named("Feather Client")), + (r"^PandoraLauncher/", P::Named("Pandora Launcher")), + (r"^unsup", P::Named("unsup")), + (r"nothub/mrpack-install", P::Named("mrpack-install")), + (r"^(packwiz-installer|packwiz/)", P::Named("Packwiz")), + ( + r"^(Mozilla/|Chrome/|Chromium/|Firefox/|Safari/|AppleWebKit/|Edg/|OPR/)", + P::Website, + ), + ] + .into_iter() + .map(|(pattern, source)| { + ( + Regex::new(pattern) + .expect("download source regex should be valid"), + source, + ) + }) + .collect() + }); + +const MAX_DOWNLOAD_SOURCE_CACHE_BYTES: usize = 100 * 1024 * 1024; + +static DOWNLOAD_SOURCE_CACHE: LazyLock< + DashMap>, +> = LazyLock::new(DashMap::new); + +static DOWNLOAD_SOURCE_CACHE_BYTES: AtomicUsize = AtomicUsize::new(0); + +pub(crate) fn normalize_download_source( + user_agent: &str, +) -> Option { + if let Some(source) = DOWNLOAD_SOURCE_CACHE.get(user_agent) { + return 
source.clone(); + } + + let source = normalize_download_source_uncached(user_agent); + + let key_bytes = user_agent.len(); + let previous_bytes = + DOWNLOAD_SOURCE_CACHE_BYTES.fetch_add(key_bytes, Ordering::Relaxed); + if previous_bytes + key_bytes <= MAX_DOWNLOAD_SOURCE_CACHE_BYTES { + DOWNLOAD_SOURCE_CACHE.insert(user_agent.to_owned(), source.clone()); + } else { + DOWNLOAD_SOURCE_CACHE_BYTES.fetch_sub(key_bytes, Ordering::Relaxed); + } + + source +} + +fn normalize_download_source_uncached( + user_agent: &str, +) -> Option { + DOWNLOAD_SOURCE_PATTERNS.iter().find_map(|(regex, source)| { + regex.is_match(user_agent).then(|| source.into_source()) + }) +} diff --git a/apps/labrinth/src/routes/v3/analytics_get/metrics/project_playtime.rs b/apps/labrinth/src/routes/v3/analytics_get/metrics/project_playtime.rs new file mode 100644 index 0000000000..0a21f009d5 --- /dev/null +++ b/apps/labrinth/src/routes/v3/analytics_get/metrics/project_playtime.rs @@ -0,0 +1,266 @@ +use std::collections::HashMap; + +use const_format::formatcp; +use serde::{Deserialize, Serialize}; + +use crate::{ + database::models::{DBProjectId, DBVersionId}, + models::ids::VersionId, + routes::ApiError, +}; + +use super::super::{ + ClickhouseFilterParam, QueryClickhouseContext, add_to_time_slice, + condense_country, none_if_empty, none_if_zero_version_id, +}; +use super::{AnalyticsData, Metrics, ProjectAnalytics, ProjectMetrics}; + +const TIME_RANGE_START: &str = "{time_range_start: UInt64}"; +const TIME_RANGE_END: &str = "{time_range_end: UInt64}"; +const TIME_SLICES: &str = "{time_slices: UInt64}"; +const PROJECT_IDS: &str = "{project_ids: Array(UInt64)}"; + +/// Fields for [`super::ReturnMetrics::project_playtime`]. +#[derive( + Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, utoipa::ToSchema, +)] +#[serde(rename_all = "snake_case")] +pub enum ProjectPlaytimeField { + /// Project ID. + ProjectId, + /// Version ID of this project. 
+ VersionId, + /// Game mod loader which was used to count this playtime, e.g. Fabric. + Loader, + /// Game version which this project was played on. + GameVersion, + /// What country this playtime came from. + /// + /// To anonymize the data, the country may be reported as `XX`. + Country, +} + +/// Filters for [`super::ReturnMetrics::project_playtime`]. +#[derive(Debug, Clone, Default, Serialize, Deserialize, utoipa::ToSchema)] +pub struct ProjectPlaytimeFilters { + /// Version IDs to include. + #[serde(default)] + pub version_id: Vec, + /// Loaders to include. + #[serde(default)] + pub loader: Vec, + /// Game versions to include. + #[serde(default)] + pub game_version: Vec, + /// Country codes to include. + #[serde(default)] + pub country: Vec, +} + +/// [`super::ReturnMetrics::project_playtime`]. +#[derive(Debug, Clone, Default, Serialize, Deserialize, utoipa::ToSchema)] +pub struct ProjectPlaytime { + /// [`ProjectPlaytimeField::VersionId`]. + #[serde(skip_serializing_if = "Option::is_none")] + pub(crate) version_id: Option, + /// [`ProjectPlaytimeField::Loader`]. + #[serde(skip_serializing_if = "Option::is_none")] + pub(crate) loader: Option, + /// [`ProjectPlaytimeField::GameVersion`]. + #[serde(skip_serializing_if = "Option::is_none")] + pub(crate) game_version: Option, + /// [`ProjectPlaytimeField::Country`]. + #[serde(skip_serializing_if = "Option::is_none")] + pub(crate) country: Option, + /// Total number of seconds of playtime for this bucket. 
+ pub(crate) seconds: u64, +} + +#[derive(Debug, clickhouse::Row, serde::Deserialize)] +struct PlaytimeRow { + bucket: u64, + project_id: DBProjectId, + parent_version_id: DBVersionId, + version_id: DBVersionId, + loader: String, + game_version: String, + country: String, + seconds: u64, +} + +const PLAYTIME: &str = { + const USE_PROJECT_ID: &str = "{use_project_id: Bool}"; + const USE_VERSION_ID: &str = "{use_version_id: Bool}"; + const USE_LOADER: &str = "{use_loader: Bool}"; + const USE_GAME_VERSION: &str = "{use_game_version: Bool}"; + const USE_COUNTRY: &str = "{use_country: Bool}"; + const PARENT_VERSION_IDS: &str = "{parent_version_ids: Array(UInt64)}"; + const FILTER_VERSION_ID: &str = "{filter_version_id: Array(UInt64)}"; + const FILTER_LOADER: &str = "{filter_loader: Array(String)}"; + const FILTER_GAME_VERSION: &str = "{filter_game_version: Array(String)}"; + const FILTER_COUNTRY: &str = "{filter_country: Array(String)}"; + + formatcp!( + "SELECT + bucket, + if({USE_PROJECT_ID}, source_project_id, 0) AS project_id, + parent_version_id, + version_id, + loader, + game_version, + country, + SUM(seconds) AS seconds + FROM ( + SELECT + widthBucket(toUnixTimestamp(recorded), {TIME_RANGE_START}, {TIME_RANGE_END}, {TIME_SLICES}) AS bucket, + project_id AS source_project_id, + 0 AS parent_version_id, + if({USE_VERSION_ID}, version_id, 0) AS version_id, + if({USE_LOADER}, loader, '') AS loader, + if({USE_GAME_VERSION}, game_version, '') AS game_version, + if({USE_COUNTRY}, country, '') AS country, + seconds + FROM playtime + WHERE + recorded BETWEEN {TIME_RANGE_START} AND {TIME_RANGE_END} + AND playtime.project_id IN {PROJECT_IDS} + AND (empty({FILTER_VERSION_ID}) OR playtime.version_id IN {FILTER_VERSION_ID}) + AND (empty({FILTER_LOADER}) OR playtime.loader IN {FILTER_LOADER}) + AND (empty({FILTER_GAME_VERSION}) OR playtime.game_version IN {FILTER_GAME_VERSION}) + AND (empty({FILTER_COUNTRY}) OR playtime.country IN {FILTER_COUNTRY}) + + UNION ALL + + SELECT + 
widthBucket(toUnixTimestamp(recorded), {TIME_RANGE_START}, {TIME_RANGE_END}, {TIME_SLICES}) AS bucket, + 0 AS source_project_id, + parent AS parent_version_id, + if({USE_VERSION_ID}, version_id, 0) AS version_id, + if({USE_LOADER}, loader, '') AS loader, + if({USE_GAME_VERSION}, game_version, '') AS game_version, + if({USE_COUNTRY}, country, '') AS country, + seconds + FROM playtime + WHERE + recorded BETWEEN {TIME_RANGE_START} AND {TIME_RANGE_END} + AND parent IN {PARENT_VERSION_IDS} + AND (empty({FILTER_VERSION_ID}) OR playtime.version_id IN {FILTER_VERSION_ID}) + AND (empty({FILTER_LOADER}) OR playtime.loader IN {FILTER_LOADER}) + AND (empty({FILTER_GAME_VERSION}) OR playtime.game_version IN {FILTER_GAME_VERSION}) + AND (empty({FILTER_COUNTRY}) OR playtime.country IN {FILTER_COUNTRY}) + ) + GROUP BY bucket, project_id, parent_version_id, version_id, loader, game_version, country" + ) +}; + +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +struct PlaytimeBucket { + bucket: u64, + project_id: DBProjectId, + version_id: Option, + loader: Option, + game_version: Option, + country: Option, +} + +pub(crate) async fn fetch( + cx: &mut QueryClickhouseContext<'_>, + parent_version_projects: &HashMap, + metrics: &Metrics, +) -> Result<(), ApiError> { + use ProjectPlaytimeField as F; + let uses = |field| metrics.bucket_by.contains(&field); + let use_columns = &[ + ("use_project_id", uses(F::ProjectId)), + ("use_version_id", uses(F::VersionId)), + ("use_loader", uses(F::Loader)), + ("use_game_version", uses(F::GameVersion)), + ("use_country", uses(F::Country)), + ]; + let uses_column = |name| { + use_columns + .iter() + .any(|(column_name, used)| *column_name == name && *used) + }; + + let mut query = cx + .clickhouse + .query(PLAYTIME) + .param("time_range_start", cx.req.time_range.start.timestamp()) + .param("time_range_end", cx.req.time_range.end.timestamp()) + .param("time_slices", cx.time_slices.len()) + .param("project_ids", cx.project_ids) + .param("parent_version_ids", 
cx.parent_version_ids); + for (param_name, used) in use_columns { + query = query.param(param_name, used) + } + for filter_param in [ + ClickhouseFilterParam::VersionId( + "filter_version_id", + &metrics.filter_by.version_id, + ), + ClickhouseFilterParam::String( + "filter_loader", + &metrics.filter_by.loader, + ), + ClickhouseFilterParam::String( + "filter_game_version", + &metrics.filter_by.game_version, + ), + ClickhouseFilterParam::String( + "filter_country", + &metrics.filter_by.country, + ), + ] { + query = filter_param.bind(query); + } + + let mut cursor = query.fetch::()?; + let mut buckets = HashMap::::new(); + + while let Some(row) = cursor.next().await? { + let project_id = + if uses_column("use_project_id") && row.project_id.0 == 0 { + parent_version_projects + .get(&row.parent_version_id) + .copied() + .unwrap_or(row.project_id) + } else { + row.project_id + }; + let key = PlaytimeBucket { + bucket: row.bucket, + project_id, + version_id: uses_column("use_version_id").then_some(row.version_id), + loader: uses_column("use_loader").then(|| row.loader.clone()), + game_version: uses_column("use_game_version") + .then(|| row.game_version.clone()), + country: uses_column("use_country").then(|| row.country.clone()), + }; + + *buckets.entry(key).or_default() += row.seconds; + } + + for (key, seconds) in buckets { + add_to_time_slice( + cx.time_slices, + key.bucket as usize, + AnalyticsData::Project(ProjectAnalytics { + source_project: key.project_id.into(), + metrics: ProjectMetrics::Playtime(ProjectPlaytime { + version_id: key + .version_id + .and_then(none_if_zero_version_id), + loader: key.loader.and_then(none_if_empty), + game_version: key.game_version.and_then(none_if_empty), + country: key + .country + .map(|country| condense_country(country, seconds)), + seconds, + }), + }), + )?; + } + + Ok(()) +} diff --git a/apps/labrinth/src/routes/v3/analytics_get/metrics/project_revenue.rs b/apps/labrinth/src/routes/v3/analytics_get/metrics/project_revenue.rs new 
file mode 100644 index 0000000000..0fec8ee1e4 --- /dev/null +++ b/apps/labrinth/src/routes/v3/analytics_get/metrics/project_revenue.rs @@ -0,0 +1,98 @@ +use futures::StreamExt; +use rust_decimal::Decimal; +use serde::{Deserialize, Serialize}; +use sqlx::Row; + +use crate::{ + database::{PgPool, models::DBProjectId}, + models::ids::ProjectId, + routes::ApiError, + util::error::Context, +}; + +use super::super::{TimeSlice, add_to_time_slice}; +use super::{AnalyticsData, ProjectAnalytics, ProjectMetrics}; + +/// Fields for [`super::ReturnMetrics::project_revenue`]. +#[derive( + Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, utoipa::ToSchema, +)] +#[serde(rename_all = "snake_case")] +pub enum ProjectRevenueField { + /// Project ID. + ProjectId, +} + +/// Filters for [`super::ReturnMetrics::project_revenue`]. +#[derive(Debug, Clone, Default, Serialize, Deserialize, utoipa::ToSchema)] +pub struct ProjectRevenueFilters {} + +/// [`super::ReturnMetrics::project_revenue`]. +#[derive(Debug, Clone, Default, Serialize, Deserialize, utoipa::ToSchema)] +pub struct ProjectRevenue { + /// Total revenue for this bucket. 
+ pub(crate) revenue: Decimal, +} + +pub(crate) async fn fetch( + pool: &PgPool, + time_slices: &mut [TimeSlice], + req: &super::super::GetRequest, + num_time_slices: usize, + project_id_values: &[i64], +) -> Result<(), ApiError> { + let mut rows = sqlx::query( + "SELECT + WIDTH_BUCKET( + EXTRACT(EPOCH FROM created)::bigint, + EXTRACT(EPOCH FROM $1::timestamp with time zone AT TIME ZONE 'UTC')::bigint, + EXTRACT(EPOCH FROM $2::timestamp with time zone AT TIME ZONE 'UTC')::bigint, + $3::integer + ) AS bucket, + mod_id, + SUM(amount) amount_sum + FROM payouts_values + WHERE + -- only project revenue is counted here + -- for affiliate code revenue, see `affiliate_code_revenue` + payouts_values.mod_id IS NOT NULL + AND payouts_values.mod_id = ANY($4) + AND created BETWEEN $1 AND $2 + GROUP BY bucket, mod_id", + ) + .bind(req.time_range.start) + .bind(req.time_range.end) + .bind(num_time_slices as i64) + .bind(project_id_values) + .fetch(pool); + while let Some(row) = rows.next().await.transpose()? { + let bucket = row + .try_get::, _>("bucket")? + .wrap_internal_err("bucket should be non-null - query bug!")?; + let bucket = usize::try_from(bucket).wrap_internal_err_with(|| { + eyre::eyre!( + "bucket value {bucket} does not fit into `usize` - query bug!" 
+ ) + })?; + + let mod_id = row.try_get::, _>("mod_id")?; + let amount_sum = row.try_get::, _>("amount_sum")?; + if let Some(source_project) = + mod_id.map(DBProjectId).map(ProjectId::from) + && let Some(revenue) = amount_sum + { + add_to_time_slice( + time_slices, + bucket, + AnalyticsData::Project(ProjectAnalytics { + source_project, + metrics: ProjectMetrics::Revenue(ProjectRevenue { + revenue, + }), + }), + )?; + } + } + + Ok(()) +} diff --git a/apps/labrinth/src/routes/v3/analytics_get/metrics/project_views.rs b/apps/labrinth/src/routes/v3/analytics_get/metrics/project_views.rs new file mode 100644 index 0000000000..32b19db5d7 --- /dev/null +++ b/apps/labrinth/src/routes/v3/analytics_get/metrics/project_views.rs @@ -0,0 +1,181 @@ +use const_format::formatcp; +use serde::{Deserialize, Serialize}; + +use crate::{database::models::DBProjectId, routes::ApiError}; + +use super::super::{ + ClickhouseFilterParam, ClickhouseQueryParams, QueryClickhouseContext, + condense_country, none_if_empty, query_clickhouse, +}; +use super::{AnalyticsData, Metrics, ProjectAnalytics, ProjectMetrics}; + +const TIME_RANGE_START: &str = "{time_range_start: UInt64}"; +const TIME_RANGE_END: &str = "{time_range_end: UInt64}"; +const TIME_SLICES: &str = "{time_slices: UInt64}"; +const PROJECT_IDS: &str = "{project_ids: Array(UInt64)}"; + +/// Fields for [`super::ReturnMetrics::project_views`]. +#[derive( + Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, utoipa::ToSchema, +)] +#[serde(rename_all = "snake_case")] +pub enum ProjectViewsField { + /// Project ID. + ProjectId, + /// Referrer domain which linked to this project. + Domain, + /// Modrinth site path which was visited, e.g. `/mod/foo`. + SitePath, + /// Whether these views were monetized or not. + Monetized, + /// What country these views came from. + /// + /// To anonymize the data, the country may be reported as `XX`. + Country, +} + +/// Filters for [`super::ReturnMetrics::project_views`]. 
+#[derive(Debug, Clone, Default, Serialize, Deserialize, utoipa::ToSchema)] +pub struct ProjectViewsFilters { + /// Referrer domains to include. + #[serde(default)] + pub domain: Vec, + /// Modrinth site paths to include. + #[serde(default)] + pub site_path: Vec, + /// Monetization states to include. + #[serde(default)] + pub monetized: Vec, + /// Country codes to include. + #[serde(default)] + pub country: Vec, +} + +/// [`super::ReturnMetrics::project_views`]. +#[derive(Debug, Clone, Default, Serialize, Deserialize, utoipa::ToSchema)] +pub struct ProjectViews { + /// [`ProjectViewsField::Domain`]. + #[serde(skip_serializing_if = "Option::is_none")] + pub domain: Option, + /// [`ProjectViewsField::SitePath`]. + #[serde(skip_serializing_if = "Option::is_none")] + pub site_path: Option, + /// [`ProjectViewsField::Monetized`]. + #[serde(skip_serializing_if = "Option::is_none")] + pub monetized: Option, + /// [`ProjectViewsField::Country`]. + #[serde(skip_serializing_if = "Option::is_none")] + pub country: Option, + /// Total number of views for this bucket. 
+ pub views: u64, +} + +#[derive(Debug, clickhouse::Row, serde::Deserialize)] +struct ViewRow { + bucket: u64, + project_id: DBProjectId, + domain: String, + site_path: String, + monetized: i8, + country: String, + views: u64, +} + +const VIEWS: &str = { + const USE_PROJECT_ID: &str = "{use_project_id: Bool}"; + const USE_DOMAIN: &str = "{use_domain: Bool}"; + const USE_SITE_PATH: &str = "{use_site_path: Bool}"; + const USE_MONETIZED: &str = "{use_monetized: Bool}"; + const USE_COUNTRY: &str = "{use_country: Bool}"; + const FILTER_DOMAIN: &str = "{filter_domain: Array(String)}"; + const FILTER_SITE_PATH: &str = "{filter_site_path: Array(String)}"; + const FILTER_MONETIZED: &str = "{filter_monetized: UInt8}"; + const FILTER_COUNTRY: &str = "{filter_country: Array(String)}"; + + formatcp!( + "SELECT + widthBucket(toUnixTimestamp(recorded), {TIME_RANGE_START}, {TIME_RANGE_END}, {TIME_SLICES}) AS bucket, + if({USE_PROJECT_ID}, project_id, 0) AS project_id, + if({USE_DOMAIN}, domain, '') AS domain, + if({USE_SITE_PATH}, site_path, '') AS site_path, + if({USE_MONETIZED}, CAST(monetized AS Int8), -1) AS monetized, + if({USE_COUNTRY}, country, '') AS country, + COUNT(*) AS views + FROM views + WHERE + recorded BETWEEN {TIME_RANGE_START} AND {TIME_RANGE_END} + -- make sure that the REAL project id is included, + -- not the possibly-zero one, + -- by using `views.project_id` instead of `project_id` + AND views.project_id IN {PROJECT_IDS} + AND (empty({FILTER_DOMAIN}) OR views.domain IN {FILTER_DOMAIN}) + AND (empty({FILTER_SITE_PATH}) OR views.site_path IN {FILTER_SITE_PATH}) + AND ({FILTER_MONETIZED} = 2 OR CAST(views.monetized AS UInt8) = {FILTER_MONETIZED}) + AND (empty({FILTER_COUNTRY}) OR views.country IN {FILTER_COUNTRY}) + GROUP BY bucket, project_id, domain, site_path, monetized, country + " + ) +}; + +pub(crate) async fn fetch( + cx: &mut QueryClickhouseContext<'_>, + metrics: &Metrics, +) -> Result<(), ApiError> { + use ProjectViewsField as F; + let uses = |field| 
metrics.bucket_by.contains(&field); + + query_clickhouse::( + cx, + VIEWS, + ClickhouseQueryParams::PROJECT_IDS, + &[ + ("use_project_id", uses(F::ProjectId)), + ("use_domain", uses(F::Domain)), + ("use_site_path", uses(F::SitePath)), + ("use_monetized", uses(F::Monetized)), + ("use_country", uses(F::Country)), + ], + vec![ + ClickhouseFilterParam::String( + "filter_domain", + &metrics.filter_by.domain, + ), + ClickhouseFilterParam::String( + "filter_site_path", + &metrics.filter_by.site_path, + ), + ClickhouseFilterParam::Bool( + "filter_monetized", + &metrics.filter_by.monetized, + ), + ClickhouseFilterParam::String( + "filter_country", + &metrics.filter_by.country, + ), + ], + |_| true, + |row| row.bucket, + |row| { + let country = if uses(F::Country) { + Some(condense_country(row.country, row.views)) + } else { + None + }; + AnalyticsData::Project(ProjectAnalytics { + source_project: row.project_id.into(), + metrics: ProjectMetrics::Views(ProjectViews { + domain: none_if_empty(row.domain), + site_path: none_if_empty(row.site_path), + monetized: match row.monetized { + 0 => Some(false), + 1 => Some(true), + _ => None, + }, + country, + views: row.views, + }), + }) + }, + ) + .await +} diff --git a/apps/labrinth/src/routes/v3/analytics_get/mod.rs b/apps/labrinth/src/routes/v3/analytics_get/mod.rs index 63986d65f1..3d763978f9 100644 --- a/apps/labrinth/src/routes/v3/analytics_get/mod.rs +++ b/apps/labrinth/src/routes/v3/analytics_get/mod.rs @@ -8,33 +8,23 @@ //! 
- this makes it inconvenient to have separate endpoints mod facets; +mod metrics; mod old; -use std::{ - collections::HashMap, - num::NonZeroU64, - sync::{ - LazyLock, - atomic::{AtomicUsize, Ordering}, - }, -}; +use std::{collections::HashMap, num::NonZeroU64}; use crate::database::PgPool; use actix_web::{HttpRequest, post, web}; use chrono::{DateTime, TimeDelta, Utc}; -use dashmap::DashMap; use eyre::eyre; -use futures::StreamExt; -use regex::Regex; -use rust_decimal::Decimal; -use serde::{Deserialize, Deserializer, Serialize, Serializer, de::Error as _}; +use serde::{Deserialize, Serialize}; use crate::{ auth::{AuthenticationError, get_user_from_headers}, database::{ self, DBProject, models::{ - DBAffiliateCode, DBAffiliateCodeId, DBProjectId, DBUser, DBUserId, + DBAffiliateCode, DBAffiliateCodeId, DBProjectId, DBUser, DBVersionId, }, redis::RedisPool, @@ -47,9 +37,11 @@ use crate::{ }, queue::session::AuthQueue, routes::ApiError, - util::error::Context, }; +pub(crate) use metrics::normalize_download_source; +pub use metrics::*; + pub fn config(cfg: &mut utoipa_actix_web::service_config::ServiceConfig) { cfg.service(fetch_analytics); cfg.configure(facets::config); @@ -105,258 +97,6 @@ pub enum TimeRangeResolution { Minutes(NonZeroU64), } -/// What metrics the caller would like to receive from this analytics get -/// request. -#[derive(Debug, Default, Serialize, Deserialize, utoipa::ToSchema)] -pub struct ReturnMetrics { - /// How many times a project page has been viewed. - pub project_views: Option>, - /// How many times a project has been downloaded. - pub project_downloads: - Option>, - /// How long users have been playing a project. - pub project_playtime: - Option>, - /// How much payout revenue a project has generated. - pub project_revenue: - Option>, - /// How many times an affiliate code has been clicked. - pub affiliate_code_clicks: - Option>, - /// How many times a product has been purchased with an affiliate code. 
- pub affiliate_code_conversions: Option< - Metrics, - >, - /// How much payout revenue an affiliate code has generated. - pub affiliate_code_revenue: - Option>, -} - -/// See [`ReturnMetrics`]. -#[derive(Debug, Serialize, Deserialize, utoipa::ToSchema)] -pub struct Metrics { - /// When collecting metrics, what fields do we want to group the results by? - /// - /// For example, if we have two views entries: - /// - `{ "project_id": "abcdefgh", "domain": "youtube.com", "count": 5 }` - /// - `{ "project_id": "abcdefgh", "domain": "discord.com", "count": 3 }` - /// - /// If we bucket by `domain`, then we will get two results: - /// - `{ "project_id": "abcdefgh", "domain": "youtube.com", "count": 5 }` - /// - `{ "project_id": "abcdefgh", "domain": "discord.com", "count": 3 }` - /// - /// If we do not bucket by `domain`, we will only get one, which is an - /// aggregate of the two rows: - /// - `{ "project_id": "abcdefgh", "count": 8 }` - #[serde(default = "Vec::default")] - pub bucket_by: Vec, - /// Filters to apply before aggregating this metric. - /// - /// Values within one field are ORed together. Different fields are ANDed - /// together. An empty list means that field is not filtered. - #[serde(default)] - pub filter_by: FilterBy, -} - -/// Fields for [`ReturnMetrics::project_views`]. -#[derive( - Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, utoipa::ToSchema, -)] -#[serde(rename_all = "snake_case")] -pub enum ProjectViewsField { - /// Project ID. - ProjectId, - /// Referrer domain which linked to this project. - Domain, - /// Modrinth site path which was visited, e.g. `/mod/foo`. - SitePath, - /// Whether these views were monetized or not. - Monetized, - /// What country these views came from. - /// - /// To anonymize the data, the country may be reported as `XX`. - Country, -} - -/// Filters for [`ReturnMetrics::project_views`]. 
-#[derive(Debug, Clone, Default, Serialize, Deserialize, utoipa::ToSchema)] -pub struct ProjectViewsFilters { - /// Referrer domains to include. - #[serde(default)] - pub domain: Vec, - /// Modrinth site paths to include. - #[serde(default)] - pub site_path: Vec, - /// Monetization states to include. - #[serde(default)] - pub monetized: Vec, - /// Country codes to include. - #[serde(default)] - pub country: Vec, -} - -/// Fields for [`ReturnMetrics::project_downloads`]. -#[derive( - Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, utoipa::ToSchema, -)] -#[serde(rename_all = "snake_case")] -pub enum ProjectDownloadsField { - /// Project ID. - ProjectId, - /// Version ID of this project. - VersionId, - /// Referrer domain which linked to this project. - Domain, - /// Normalized user agent used to download this project. - UserAgent, - /// Whether these downloads were monetized or not. - Monetized, - /// What country these downloads came from. - /// - /// To anonymize the data, the country may be reported as `XX`. - Country, - /// Download reason. - Reason, - /// Game version used for this download. - GameVersion, - /// Mod loader used for this download. - Loader, -} - -/// Filters for [`ReturnMetrics::project_downloads`]. -#[derive(Debug, Clone, Default, Serialize, Deserialize, utoipa::ToSchema)] -pub struct ProjectDownloadsFilters { - /// Version IDs to include. - #[serde(default)] - pub version_id: Vec, - /// Referrer domains to include. - #[serde(default)] - pub domain: Vec, - /// Normalized download sources to include. - #[serde(default)] - pub user_agent: Vec, - /// Monetization states to include. - #[serde(default)] - pub monetized: Vec, - /// Country codes to include. - #[serde(default)] - pub country: Vec, - /// Download reasons to include. - #[serde(default)] - pub reason: Vec, - /// Game versions to include. - #[serde(default)] - pub game_version: Vec, - /// Loaders to include. 
- #[serde(default)] - pub loader: Vec, -} - -/// Fields for [`ReturnMetrics::project_playtime`]. -#[derive( - Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, utoipa::ToSchema, -)] -#[serde(rename_all = "snake_case")] -pub enum ProjectPlaytimeField { - /// Project ID. - ProjectId, - /// Version ID of this project. - VersionId, - /// Game mod loader which was used to count this playtime, e.g. Fabric. - Loader, - /// Game version which this project was played on. - GameVersion, - /// What country this playtime came from. - /// - /// To anonymize the data, the country may be reported as `XX`. - Country, -} - -/// Filters for [`ReturnMetrics::project_playtime`]. -#[derive(Debug, Clone, Default, Serialize, Deserialize, utoipa::ToSchema)] -pub struct ProjectPlaytimeFilters { - /// Version IDs to include. - #[serde(default)] - pub version_id: Vec, - /// Loaders to include. - #[serde(default)] - pub loader: Vec, - /// Game versions to include. - #[serde(default)] - pub game_version: Vec, - /// Country codes to include. - #[serde(default)] - pub country: Vec, -} - -/// Fields for [`ReturnMetrics::project_revenue`]. -#[derive( - Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, utoipa::ToSchema, -)] -#[serde(rename_all = "snake_case")] -pub enum ProjectRevenueField { - /// Project ID. - ProjectId, -} - -/// Filters for [`ReturnMetrics::project_revenue`]. -#[derive(Debug, Clone, Default, Serialize, Deserialize, utoipa::ToSchema)] -pub struct ProjectRevenueFilters {} - -/// Fields for [`ReturnMetrics::affiliate_code_clicks`]. -#[derive( - Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, utoipa::ToSchema, -)] -#[serde(rename_all = "snake_case")] -pub enum AffiliateCodeClicksField { - /// Affiliate code ID. - AffiliateCodeId, -} - -/// Filters for [`ReturnMetrics::affiliate_code_clicks`]. -#[derive(Debug, Clone, Default, Serialize, Deserialize, utoipa::ToSchema)] -pub struct AffiliateCodeClicksFilters { - /// Affiliate code IDs to include. 
- #[serde(default)] - pub affiliate_code_id: Vec, -} - -/// Fields for [`ReturnMetrics::affiliate_code_conversions`]. -#[derive( - Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, utoipa::ToSchema, -)] -#[serde(rename_all = "snake_case")] -pub enum AffiliateCodeConversionsField { - /// Affiliate code ID. - AffiliateCodeId, -} - -/// Filters for [`ReturnMetrics::affiliate_code_conversions`]. -#[derive(Debug, Clone, Default, Serialize, Deserialize, utoipa::ToSchema)] -pub struct AffiliateCodeConversionsFilters { - /// Affiliate code IDs to include. - #[serde(default)] - pub affiliate_code_id: Vec, -} - -/// Fields for [`ReturnMetrics::affiliate_code_revenue`]. -#[derive( - Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, utoipa::ToSchema, -)] -#[serde(rename_all = "snake_case")] -pub enum AffiliateCodeRevenueField { - /// Affiliate code ID. - AffiliateCodeId, -} - -/// Filters for [`ReturnMetrics::affiliate_code_revenue`]. -#[derive(Debug, Clone, Default, Serialize, Deserialize, utoipa::ToSchema)] -pub struct AffiliateCodeRevenueFilters { - /// Affiliate code IDs to include. - #[serde(default)] - pub affiliate_code_id: Vec, -} - /// Minimum width of a [`TimeSlice`], controlled by [`TimeRange::resolution`]. pub const MIN_RESOLUTION: TimeDelta = TimeDelta::minutes(60); @@ -379,444 +119,8 @@ pub struct FetchResponse { #[derive(Debug, Clone, Default, Serialize, Deserialize, utoipa::ToSchema)] pub struct TimeSlice(pub Vec); -/// Metrics collected in a single [`TimeSlice`]. -#[derive(Debug, Clone, Serialize, Deserialize, utoipa::ToSchema)] -#[serde(untagged)] // the presence of `source_project`, `source_affiliate_code` determines the kind -pub enum AnalyticsData { - /// Project metrics. - Project(ProjectAnalytics), - AffiliateCode(AffiliateCodeAnalytics), -} - -/// Project metrics. -#[derive(Debug, Clone, Serialize, Deserialize, utoipa::ToSchema)] -pub struct ProjectAnalytics { - /// What project these metrics are for. 
- pub source_project: ProjectId, - /// Metrics collected. - #[serde(flatten)] - pub metrics: ProjectMetrics, -} - -/// Project metrics of a specific kind. -/// -/// If a field is not included in [`Metrics::bucket_by`], it will be [`None`]. -#[derive(Debug, Clone, Serialize, Deserialize, utoipa::ToSchema)] -#[serde(rename_all = "snake_case", tag = "metric_kind")] -pub enum ProjectMetrics { - /// [`ReturnMetrics::project_views`]. - Views(ProjectViews), - /// [`ReturnMetrics::project_downloads`]. - Downloads(ProjectDownloads), - /// [`ReturnMetrics::project_playtime`]. - Playtime(ProjectPlaytime), - /// [`ReturnMetrics::project_revenue`]. - Revenue(ProjectRevenue), -} - -/// [`ReturnMetrics::project_views`]. -#[derive(Debug, Clone, Default, Serialize, Deserialize, utoipa::ToSchema)] -pub struct ProjectViews { - /// [`ProjectViewsField::Domain`]. - #[serde(skip_serializing_if = "Option::is_none")] - pub domain: Option, - /// [`ProjectViewsField::SitePath`]. - #[serde(skip_serializing_if = "Option::is_none")] - pub site_path: Option, - /// [`ProjectViewsField::Monetized`]. - #[serde(skip_serializing_if = "Option::is_none")] - pub monetized: Option, - /// [`ProjectViewsField::Country`]. - #[serde(skip_serializing_if = "Option::is_none")] - pub country: Option, - /// Total number of views for this bucket. - pub views: u64, -} - -/// [`ReturnMetrics::project_downloads`]. -#[derive(Debug, Clone, Default, Serialize, Deserialize, utoipa::ToSchema)] -pub struct ProjectDownloads { - /// [`ProjectDownloadsField::Domain`]. - #[serde(skip_serializing_if = "Option::is_none")] - domain: Option, - /// [`ProjectDownloadsField::UserAgent`]. - #[serde(skip_serializing_if = "Option::is_none")] - user_agent: Option, - /// [`ProjectDownloadsField::VersionId`]. - #[serde(skip_serializing_if = "Option::is_none")] - version_id: Option, - /// [`ProjectDownloadsField::Monetized`]. - #[serde(skip_serializing_if = "Option::is_none")] - monetized: Option, - /// [`ProjectDownloadsField::Country`]. 
- #[serde(skip_serializing_if = "Option::is_none")] - country: Option, - /// [`ProjectDownloadsField::Reason`]. - #[serde(skip_serializing_if = "Option::is_none")] - reason: Option, - /// [`ProjectDownloadsField::GameVersion`]. - #[serde(skip_serializing_if = "Option::is_none")] - game_version: Option, - /// [`ProjectDownloadsField::Loader`]. - #[serde(skip_serializing_if = "Option::is_none")] - loader: Option, - /// Total number of downloads for this bucket. - downloads: u64, -} - -#[derive(Debug, Clone, PartialEq, Eq, Hash, utoipa::ToSchema)] -pub enum DownloadSource { - Website, - ModrinthApp, - ModrinthHosting, - ModrinthMaven, - Other, - Named(String), -} - -impl Serialize for DownloadSource { - fn serialize(&self, serializer: S) -> Result - where - S: Serializer, - { - match self { - Self::Named(name) => serializer.serialize_str(name), - Self::Website => serializer.serialize_str("website"), - Self::ModrinthApp => serializer.serialize_str("modrinth_app"), - Self::ModrinthHosting => { - serializer.serialize_str("modrinth_hosting") - } - Self::ModrinthMaven => serializer.serialize_str("modrinth_maven"), - Self::Other => serializer.serialize_str("other"), - } - } -} - -impl<'de> Deserialize<'de> for DownloadSource { - fn deserialize(deserializer: D) -> Result - where - D: Deserializer<'de>, - { - let source = String::deserialize(deserializer)?; - Ok(match source.as_str() { - "website" => Self::Website, - "modrinth_app" => Self::ModrinthApp, - "modrinth_hosting" => Self::ModrinthHosting, - "modrinth_maven" => Self::ModrinthMaven, - "other" => Self::Other, - _ if !source.is_empty() => Self::Named(source), - _ => { - return Err(D::Error::custom( - "download source cannot be empty", - )); - } - }) - } -} - -/// [`ReturnMetrics::project_playtime`]. -#[derive(Debug, Clone, Default, Serialize, Deserialize, utoipa::ToSchema)] -pub struct ProjectPlaytime { - /// [`ProjectPlaytimeField::VersionId`]. 
- #[serde(skip_serializing_if = "Option::is_none")] - version_id: Option, - /// [`ProjectPlaytimeField::Loader`]. - #[serde(skip_serializing_if = "Option::is_none")] - loader: Option, - /// [`ProjectPlaytimeField::GameVersion`]. - #[serde(skip_serializing_if = "Option::is_none")] - game_version: Option, - /// [`ProjectPlaytimeField::Country`]. - #[serde(skip_serializing_if = "Option::is_none")] - country: Option, - /// Total number of seconds of playtime for this bucket. - seconds: u64, -} - -/// [`ReturnMetrics::project_revenue`]. -#[derive(Debug, Clone, Default, Serialize, Deserialize, utoipa::ToSchema)] -pub struct ProjectRevenue { - /// Total revenue for this bucket. - revenue: Decimal, -} - -/// Affiliate code metrics. -#[derive(Debug, Clone, Serialize, Deserialize, utoipa::ToSchema)] -pub struct AffiliateCodeAnalytics { - /// What affiliate code these metrics are for. - pub source_affiliate_code: AffiliateCodeId, - /// Metrics collected. - #[serde(flatten)] - pub metrics: AffiliateCodeMetrics, -} - -/// Affiliate code metrics of a specific kind. -/// -/// If a field is not included in [`Metrics::bucket_by`], it will be [`None`]. -#[derive(Debug, Clone, Serialize, Deserialize, utoipa::ToSchema)] -#[serde(rename_all = "snake_case", tag = "metric_kind")] -pub enum AffiliateCodeMetrics { - Clicks(AffiliateCodeClicks), - Conversions(AffiliateCodeConversions), - Revenue(AffiliateCodeRevenue), -} - -/// [`ReturnMetrics::affiliate_code_clicks`]. -#[derive(Debug, Clone, Default, Serialize, Deserialize, utoipa::ToSchema)] -pub struct AffiliateCodeClicks { - /// Total clicks for this bucket. - pub clicks: u64, -} - -/// [`ReturnMetrics::affiliate_code_conversions`]. -#[derive(Debug, Clone, Default, Serialize, Deserialize, utoipa::ToSchema)] -pub struct AffiliateCodeConversions { - /// Total conversions for this bucket. - pub conversions: u64, -} - -/// [`ReturnMetrics::affiliate_code_revenue`]. 
-#[derive(Debug, Clone, Default, Serialize, Deserialize, utoipa::ToSchema)] -pub struct AffiliateCodeRevenue { - /// Total revenue for this bucket. - pub revenue: Decimal, -} - // logic -/// Clickhouse queries - separate from [`sqlx`] queries. -mod query { - use crate::database::models::{ - DBAffiliateCodeId, DBProjectId, DBVersionId, - }; - use const_format::formatcp; - - const TIME_RANGE_START: &str = "{time_range_start: UInt64}"; - const TIME_RANGE_END: &str = "{time_range_end: UInt64}"; - const TIME_SLICES: &str = "{time_slices: UInt64}"; - const PROJECT_IDS: &str = "{project_ids: Array(UInt64)}"; - - #[derive(Debug, clickhouse::Row, serde::Deserialize)] - pub struct ViewRow { - pub bucket: u64, - pub project_id: DBProjectId, - pub domain: String, - pub site_path: String, - pub monetized: i8, - pub country: String, - pub views: u64, - } - - pub const VIEWS: &str = { - const USE_PROJECT_ID: &str = "{use_project_id: Bool}"; - const USE_DOMAIN: &str = "{use_domain: Bool}"; - const USE_SITE_PATH: &str = "{use_site_path: Bool}"; - const USE_MONETIZED: &str = "{use_monetized: Bool}"; - const USE_COUNTRY: &str = "{use_country: Bool}"; - const FILTER_DOMAIN: &str = "{filter_domain: Array(String)}"; - const FILTER_SITE_PATH: &str = "{filter_site_path: Array(String)}"; - const FILTER_MONETIZED: &str = "{filter_monetized: UInt8}"; - const FILTER_COUNTRY: &str = "{filter_country: Array(String)}"; - - formatcp!( - "SELECT - widthBucket(toUnixTimestamp(recorded), {TIME_RANGE_START}, {TIME_RANGE_END}, {TIME_SLICES}) AS bucket, - if({USE_PROJECT_ID}, project_id, 0) AS project_id, - if({USE_DOMAIN}, domain, '') AS domain, - if({USE_SITE_PATH}, site_path, '') AS site_path, - if({USE_MONETIZED}, CAST(monetized AS Int8), -1) AS monetized, - if({USE_COUNTRY}, country, '') AS country, - COUNT(*) AS views - FROM views - WHERE - recorded BETWEEN {TIME_RANGE_START} AND {TIME_RANGE_END} - -- make sure that the REAL project id is included, - -- not the possibly-zero one, - -- by using 
`views.project_id` instead of `project_id` - AND views.project_id IN {PROJECT_IDS} - AND (empty({FILTER_DOMAIN}) OR views.domain IN {FILTER_DOMAIN}) - AND (empty({FILTER_SITE_PATH}) OR views.site_path IN {FILTER_SITE_PATH}) - AND ({FILTER_MONETIZED} = 2 OR CAST(views.monetized AS UInt8) = {FILTER_MONETIZED}) - AND (empty({FILTER_COUNTRY}) OR views.country IN {FILTER_COUNTRY}) - GROUP BY bucket, project_id, domain, site_path, monetized, country - " - ) - }; - - #[derive(Debug, clickhouse::Row, serde::Deserialize)] - pub struct DownloadRow { - pub bucket: u64, - pub project_id: DBProjectId, - pub domain: String, - pub user_agent: String, - pub version_id: DBVersionId, - pub monetized: i8, - pub country: String, - pub reason: String, - pub game_version: String, - pub loader: String, - pub downloads: u64, - } - - pub const DOWNLOADS: &str = { - const USE_PROJECT_ID: &str = "{use_project_id: Bool}"; - const USE_DOMAIN: &str = "{use_domain: Bool}"; - const USE_USER_AGENT: &str = "{use_user_agent: Bool}"; - const USE_VERSION_ID: &str = "{use_version_id: Bool}"; - const USE_MONETIZED: &str = "{use_monetized: Bool}"; - const USE_COUNTRY: &str = "{use_country: Bool}"; - const USE_REASON: &str = "{use_reason: Bool}"; - const USE_GAME_VERSION: &str = "{use_game_version: Bool}"; - const USE_LOADER: &str = "{use_loader: Bool}"; - const FILTER_DOMAIN: &str = "{filter_domain: Array(String)}"; - const FILTER_VERSION_ID: &str = "{filter_version_id: Array(UInt64)}"; - const FILTER_MONETIZED: &str = "{filter_monetized: UInt8}"; - const FILTER_COUNTRY: &str = "{filter_country: Array(String)}"; - const FILTER_REASON: &str = "{filter_reason: Array(String)}"; - const FILTER_GAME_VERSION: &str = - "{filter_game_version: Array(String)}"; - const FILTER_LOADER: &str = "{filter_loader: Array(String)}"; - - formatcp!( - "SELECT - widthBucket(toUnixTimestamp(recorded), {TIME_RANGE_START}, {TIME_RANGE_END}, {TIME_SLICES}) AS bucket, - if({USE_PROJECT_ID}, project_id, 0) AS project_id, - 
if({USE_DOMAIN}, domain, '') AS domain, - if({USE_USER_AGENT}, user_agent, '') AS user_agent, - if({USE_VERSION_ID}, version_id, 0) AS version_id, - if({USE_MONETIZED}, CAST(user_id != 0 AS Int8), -1) AS monetized, - if({USE_COUNTRY}, country, '') AS country, - if({USE_REASON}, reason, '') AS reason, - if({USE_GAME_VERSION}, game_version, '') AS game_version, - if({USE_LOADER}, loader, '') AS loader, - COUNT(*) AS downloads - FROM downloads - WHERE - recorded BETWEEN {TIME_RANGE_START} AND {TIME_RANGE_END} - -- make sure that the REAL project id is included, - -- not the possibly-zero one, - -- by using `downloads.project_id` instead of `project_id` - AND downloads.project_id IN {PROJECT_IDS} - AND (empty({FILTER_DOMAIN}) OR downloads.domain IN {FILTER_DOMAIN}) - AND (empty({FILTER_VERSION_ID}) OR downloads.version_id IN {FILTER_VERSION_ID}) - AND ({FILTER_MONETIZED} = 2 OR CAST(downloads.user_id != 0 AS UInt8) = {FILTER_MONETIZED}) - AND (empty({FILTER_COUNTRY}) OR downloads.country IN {FILTER_COUNTRY}) - AND (empty({FILTER_REASON}) OR downloads.reason IN {FILTER_REASON}) - AND (empty({FILTER_GAME_VERSION}) OR downloads.game_version IN {FILTER_GAME_VERSION}) - AND (empty({FILTER_LOADER}) OR downloads.loader IN {FILTER_LOADER}) - GROUP BY bucket, project_id, domain, user_agent, version_id, monetized, country, reason, game_version, loader" - ) - }; - - #[derive(Debug, clickhouse::Row, serde::Deserialize)] - pub struct PlaytimeRow { - pub bucket: u64, - pub project_id: DBProjectId, - pub parent_version_id: DBVersionId, - pub version_id: DBVersionId, - pub loader: String, - pub game_version: String, - pub country: String, - pub seconds: u64, - } - - pub const PLAYTIME: &str = { - const USE_PROJECT_ID: &str = "{use_project_id: Bool}"; - const USE_VERSION_ID: &str = "{use_version_id: Bool}"; - const USE_LOADER: &str = "{use_loader: Bool}"; - const USE_GAME_VERSION: &str = "{use_game_version: Bool}"; - const USE_COUNTRY: &str = "{use_country: Bool}"; - const 
PARENT_VERSION_IDS: &str = "{parent_version_ids: Array(UInt64)}"; - const FILTER_VERSION_ID: &str = "{filter_version_id: Array(UInt64)}"; - const FILTER_LOADER: &str = "{filter_loader: Array(String)}"; - const FILTER_GAME_VERSION: &str = - "{filter_game_version: Array(String)}"; - const FILTER_COUNTRY: &str = "{filter_country: Array(String)}"; - - formatcp!( - "SELECT - bucket, - if({USE_PROJECT_ID}, source_project_id, 0) AS project_id, - parent_version_id, - version_id, - loader, - game_version, - country, - SUM(seconds) AS seconds - FROM ( - SELECT - widthBucket(toUnixTimestamp(recorded), {TIME_RANGE_START}, {TIME_RANGE_END}, {TIME_SLICES}) AS bucket, - project_id AS source_project_id, - 0 AS parent_version_id, - if({USE_VERSION_ID}, version_id, 0) AS version_id, - if({USE_LOADER}, loader, '') AS loader, - if({USE_GAME_VERSION}, game_version, '') AS game_version, - if({USE_COUNTRY}, country, '') AS country, - seconds - FROM playtime - WHERE - recorded BETWEEN {TIME_RANGE_START} AND {TIME_RANGE_END} - AND playtime.project_id IN {PROJECT_IDS} - AND (empty({FILTER_VERSION_ID}) OR playtime.version_id IN {FILTER_VERSION_ID}) - AND (empty({FILTER_LOADER}) OR playtime.loader IN {FILTER_LOADER}) - AND (empty({FILTER_GAME_VERSION}) OR playtime.game_version IN {FILTER_GAME_VERSION}) - AND (empty({FILTER_COUNTRY}) OR playtime.country IN {FILTER_COUNTRY}) - - UNION ALL - - SELECT - widthBucket(toUnixTimestamp(recorded), {TIME_RANGE_START}, {TIME_RANGE_END}, {TIME_SLICES}) AS bucket, - 0 AS source_project_id, - parent AS parent_version_id, - if({USE_VERSION_ID}, version_id, 0) AS version_id, - if({USE_LOADER}, loader, '') AS loader, - if({USE_GAME_VERSION}, game_version, '') AS game_version, - if({USE_COUNTRY}, country, '') AS country, - seconds - FROM playtime - WHERE - recorded BETWEEN {TIME_RANGE_START} AND {TIME_RANGE_END} - AND parent IN {PARENT_VERSION_IDS} - AND (empty({FILTER_VERSION_ID}) OR playtime.version_id IN {FILTER_VERSION_ID}) - AND (empty({FILTER_LOADER}) OR 
playtime.loader IN {FILTER_LOADER}) - AND (empty({FILTER_GAME_VERSION}) OR playtime.game_version IN {FILTER_GAME_VERSION}) - AND (empty({FILTER_COUNTRY}) OR playtime.country IN {FILTER_COUNTRY}) - ) - GROUP BY bucket, project_id, parent_version_id, version_id, loader, game_version, country" - ) - }; - - #[derive(Debug, clickhouse::Row, serde::Deserialize)] - pub struct AffiliateCodeClickRow { - pub bucket: u64, - pub affiliate_code_id: DBAffiliateCodeId, - pub clicks: u64, - } - - pub const AFFILIATE_CODE_CLICKS: &str = { - const USE_AFFILIATE_CODE_ID: &str = "{use_affiliate_code_id: Bool}"; - const AFFILIATE_CODE_IDS: &str = "{affiliate_code_ids: Array(UInt64)}"; - const FILTER_AFFILIATE_CODE_ID: &str = - "{filter_affiliate_code_id: Array(UInt64)}"; - - formatcp!( - "SELECT - widthBucket(toUnixTimestamp(recorded), {TIME_RANGE_START}, {TIME_RANGE_END}, {TIME_SLICES}) AS bucket, - if({USE_AFFILIATE_CODE_ID}, affiliate_code_id, 0) AS affiliate_code_id, - COUNT(*) AS clicks - FROM affiliate_code_clicks - WHERE - recorded BETWEEN {TIME_RANGE_START} AND {TIME_RANGE_END} - -- make sure that the REAL affiliate code id is included, - -- not the possibly-zero one, - -- by using `affiliate_code_clicks.affiliate_code_id` instead of `project_id` - -- AND affiliate_code_clicks.affiliate_code_id IN {AFFILIATE_CODE_IDS} - AND (empty({FILTER_AFFILIATE_CODE_ID}) OR affiliate_code_id IN {FILTER_AFFILIATE_CODE_ID}) - GROUP BY bucket, affiliate_code_id" - ) - }; -} - /// Fetches analytics data for the authorized user's projects. 
#[utoipa::path( responses((status = OK, body = inline(FetchResponse))), @@ -940,303 +244,55 @@ pub async fn fetch_analytics( }; if let Some(metrics) = &req.return_metrics.project_views { - use ProjectViewsField as F; - let uses = |field| metrics.bucket_by.contains(&field); - - query_clickhouse::( - &mut query_clickhouse_cx, - query::VIEWS, - ClickhouseQueryParams::PROJECT_IDS, - &[ - ("use_project_id", uses(F::ProjectId)), - ("use_domain", uses(F::Domain)), - ("use_site_path", uses(F::SitePath)), - ("use_monetized", uses(F::Monetized)), - ("use_country", uses(F::Country)), - ], - vec![ - ClickhouseFilterParam::String( - "filter_domain", - &metrics.filter_by.domain, - ), - ClickhouseFilterParam::String( - "filter_site_path", - &metrics.filter_by.site_path, - ), - ClickhouseFilterParam::Bool( - "filter_monetized", - &metrics.filter_by.monetized, - ), - ClickhouseFilterParam::String( - "filter_country", - &metrics.filter_by.country, - ), - ], - |_| true, - |row| row.bucket, - |row| { - let country = if uses(F::Country) { - Some(condense_country(row.country, row.views)) - } else { - None - }; - AnalyticsData::Project(ProjectAnalytics { - source_project: row.project_id.into(), - metrics: ProjectMetrics::Views(ProjectViews { - domain: none_if_empty(row.domain), - site_path: none_if_empty(row.site_path), - monetized: match row.monetized { - 0 => Some(false), - 1 => Some(true), - _ => None, - }, - country, - views: row.views, - }), - }) - }, - ) - .await?; + metrics::fetch_project_views(&mut query_clickhouse_cx, metrics).await?; } if let Some(metrics) = &req.return_metrics.project_downloads { - use ProjectDownloadsField as F; - let uses = |field| metrics.bucket_by.contains(&field); - - query_clickhouse_downloads( - &mut query_clickhouse_cx, - &[ - ("use_project_id", uses(F::ProjectId)), - ("use_domain", uses(F::Domain)), - ( - "use_user_agent", - uses(F::UserAgent) - || !metrics.filter_by.user_agent.is_empty(), - ), - ("use_version_id", uses(F::VersionId)), - 
("use_monetized", uses(F::Monetized)), - ("use_country", uses(F::Country)), - ("use_reason", uses(F::Reason)), - ("use_game_version", uses(F::GameVersion)), - ("use_loader", uses(F::Loader)), - ], - &metrics.filter_by, - uses(F::UserAgent), - vec![ - ClickhouseFilterParam::String( - "filter_domain", - &metrics.filter_by.domain, - ), - ClickhouseFilterParam::VersionId( - "filter_version_id", - &metrics.filter_by.version_id, - ), - ClickhouseFilterParam::Bool( - "filter_monetized", - &metrics.filter_by.monetized, - ), - ClickhouseFilterParam::String( - "filter_country", - &metrics.filter_by.country, - ), - ClickhouseFilterParam::DownloadReason( - "filter_reason", - &metrics.filter_by.reason, - ), - ClickhouseFilterParam::String( - "filter_game_version", - &metrics.filter_by.game_version, - ), - ClickhouseFilterParam::String( - "filter_loader", - &metrics.filter_by.loader, - ), - ], - ) - .await?; + metrics::fetch_project_downloads(&mut query_clickhouse_cx, metrics) + .await?; } if let Some(metrics) = &req.return_metrics.project_playtime { - use ProjectPlaytimeField as F; - let uses = |field| metrics.bucket_by.contains(&field); - - query_clickhouse_playtime( + metrics::fetch_project_playtime( &mut query_clickhouse_cx, &parent_version_projects, - &[ - ("use_project_id", uses(F::ProjectId)), - ("use_version_id", uses(F::VersionId)), - ("use_loader", uses(F::Loader)), - ("use_game_version", uses(F::GameVersion)), - ("use_country", uses(F::Country)), - ], - vec![ - ClickhouseFilterParam::VersionId( - "filter_version_id", - &metrics.filter_by.version_id, - ), - ClickhouseFilterParam::String( - "filter_loader", - &metrics.filter_by.loader, - ), - ClickhouseFilterParam::String( - "filter_game_version", - &metrics.filter_by.game_version, - ), - ClickhouseFilterParam::String( - "filter_country", - &metrics.filter_by.country, - ), - ], + metrics, ) .await?; } if let Some(metrics) = &req.return_metrics.affiliate_code_clicks { - use AffiliateCodeClicksField as F; - let uses = 
|field| metrics.bucket_by.contains(&field); - - tracing::info!("affiliate codes = {affiliate_code_ids:?}"); - - query_clickhouse::( - &mut query_clickhouse_cx, - query::AFFILIATE_CODE_CLICKS, - ClickhouseQueryParams::empty(), - &[("use_affiliate_code_id", uses(F::AffiliateCodeId))], - vec![ClickhouseFilterParam::AffiliateCodeId( - "filter_affiliate_code_id", - &metrics.filter_by.affiliate_code_id, - )], - |_| true, - |row| row.bucket, - |row| { - AnalyticsData::AffiliateCode(AffiliateCodeAnalytics { - source_affiliate_code: row.affiliate_code_id.into(), - metrics: AffiliateCodeMetrics::Clicks( - AffiliateCodeClicks { clicks: row.clicks }, - ), - }) - }, - ) - .await?; + metrics::fetch_affiliate_code_clicks(&mut query_clickhouse_cx, metrics) + .await?; } + drop(query_clickhouse_cx); + if req.return_metrics.project_revenue.is_some() { if !scopes.contains(Scopes::PAYOUTS_READ) { return Err(AuthenticationError::InvalidCredentials.into()); } - let mut rows = sqlx::query!( - "SELECT - WIDTH_BUCKET( - EXTRACT(EPOCH FROM created)::bigint, - EXTRACT(EPOCH FROM $1::timestamp with time zone AT TIME ZONE 'UTC')::bigint, - EXTRACT(EPOCH FROM $2::timestamp with time zone AT TIME ZONE 'UTC')::bigint, - $3::integer - ) AS bucket, - mod_id, - SUM(amount) amount_sum - FROM payouts_values - WHERE - -- only project revenue is counted here - -- for affiliate code revenue, see `affiliate_code_revenue` - payouts_values.mod_id IS NOT NULL - AND payouts_values.mod_id = ANY($4) - AND created BETWEEN $1 AND $2 - GROUP BY bucket, mod_id", - req.time_range.start, - req.time_range.end, - num_time_slices as i64, + metrics::fetch_project_revenue( + &pool, + &mut time_slices, + &req, + num_time_slices, &project_id_values, ) - .fetch(&**pool); - while let Some(row) = rows.next().await.transpose()? 
{ - let bucket = row - .bucket - .wrap_internal_err("bucket should be non-null - query bug!")?; - let bucket = usize::try_from(bucket).wrap_internal_err_with(|| { - eyre!("bucket value {bucket} does not fit into `usize` - query bug!") - })?; - - if let Some(source_project) = - row.mod_id.map(DBProjectId).map(ProjectId::from) - && let Some(revenue) = row.amount_sum - { - add_to_time_slice( - &mut time_slices, - bucket, - AnalyticsData::Project(ProjectAnalytics { - source_project, - metrics: ProjectMetrics::Revenue(ProjectRevenue { - revenue, - }), - }), - )?; - } - } + .await?; } if let Some(metrics) = &req.return_metrics.affiliate_code_conversions { - let filter_affiliate_code_ids = metrics - .filter_by - .affiliate_code_id - .iter() - .map(|id| DBAffiliateCodeId::from(*id).0) - .collect::>(); - let mut rows = sqlx::query!( - "SELECT - WIDTH_BUCKET( - EXTRACT(EPOCH FROM usa.created_at)::bigint, - EXTRACT(EPOCH FROM $1::timestamp with time zone AT TIME ZONE 'UTC')::bigint, - EXTRACT(EPOCH FROM $2::timestamp with time zone AT TIME ZONE 'UTC')::bigint, - $3::integer - ) AS bucket, - CASE WHEN $5 THEN affiliate_code ELSE 0 END AS affiliate_code, - COUNT(*) AS conversions - FROM users_subscriptions_affiliations usa - INNER JOIN affiliate_codes ac ON ac.id = usa.affiliate_code - INNER JOIN users_subscriptions us ON us.id = usa.subscription_id - INNER JOIN charges c ON c.subscription_id = us.id - WHERE - ac.affiliate = $4 - AND usa.created_at BETWEEN $1 AND $2 - AND c.status = 'succeeded' - AND (cardinality($6::bigint[]) = 0 OR affiliate_code = ANY($6)) - GROUP BY bucket, affiliate_code", - req.time_range.start, - req.time_range.end, - num_time_slices as i64, - DBUserId::from(user.id) as DBUserId, - metrics.bucket_by.contains(&AffiliateCodeConversionsField::AffiliateCodeId), - &filter_affiliate_code_ids, + metrics::fetch_affiliate_code_conversions( + &pool, + &mut time_slices, + &req, + user.id.into(), + num_time_slices, + metrics, ) - .fetch(&**pool); - while let 
Some(row) = rows.next().await.transpose()? { - let bucket = row - .bucket - .wrap_internal_err("bucket should be non-null - query bug!")?; - let bucket = usize::try_from(bucket).wrap_internal_err_with(|| { - eyre!("bucket value {bucket} does not fit into `usize` - query bug!") - })?; - - let source_affiliate_code = AffiliateCodeId::from( - DBAffiliateCodeId(row.affiliate_code.unwrap_or_default()), - ); - let conversions = - u64::try_from(row.conversions.unwrap_or_default()) - .unwrap_or(u64::MAX); - - add_to_time_slice( - &mut time_slices, - bucket, - AnalyticsData::AffiliateCode(AffiliateCodeAnalytics { - source_affiliate_code, - metrics: AffiliateCodeMetrics::Conversions( - AffiliateCodeConversions { conversions }, - ), - }), - )?; - } + .await?; } if let Some(metrics) = &req.return_metrics.affiliate_code_revenue { @@ -1244,62 +300,15 @@ pub async fn fetch_analytics( return Err(AuthenticationError::InvalidCredentials.into()); } - let filter_affiliate_code_ids = metrics - .filter_by - .affiliate_code_id - .iter() - .map(|id| DBAffiliateCodeId::from(*id).0) - .collect::>(); - let mut rows = sqlx::query!( - "SELECT - WIDTH_BUCKET( - EXTRACT(EPOCH FROM created)::bigint, - EXTRACT(EPOCH FROM $1::timestamp with time zone AT TIME ZONE 'UTC')::bigint, - EXTRACT(EPOCH FROM $2::timestamp with time zone AT TIME ZONE 'UTC')::bigint, - $3::integer - ) AS bucket, - CASE WHEN $5 THEN affiliate_code_source ELSE 0 END AS affiliate_code_source, - SUM(amount) amount_sum - FROM payouts_values - WHERE - user_id = $4 - AND payouts_values.affiliate_code_source IS NOT NULL - AND created BETWEEN $1 AND $2 - AND (cardinality($6::bigint[]) = 0 OR affiliate_code_source = ANY($6)) - GROUP BY bucket, affiliate_code_source", - req.time_range.start, - req.time_range.end, - num_time_slices as i64, - DBUserId::from(user.id) as DBUserId, - metrics.bucket_by.contains(&AffiliateCodeRevenueField::AffiliateCodeId), - &filter_affiliate_code_ids, + metrics::fetch_affiliate_code_revenue( + &pool, + &mut 
time_slices, + &req, + user.id.into(), + num_time_slices, + metrics, ) - .fetch(&**pool); - while let Some(row) = rows.next().await.transpose()? { - let bucket = row - .bucket - .wrap_internal_err("bucket should be non-null - query bug!")?; - let bucket = usize::try_from(bucket).wrap_internal_err_with(|| { - eyre!("bucket value {bucket} does not fit into `usize` - query bug!") - })?; - - let source_affiliate_code = - AffiliateCodeId::from(DBAffiliateCodeId( - row.affiliate_code_source.unwrap_or_default(), - )); - let revenue = row.amount_sum.unwrap_or_default(); - - add_to_time_slice( - &mut time_slices, - bucket, - AnalyticsData::AffiliateCode(AffiliateCodeAnalytics { - source_affiliate_code, - metrics: AffiliateCodeMetrics::Revenue( - AffiliateCodeRevenue { revenue }, - ), - }), - )?; - } + .await?; } Ok(web::Json(FetchResponse { @@ -1307,121 +316,15 @@ pub async fn fetch_analytics( })) } -fn none_if_empty(s: String) -> Option { +pub(crate) fn none_if_empty(s: String) -> Option { if s.is_empty() { None } else { Some(s) } } -fn none_if_zero_version_id(v: DBVersionId) -> Option { +pub(crate) fn none_if_zero_version_id(v: DBVersionId) -> Option { if v.0 == 0 { None } else { Some(v.into()) } } -#[derive(Debug, Clone, Copy)] -enum DownloadSourcePattern { - Named(&'static str), - Website, - ModrinthApp, - ModrinthHosting, - ModrinthMaven, -} - -impl DownloadSourcePattern { - fn into_source(self) -> DownloadSource { - match self { - Self::Named(name) => DownloadSource::Named(name.into()), - Self::Website => DownloadSource::Website, - Self::ModrinthApp => DownloadSource::ModrinthApp, - Self::ModrinthHosting => DownloadSource::ModrinthHosting, - Self::ModrinthMaven => DownloadSource::ModrinthMaven, - } - } -} - -static DOWNLOAD_SOURCE_PATTERNS: LazyLock> = - LazyLock::new(|| { - use DownloadSourcePattern as P; - - [ - (r"^modrinth/kyros/", P::ModrinthHosting), - (r"^modrinth/theseus/", P::ModrinthApp), - (r"^(Gradle/|Apache-Maven/)", P::ModrinthMaven), - (r"^MultiMC/", 
P::Named("MultiMC")), - (r"^PrismLauncher/", P::Named("Prism Launcher")), - (r"^PolyMC/", P::Named("PolyMC")), - (r"^FCL/", P::Named("FCL")), - (r"^PCL2/", P::Named("PCL2")), - (r"^HMCL/", P::Named("HMCL")), - (r"^Lunar Client Launcher", P::Named("Lunar Client")), - (r"^PojavLauncher", P::Named("PojavLauncher")), - (r"^ATLauncher/", P::Named("ATLauncher")), - (r"FeatherLauncher/", P::Named("Feather Client")), - ( - r"^FeatherMC/Feather Client Rust Launcher/", - P::Named("Feather Client"), - ), - (r"Feather/[0-9A-Za-z]+", P::Named("Feather Client")), - (r"^PandoraLauncher/", P::Named("Pandora Launcher")), - (r"^unsup", P::Named("unsup")), - (r"nothub/mrpack-install", P::Named("mrpack-install")), - (r"^(packwiz-installer|packwiz/)", P::Named("Packwiz")), - ( - r"^(Mozilla/|Chrome/|Chromium/|Firefox/|Safari/|AppleWebKit/|Edg/|OPR/)", - P::Website, - ), - ] - .into_iter() - .map(|(pattern, source)| { - ( - Regex::new(pattern) - .expect("download source regex should be valid"), - source, - ) - }) - .collect() - }); - -// Put a cap of 100MB on the download source cache. We can adjust this as needed, -// if we find we're getting too few cache hits, or too much memory usage. -const MAX_DOWNLOAD_SOURCE_CACHE_BYTES: usize = 100 * 1024 * 1024; - -static DOWNLOAD_SOURCE_CACHE: LazyLock< - DashMap>, -> = LazyLock::new(DashMap::new); - -static DOWNLOAD_SOURCE_CACHE_BYTES: AtomicUsize = AtomicUsize::new(0); - -fn normalize_download_source(user_agent: &str) -> Option { - if let Some(source) = DOWNLOAD_SOURCE_CACHE.get(user_agent) { - return source.clone(); - } - - let source = normalize_download_source_uncached(user_agent); - - // This is intentionally a simple bounded cache. Reads are the hot path, - // and the distinct UA set should settle after common launchers are seen. - // If this becomes lock-contentious, ArcSwap plus an immutable map would - // avoid DashMap's shard locks while keeping misses cheaper than cloning a - // std HashMap. 
- let key_bytes = user_agent.len(); - let previous_bytes = - DOWNLOAD_SOURCE_CACHE_BYTES.fetch_add(key_bytes, Ordering::Relaxed); - if previous_bytes + key_bytes <= MAX_DOWNLOAD_SOURCE_CACHE_BYTES { - DOWNLOAD_SOURCE_CACHE.insert(user_agent.to_owned(), source.clone()); - } else { - DOWNLOAD_SOURCE_CACHE_BYTES.fetch_sub(key_bytes, Ordering::Relaxed); - } - - source -} - -fn normalize_download_source_uncached( - user_agent: &str, -) -> Option { - DOWNLOAD_SOURCE_PATTERNS.iter().find_map(|(regex, source)| { - regex.is_match(user_agent).then(|| source.into_source()) - }) -} - -fn condense_country(country: String, count: u64) -> String { +pub(crate) fn condense_country(country: String, count: u64) -> String { // Every country under '50' (view or downloads) should be condensed into 'XX' if count < 50 { "XX".to_string() @@ -1430,47 +333,23 @@ fn condense_country(country: String, count: u64) -> String { } } -struct QueryClickhouseContext<'a> { - clickhouse: &'a clickhouse::Client, - req: &'a GetRequest, - time_slices: &'a mut [TimeSlice], - project_ids: &'a [DBProjectId], - parent_version_ids: &'a [DBVersionId], - affiliate_code_ids: &'a [DBAffiliateCodeId], -} - -#[derive(Debug, Clone, PartialEq, Eq, Hash)] -struct PlaytimeBucket { - bucket: u64, - project_id: DBProjectId, - version_id: Option, - loader: Option, - game_version: Option, - country: Option, -} - -#[derive(Debug, Clone, PartialEq, Eq, Hash)] -struct DownloadBucket { - bucket: u64, - project_id: DBProjectId, - domain: Option, - user_agent: Option, - version_id: Option, - monetized: Option, - country: Option, - reason: Option, - game_version: Option, - loader: Option, +pub(crate) struct QueryClickhouseContext<'a> { + pub(crate) clickhouse: &'a clickhouse::Client, + pub(crate) req: &'a GetRequest, + pub(crate) time_slices: &'a mut [TimeSlice], + pub(crate) project_ids: &'a [DBProjectId], + pub(crate) parent_version_ids: &'a [DBVersionId], + pub(crate) affiliate_code_ids: &'a [DBAffiliateCodeId], } 
#[derive(Debug, Clone, Copy, Default)] -struct ClickhouseQueryParams { - project_ids: bool, - parent_version_ids: bool, - affiliate_code_ids: bool, +pub(crate) struct ClickhouseQueryParams { + pub(crate) project_ids: bool, + pub(crate) parent_version_ids: bool, + pub(crate) affiliate_code_ids: bool, } -enum ClickhouseFilterParam<'a> { +pub(crate) enum ClickhouseFilterParam<'a> { String(&'static str, &'a [String]), Bool(&'static str, &'a [bool]), VersionId(&'static str, &'a [VersionId]), @@ -1479,7 +358,10 @@ enum ClickhouseFilterParam<'a> { } impl ClickhouseFilterParam<'_> { - fn bind(self, query: clickhouse::query::Query) -> clickhouse::query::Query { + pub(crate) fn bind( + self, + query: clickhouse::query::Query, + ) -> clickhouse::query::Query { match self { Self::String(name, values) => query.param(name, values), Self::Bool(name, values) => { @@ -1514,13 +396,13 @@ impl ClickhouseFilterParam<'_> { } impl ClickhouseQueryParams { - const PROJECT_IDS: Self = Self { + pub(crate) const PROJECT_IDS: Self = Self { project_ids: true, parent_version_ids: false, affiliate_code_ids: false, }; - const fn empty() -> Self { + pub(crate) const fn empty() -> Self { Self { project_ids: false, parent_version_ids: false, @@ -1543,181 +425,7 @@ impl std::ops::BitOr for ClickhouseQueryParams { } } -async fn query_clickhouse_playtime( - cx: &mut QueryClickhouseContext<'_>, - parent_version_projects: &HashMap, - use_columns: &[(&str, bool)], - filter_params: Vec>, -) -> Result<(), ApiError> { - let uses = |name| { - use_columns - .iter() - .any(|(column_name, used)| *column_name == name && *used) - }; - let mut query = cx - .clickhouse - .query(query::PLAYTIME) - .param("time_range_start", cx.req.time_range.start.timestamp()) - .param("time_range_end", cx.req.time_range.end.timestamp()) - .param("time_slices", cx.time_slices.len()) - .param("project_ids", cx.project_ids) - .param("parent_version_ids", cx.parent_version_ids); - for (param_name, used) in use_columns { - query = 
query.param(param_name, used) - } - for filter_param in filter_params { - query = filter_param.bind(query); - } - - let mut cursor = query.fetch::()?; - let mut buckets = HashMap::::new(); - - while let Some(row) = cursor.next().await? { - let project_id = if uses("use_project_id") && row.project_id.0 == 0 { - parent_version_projects - .get(&row.parent_version_id) - .copied() - .unwrap_or(row.project_id) - } else { - row.project_id - }; - let key = PlaytimeBucket { - bucket: row.bucket, - project_id, - version_id: uses("use_version_id").then_some(row.version_id), - loader: uses("use_loader").then(|| row.loader.clone()), - game_version: uses("use_game_version") - .then(|| row.game_version.clone()), - country: uses("use_country").then(|| row.country.clone()), - }; - - *buckets.entry(key).or_default() += row.seconds; - } - - for (key, seconds) in buckets { - let bucket = key.bucket as usize; - add_to_time_slice( - cx.time_slices, - bucket, - AnalyticsData::Project(ProjectAnalytics { - source_project: key.project_id.into(), - metrics: ProjectMetrics::Playtime(ProjectPlaytime { - version_id: key - .version_id - .and_then(none_if_zero_version_id), - loader: key.loader.and_then(none_if_empty), - game_version: key.game_version.and_then(none_if_empty), - country: key - .country - .map(|country| condense_country(country, seconds)), - seconds, - }), - }), - )?; - } - - Ok(()) -} - -async fn query_clickhouse_downloads( - cx: &mut QueryClickhouseContext<'_>, - use_columns: &[(&str, bool)], - filters: &ProjectDownloadsFilters, - bucket_by_user_agent: bool, - filter_params: Vec>, -) -> Result<(), ApiError> { - let uses = |name| { - use_columns - .iter() - .any(|(column_name, used)| *column_name == name && *used) - }; - let mut query = cx - .clickhouse - .query(query::DOWNLOADS) - .param("time_range_start", cx.req.time_range.start.timestamp()) - .param("time_range_end", cx.req.time_range.end.timestamp()) - .param("time_slices", cx.time_slices.len()) - .param("project_ids", 
cx.project_ids); - for (param_name, used) in use_columns { - query = query.param(param_name, used) - } - for filter_param in filter_params { - query = filter_param.bind(query); - } - - let mut cursor = query.fetch::()?; - let mut buckets = HashMap::::new(); - - while let Some(row) = cursor.next().await? { - let normalized_source = normalize_download_source(&row.user_agent); - if !filters.user_agent.is_empty() - && !normalized_source - .as_ref() - .is_some_and(|source| filters.user_agent.contains(source)) - { - continue; - } - - let key = DownloadBucket { - bucket: row.bucket, - project_id: row.project_id, - domain: uses("use_domain").then(|| row.domain.clone()), - user_agent: bucket_by_user_agent - .then_some(normalized_source) - .flatten(), - version_id: uses("use_version_id").then_some(row.version_id), - monetized: if uses("use_monetized") { - match row.monetized { - 0 => Some(false), - 1 => Some(true), - _ => None, - } - } else { - None - }, - country: uses("use_country").then(|| row.country.clone()), - reason: if uses("use_reason") { - none_if_empty(row.reason.clone()).and_then(|s| s.parse().ok()) - } else { - None - }, - game_version: uses("use_game_version") - .then(|| row.game_version.clone()), - loader: uses("use_loader").then(|| row.loader.clone()), - }; - - *buckets.entry(key).or_default() += row.downloads; - } - - for (key, downloads) in buckets { - add_to_time_slice( - cx.time_slices, - key.bucket as usize, - AnalyticsData::Project(ProjectAnalytics { - source_project: key.project_id.into(), - metrics: ProjectMetrics::Downloads(ProjectDownloads { - domain: key.domain.and_then(none_if_empty), - user_agent: key.user_agent, - version_id: key - .version_id - .and_then(none_if_zero_version_id), - monetized: key.monetized, - country: key - .country - .map(|country| condense_country(country, downloads)), - reason: key.reason, - game_version: key.game_version.and_then(none_if_empty), - loader: key.loader.and_then(none_if_empty), - downloads, - }), - }), - )?; - 
} - - Ok(()) -} - -async fn query_clickhouse( +pub(crate) async fn query_clickhouse( cx: &mut QueryClickhouseContext<'_>, query: &str, params: ClickhouseQueryParams, @@ -1765,7 +473,7 @@ where Ok(()) } -fn add_to_time_slice( +pub(crate) fn add_to_time_slice( time_slices: &mut [TimeSlice], bucket: usize, data: AnalyticsData, From 335478ff6134a368a543268a99e402c6100a680d Mon Sep 17 00:00:00 2001 From: aecsocket Date: Sat, 23 May 2026 21:22:42 +0100 Subject: [PATCH 8/8] prepare --- ...b5efefeedfc0fe11eebe9c939725ea0dad677.json | 39 ------------------- ...04b1f2d7da89f733dce227911753c30238eea.json | 39 ------------------- ...40a2339b12ff15f1f9e53a27a1c599a33e43b.json | 37 ------------------ .../src/routes/v3/analytics_get/mod.rs | 1 + 4 files changed, 1 insertion(+), 115 deletions(-) delete mode 100644 apps/labrinth/.sqlx/query-4b9d6d9533145f3d31eb6c0d15eb5efefeedfc0fe11eebe9c939725ea0dad677.json delete mode 100644 apps/labrinth/.sqlx/query-71eb9aca5ea309ddbb05986d47d04b1f2d7da89f733dce227911753c30238eea.json delete mode 100644 apps/labrinth/.sqlx/query-8d38218e5a0c9297be7c6c77acf40a2339b12ff15f1f9e53a27a1c599a33e43b.json diff --git a/apps/labrinth/.sqlx/query-4b9d6d9533145f3d31eb6c0d15eb5efefeedfc0fe11eebe9c939725ea0dad677.json b/apps/labrinth/.sqlx/query-4b9d6d9533145f3d31eb6c0d15eb5efefeedfc0fe11eebe9c939725ea0dad677.json deleted file mode 100644 index 28e51da655..0000000000 --- a/apps/labrinth/.sqlx/query-4b9d6d9533145f3d31eb6c0d15eb5efefeedfc0fe11eebe9c939725ea0dad677.json +++ /dev/null @@ -1,39 +0,0 @@ -{ - "db_name": "PostgreSQL", - "query": "SELECT\n WIDTH_BUCKET(\n EXTRACT(EPOCH FROM usa.created_at)::bigint,\n EXTRACT(EPOCH FROM $1::timestamp with time zone AT TIME ZONE 'UTC')::bigint,\n EXTRACT(EPOCH FROM $2::timestamp with time zone AT TIME ZONE 'UTC')::bigint,\n $3::integer\n ) AS bucket,\n CASE WHEN $5 THEN affiliate_code ELSE 0 END AS affiliate_code,\n COUNT(*) AS conversions\n FROM users_subscriptions_affiliations usa\n INNER JOIN affiliate_codes ac ON 
ac.id = usa.affiliate_code\n INNER JOIN users_subscriptions us ON us.id = usa.subscription_id\n INNER JOIN charges c ON c.subscription_id = us.id\n WHERE\n ac.affiliate = $4\n AND usa.created_at BETWEEN $1 AND $2\n AND c.status = 'succeeded'\n AND (cardinality($6::bigint[]) = 0 OR affiliate_code = ANY($6))\n GROUP BY bucket, affiliate_code", - "describe": { - "columns": [ - { - "ordinal": 0, - "name": "bucket", - "type_info": "Int4" - }, - { - "ordinal": 1, - "name": "affiliate_code", - "type_info": "Int8" - }, - { - "ordinal": 2, - "name": "conversions", - "type_info": "Int8" - } - ], - "parameters": { - "Left": [ - "Timestamptz", - "Timestamptz", - "Int4", - "Int8", - "Bool", - "Int8Array" - ] - }, - "nullable": [ - null, - null, - null - ] - }, - "hash": "4b9d6d9533145f3d31eb6c0d15eb5efefeedfc0fe11eebe9c939725ea0dad677" -} diff --git a/apps/labrinth/.sqlx/query-71eb9aca5ea309ddbb05986d47d04b1f2d7da89f733dce227911753c30238eea.json b/apps/labrinth/.sqlx/query-71eb9aca5ea309ddbb05986d47d04b1f2d7da89f733dce227911753c30238eea.json deleted file mode 100644 index f059dc932a..0000000000 --- a/apps/labrinth/.sqlx/query-71eb9aca5ea309ddbb05986d47d04b1f2d7da89f733dce227911753c30238eea.json +++ /dev/null @@ -1,39 +0,0 @@ -{ - "db_name": "PostgreSQL", - "query": "SELECT\n WIDTH_BUCKET(\n EXTRACT(EPOCH FROM created)::bigint,\n EXTRACT(EPOCH FROM $1::timestamp with time zone AT TIME ZONE 'UTC')::bigint,\n EXTRACT(EPOCH FROM $2::timestamp with time zone AT TIME ZONE 'UTC')::bigint,\n $3::integer\n ) AS bucket,\n CASE WHEN $5 THEN affiliate_code_source ELSE 0 END AS affiliate_code_source,\n SUM(amount) amount_sum\n FROM payouts_values\n WHERE\n user_id = $4\n AND payouts_values.affiliate_code_source IS NOT NULL\n AND created BETWEEN $1 AND $2\n AND (cardinality($6::bigint[]) = 0 OR affiliate_code_source = ANY($6))\n GROUP BY bucket, affiliate_code_source", - "describe": { - "columns": [ - { - "ordinal": 0, - "name": "bucket", - "type_info": "Int4" - }, - { - "ordinal": 1, - 
"name": "affiliate_code_source", - "type_info": "Int8" - }, - { - "ordinal": 2, - "name": "amount_sum", - "type_info": "Numeric" - } - ], - "parameters": { - "Left": [ - "Timestamptz", - "Timestamptz", - "Int4", - "Int8", - "Bool", - "Int8Array" - ] - }, - "nullable": [ - null, - null, - null - ] - }, - "hash": "71eb9aca5ea309ddbb05986d47d04b1f2d7da89f733dce227911753c30238eea" -} diff --git a/apps/labrinth/.sqlx/query-8d38218e5a0c9297be7c6c77acf40a2339b12ff15f1f9e53a27a1c599a33e43b.json b/apps/labrinth/.sqlx/query-8d38218e5a0c9297be7c6c77acf40a2339b12ff15f1f9e53a27a1c599a33e43b.json deleted file mode 100644 index 9489929402..0000000000 --- a/apps/labrinth/.sqlx/query-8d38218e5a0c9297be7c6c77acf40a2339b12ff15f1f9e53a27a1c599a33e43b.json +++ /dev/null @@ -1,37 +0,0 @@ -{ - "db_name": "PostgreSQL", - "query": "SELECT\n WIDTH_BUCKET(\n EXTRACT(EPOCH FROM created)::bigint,\n EXTRACT(EPOCH FROM $1::timestamp with time zone AT TIME ZONE 'UTC')::bigint,\n EXTRACT(EPOCH FROM $2::timestamp with time zone AT TIME ZONE 'UTC')::bigint,\n $3::integer\n ) AS bucket,\n mod_id,\n SUM(amount) amount_sum\n FROM payouts_values\n WHERE\n -- only project revenue is counted here\n -- for affiliate code revenue, see `affiliate_code_revenue`\n payouts_values.mod_id IS NOT NULL\n AND payouts_values.mod_id = ANY($4)\n AND created BETWEEN $1 AND $2\n GROUP BY bucket, mod_id", - "describe": { - "columns": [ - { - "ordinal": 0, - "name": "bucket", - "type_info": "Int4" - }, - { - "ordinal": 1, - "name": "mod_id", - "type_info": "Int8" - }, - { - "ordinal": 2, - "name": "amount_sum", - "type_info": "Numeric" - } - ], - "parameters": { - "Left": [ - "Timestamptz", - "Timestamptz", - "Int4", - "Int8Array" - ] - }, - "nullable": [ - null, - true, - null - ] - }, - "hash": "8d38218e5a0c9297be7c6c77acf40a2339b12ff15f1f9e53a27a1c599a33e43b" -} diff --git a/apps/labrinth/src/routes/v3/analytics_get/mod.rs b/apps/labrinth/src/routes/v3/analytics_get/mod.rs index 3d763978f9..4f03fa4d57 100644 --- 
a/apps/labrinth/src/routes/v3/analytics_get/mod.rs +++ b/apps/labrinth/src/routes/v3/analytics_get/mod.rs @@ -574,6 +574,7 @@ async fn filter_allowed_project_ids( #[cfg(test)] mod tests { + use rust_decimal::Decimal; use serde_json::json; use super::*;