From b60a6f7c1908c3733fb2fd2c3c372124690345c5 Mon Sep 17 00:00:00 2001
From: Mano Toth <71388581+tothmano@users.noreply.github.com>
Date: Thu, 5 Dec 2024 16:30:51 +0100
Subject: [PATCH] Add percentileif (#153)
---
apl/aggregation-function/percentileif.mdx | 152 ++++++++++++++++++
.../statistical-functions.mdx | 1 +
mint.json | 1 +
3 files changed, 154 insertions(+)
create mode 100644 apl/aggregation-function/percentileif.mdx
diff --git a/apl/aggregation-function/percentileif.mdx b/apl/aggregation-function/percentileif.mdx
new file mode 100644
index 00000000..6521e389
--- /dev/null
+++ b/apl/aggregation-function/percentileif.mdx
@@ -0,0 +1,152 @@
+---
+title: percentileif
+description: 'This page explains how to use the percentileif aggregation function in APL.'
+---
+
+The `percentileif` aggregation function calculates the percentile of a numeric column, conditional on a specified boolean predicate. This function is useful for filtering data dynamically and determining percentile values based only on relevant subsets of data.
+
+You can use `percentileif` to gain insights in various scenarios, such as:
+
+- Identifying response time percentiles for HTTP requests from specific regions.
+- Calculating percentiles of span durations for specific service types in OpenTelemetry traces.
+- Analyzing security events by percentile within defined risk categories.
+
+## For users of other query languages
+
+If you come from other query languages, this section explains how to adjust your existing queries to achieve the same results in APL.
+
+
+
+
+The `percentileif` aggregation in APL works similarly to `percentile` combined with conditional filtering in SPL. However, APL integrates the condition directly into the aggregation for simplicity.
+
+
+```sql Splunk example
+stats perc95(req_duration_ms) as p95 where geo.country="US"
+```
+
+```kusto APL equivalent
+['sample-http-logs']
+| summarize percentileif(req_duration_ms, 95, geo.country == 'US')
+```
+
+
+
+
+
+In SQL, you typically calculate percentiles using window functions or aggregate functions combined with a `WHERE` clause. APL simplifies this by embedding the condition directly in the `percentileif` aggregation.
+
+
+```sql SQL example
+SELECT PERCENTILE_CONT(0.95) WITHIN GROUP (ORDER BY req_duration_ms)
+FROM sample_http_logs
+WHERE geo_country = 'US'
+```
+
+```kusto APL equivalent
+['sample-http-logs']
+| summarize percentileif(req_duration_ms, 95, geo.country == 'US')
+```
+
+
+
+
+
+## Usage
+
+### Syntax
+
+```kusto
+summarize percentileif(Field, Percentile, Predicate)
+```
+
+### Parameters
+
+| Parameter | Description |
+|------------------|--------------------------------------------------------------------------------------------------------------|
+| `Field` | The numeric field from which to calculate the percentile. |
+| `Percentile` | A number between 0 and 100 that specifies the percentile to calculate. |
+| `Predicate` | A Boolean expression that filters rows to include in the calculation. |
+
+### Returns
+
+The function returns a single numeric value representing the specified percentile of the `Field` for rows where the `Predicate` evaluates to `true`.
+
+## Use case examples
+
+
+
+
+You can use `percentileif` to analyze request durations for specific HTTP methods.
+
+**Query**
+
+```kusto
+['sample-http-logs']
+| summarize post_p90 = percentileif(req_duration_ms, 90, method == "POST"), get_p90 = percentileif(req_duration_ms, 90, method == "GET") by bin_auto(_time)
+```
+
+[Run in Playground](https://play.axiom.co/axiom-play-qf1k/explorer?initForm=%7B%22apl%22%3A%22%5B'sample-http-logs'%5D%20%7C%20summarize%20post_p90%20%3D%20percentileif(req_duration_ms%2C%2090%2C%20method%20%3D%3D%20'POST')%2C%20get_p90%20%3D%20percentileif(req_duration_ms%2C%2090%2C%20method%20%3D%3D%20'GET')%20by%20bin_auto(_time)%22%7D)
+
+**Output**
+
+| post_p90 | get_p90 |
+|--------------|---|
+| 1.691 ms | 1.453 ms |
+
+This query calculates the 90th percentile of request durations for HTTP POST and GET methods.
+
+
+
+
+You can use `percentileif` to measure span durations for specific services and operation kinds.
+
+**Query**
+
+```kusto
+['otel-demo-traces']
+| summarize percentileif(duration, 95, ['service.name'] == 'frontend' and kind == 'server')
+```
+
+[Run in Playground](https://play.axiom.co/axiom-play-qf1k/explorer?initForm=%7B%22apl%22%3A%22%5B%27otel-demo-traces%27%5D%20%7C%20summarize%20percentileif%28duration%2C%2095%2C%20%5B%27service.name%27%5D%20%3D%3D%20%27frontend%27%20and%20kind%20%3D%3D%20%27server%27%29%22%7D)
+
+**Output**
+
+| Percentile95 |
+|---------------|
+| 1.2s |
+
+This query calculates the 95th percentile of span durations for server spans in the `frontend` service.
+
+
+
+
+You can use `percentileif` to calculate response time percentiles for specific HTTP status codes.
+
+**Query**
+
+```kusto
+['sample-http-logs']
+| summarize percentileif(req_duration_ms, 75, status == '404')
+```
+
+[Run in Playground](https://play.axiom.co/axiom-play-qf1k/explorer?initForm=%7B%22apl%22%3A%22%5B%27sample-http-logs%27%5D%20%7C%20summarize%20percentileif%28req_duration_ms%2C%2075%2C%20status%20%3D%3D%20%27404%27%29%22%7D)
+
+**Output**
+
+| Percentile75 |
+|--------------|
+| 350 |
+
+This query calculates the 75th percentile of request durations for HTTP 404 errors.
+
+
+
+
+## List of related aggregations
+
+- [percentile](/apl/aggregation-function/percentile): Calculates the percentile for all rows without any filtering. Use `percentile` when you don’t need conditional filtering.
+- [avgif](/apl/aggregation-function/avgif): Calculates the average of a numeric column based on a condition. Use `avgif` for mean calculations instead of percentiles.
+- [minif](/apl/aggregation-function/minif): Returns the minimum value of a numeric column where a condition is true. Use `minif` for identifying the lowest values within subsets.
+- [maxif](/apl/aggregation-function/maxif): Returns the maximum value of a numeric column where a condition is true. Use `maxif` for identifying the highest values within subsets.
+- [sumif](/apl/aggregation-function/sumif): Sums a numeric column based on a condition. Use `sumif` for conditional total calculations.
\ No newline at end of file
diff --git a/apl/aggregation-function/statistical-functions.mdx b/apl/aggregation-function/statistical-functions.mdx
index e6d41ef3..dbb71c37 100644
--- a/apl/aggregation-function/statistical-functions.mdx
+++ b/apl/aggregation-function/statistical-functions.mdx
@@ -26,6 +26,7 @@ The table summarizes the aggregation functions available in APL. Use all these a
| [min](/apl/aggregation-function/min) | Returns the minimum value across the group. |
| [minif](/apl/aggregation-function/minif) | Returns the minimum of an expression in records for which the predicate evaluates to true. |
| [percentile](/apl/aggregation-function/percentile) | Calculates the requested percentiles of the group and produces a timeseries chart. |
+| [percentileif](/apl/aggregation-function/percentileif) | Calculates the requested percentiles of the field for the rows where the predicate evaluates to true. |
| [rate](/apl/aggregation-function/rate) | Calculates the rate of values in a group per second. |
| [stdev](/apl/aggregation-function/stdev) | Calculates the standard deviation of an expression across the group. |
| [stdevif](/apl/aggregation-function/stdevif) | Calculates the standard deviation of an expression in records for which the predicate evaluates to true. |
diff --git a/mint.json b/mint.json
index 8b20193e..63dc2948 100644
--- a/mint.json
+++ b/mint.json
@@ -366,6 +366,7 @@
"apl/aggregation-function/min",
"apl/aggregation-function/minif",
"apl/aggregation-function/percentile",
+ "apl/aggregation-function/percentileif",
"apl/aggregation-function/rate",
"apl/aggregation-function/stdev",
"apl/aggregation-function/stdevif",