Skip to content

Commit

Permalink
HTTP queries for 2024 (#3763)
Browse files Browse the repository at this point in the history
* 2024 HTTP queries

* Revert README

* Linting

* Convert more queries

* Linting

* Bug fixes

* More conversions

* More conversions

* Final conversions

* Linting

* More queries

* Linting

* More linting

* Resource Hint and Fetch Priority queries

* More queries

* Linting

* Linting
  • Loading branch information
tunetheweb authored Nov 5, 2024
1 parent 6f4be9c commit 5cea947
Show file tree
Hide file tree
Showing 34 changed files with 1,825 additions and 0 deletions.
48 changes: 48 additions & 0 deletions sql/2024/http/connections_per_page_load_dist.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
#standardSQL

# Percentile distribution of the `_connections` value recorded per root page,
# broken down by client and by the HTTP version category derived from the
# `respHttpVersion` of the page's main document request.

WITH main_documents AS (
  # One row per main document request; the protocol is lowercased once here
  # so that the categorisation below does not have to repeat the extraction.
  SELECT
    client,
    page,
    LOWER(JSON_EXTRACT_SCALAR(summary, '$.respHttpVersion')) AS protocol
  FROM
    `httparchive.all.requests`
  WHERE
    date = '2024-06-01' AND
    is_root_page AND
    is_main_document
),

page_versions AS (
  SELECT
    client,
    page,
    CASE
      WHEN protocol IN ('quic', 'http/2', 'http/3') OR protocol LIKE 'h3%' THEN 'HTTP/2+'
      WHEN protocol IS NULL THEN 'Unknown'
      ELSE 'Non-HTTP/2'
    END AS http_version_category
  FROM
    main_documents
),

page_connections AS (
  SELECT
    client,
    page,
    CAST(JSON_EXTRACT_SCALAR(summary, '$._connections') AS INT64) AS _connections
  FROM
    `httparchive.all.pages`
  WHERE
    date = '2024-06-01' AND
    is_root_page
)

SELECT
  percentile,
  client,
  http_version_category,
  COUNT(0) AS num_pages,
  # 1000 quantile buckets, so percentile N lives at offset N * 10.
  APPROX_QUANTILES(_connections, 1000)[OFFSET(percentile * 10)] AS connections
FROM
  page_versions
INNER JOIN
  page_connections
USING
  (client, page)
CROSS JOIN
  UNNEST([10, 25, 50, 75, 90]) AS percentile
GROUP BY
  percentile,
  client,
  http_version_category
ORDER BY
  percentile,
  client,
  num_pages DESC,
  http_version_category
24 changes: 24 additions & 0 deletions sql/2024/http/dns_https_svcb_usage.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
# Per client: number and share of root pages whose `$._origin_dns.https` and
# `$._origin_dns.svcb` payload entries are not the empty array '[]', and whose
# entries contain an escaped `alpn="..."` value that includes "h3".

WITH origin_dns AS (
  SELECT
    client,
    JSON_EXTRACT(payload, '$._origin_dns.https') AS https_records,
    JSON_EXTRACT(payload, '$._origin_dns.svcb') AS svcb_records
  FROM
    `httparchive.all.pages`
  WHERE
    date = '2024-06-01' AND
    is_root_page
),

dns_flags AS (
  # The has_* flags are NULL when the payload entry is missing; COUNTIF
  # treats NULL as "not counted", matching a direct comparison in COUNTIF.
  SELECT
    client,
    https_records != '[]' AS has_https,
    svcb_records != '[]' AS has_svcb,
    REGEXP_EXTRACT(https_records, r'alpn=\\"[^"]*h3[^"]*\\"') IS NOT NULL AS https_has_h3,
    REGEXP_EXTRACT(svcb_records, r'alpn=\\"[^"]*h3[^"]*\\"') IS NOT NULL AS svcb_has_h3
  FROM
    origin_dns
)

SELECT
  client,
  COUNT(0) AS total_pages,
  COUNTIF(has_https) AS dns_https,
  COUNTIF(has_https) / COUNT(0) AS pct_dns_https,
  COUNTIF(https_has_h3) AS dns_https_alpn,
  COUNTIF(https_has_h3) / COUNT(0) AS pct_dns_https_alpn,
  COUNTIF(has_svcb) AS dns_svcb,
  COUNTIF(has_svcb) / COUNT(0) AS pct_dns_svcb,
  COUNTIF(svcb_has_h3) AS dns_svcb_alpn,
  COUNTIF(svcb_has_h3) / COUNT(0) AS pct_dns_svcb_alpn,
  COUNTIF(has_https OR has_svcb) AS dns_https_or_svcb,
  COUNTIF(has_https OR has_svcb) / COUNT(0) AS pct_dns_https_or_svcb,
  COUNTIF(https_has_h3 OR svcb_has_h3) AS dns_https_or_svcb_alpn,
  COUNTIF(https_has_h3 OR svcb_has_h3) / COUNT(0) AS pct_dns_https_or_svcb_alpn
FROM
  dns_flags
GROUP BY
  client
ORDER BY
  client
31 changes: 31 additions & 0 deletions sql/2024/http/dns_https_svcb_usage_cdn.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
# Per client and `_cdn_provider` of the main document request: number and
# share of root pages whose `$._origin_dns.https` / `$._origin_dns.svcb`
# payload entries are not '[]', and whose entries contain an escaped
# `alpn="..."` value that includes "h3".

WITH page_dns AS (
  SELECT
    client,
    JSON_EXTRACT_SCALAR(r.summary, '$._cdn_provider') AS cdn,
    JSON_EXTRACT(p.payload, '$._origin_dns.https') AS https_records,
    JSON_EXTRACT(p.payload, '$._origin_dns.svcb') AS svcb_records
  FROM
    `httparchive.all.pages` AS p
  INNER JOIN
    `httparchive.all.requests` AS r
  USING (client, date, page, is_root_page)
  WHERE
    date = '2024-06-01' AND
    is_root_page AND
    is_main_document
),

dns_flags AS (
  # The has_* flags are NULL when the payload entry is missing; COUNTIF
  # treats NULL as "not counted", matching a direct comparison in COUNTIF.
  SELECT
    client,
    cdn,
    https_records != '[]' AS has_https,
    svcb_records != '[]' AS has_svcb,
    REGEXP_EXTRACT(https_records, r'alpn=\\"[^"]*h3[^"]*\\"') IS NOT NULL AS https_has_h3,
    REGEXP_EXTRACT(svcb_records, r'alpn=\\"[^"]*h3[^"]*\\"') IS NOT NULL AS svcb_has_h3
  FROM
    page_dns
)

SELECT
  client,
  COUNT(0) AS total_pages,
  cdn,
  COUNTIF(has_https) AS dns_https,
  COUNTIF(has_https) / COUNT(0) AS pct_dns_https,
  COUNTIF(https_has_h3) AS dns_https_alpn,
  COUNTIF(https_has_h3) / COUNT(0) AS pct_dns_https_alpn,
  COUNTIF(has_svcb) AS dns_svcb,
  COUNTIF(has_svcb) / COUNT(0) AS pct_dns_svcb,
  COUNTIF(svcb_has_h3) AS dns_svcb_alpn,
  COUNTIF(svcb_has_h3) / COUNT(0) AS pct_dns_svcb_alpn,
  COUNTIF(has_https OR has_svcb) AS dns_https_or_svcb,
  COUNTIF(has_https OR has_svcb) / COUNT(0) AS pct_dns_https_or_svcb,
  COUNTIF(https_has_h3 OR svcb_has_h3) AS dns_https_or_svcb_alpn,
  COUNTIF(https_has_h3 OR svcb_has_h3) / COUNT(0) AS pct_dns_https_or_svcb_alpn
FROM
  dns_flags
GROUP BY
  client,
  cdn
ORDER BY
  client,
  cdn
60 changes: 60 additions & 0 deletions sql/2024/http/early_hints_per_page.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
#standardSQL

# Distribution of number of early hints resources

# Counts the entries of a JSON-encoded header collection that start with
# 'link:' (num_hints) and the total number of comma-separated parts across
# those entries (num_resources_hinted). Any parsing or type error yields 0/0.
CREATE TEMPORARY FUNCTION getNumEarlyHints(early_hints_header STRING)
RETURNS STRUCT<num_hints INT, num_resources_hinted INT> LANGUAGE js AS '''
try {
  const headers = JSON.parse(early_hints_header);
  // Object.keys throws on a null parse result, which lands in the catch below.
  const linkHeaders = Object.keys(headers)
    .map(name => headers[name])
    .filter(value => value.startsWith('link:'));
  const resourceCount = linkHeaders.reduce(
    (total, value) => total + value.split(',').length,
    0
  );
  return {
    num_hints: linkHeaders.length,
    num_resources_hinted: resourceCount
  };
} catch (e) {
  return {
    num_hints: 0,
    num_resources_hinted: 0
  };
}
''';

# Percentiles 1..100 of the getNumEarlyHints() counts computed for the main
# document request of each root page, per client.
WITH main_document_hints AS (
  SELECT
    client,
    page,
    getNumEarlyHints(JSON_EXTRACT(payload, '$._early_hint_headers')) AS early_hints
  FROM
    `httparchive.all.requests`
  WHERE
    date = '2024-06-01' AND
    is_root_page AND
    is_main_document
)

SELECT
  client,
  percentile,
  COUNT(DISTINCT page) AS num_pages,
  # 1000 quantile buckets, so percentile N lives at offset N * 10.
  APPROX_QUANTILES(early_hints.num_hints, 1000)[OFFSET(percentile * 10)] AS num_hints,
  APPROX_QUANTILES(early_hints.num_resources_hinted, 1000)[OFFSET(percentile * 10)] AS num_resources_hinted
FROM
  main_document_hints
CROSS JOIN
  UNNEST(GENERATE_ARRAY(1, 100)) AS percentile
GROUP BY
  client,
  percentile
ORDER BY
  client,
  percentile
19 changes: 19 additions & 0 deletions sql/2024/http/early_hints_usage.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
#standardSQL

# Usage of Early Hints per client: number and share of root pages whose main
# document request has `_early_hint_headers` in its payload, and the subset of
# those whose `_early_hint_headers` value contains the text "shopify".

SELECT
  client,
  COUNT(DISTINCT page) AS num_pages,
  COUNTIF(JSON_EXTRACT(payload, '$._early_hint_headers') IS NOT NULL) AS early_hints,
  COUNTIF(JSON_EXTRACT(payload, '$._early_hint_headers') IS NOT NULL) / COUNT(DISTINCT page) AS early_hints_pct,
  COUNTIF(JSON_EXTRACT(payload, '$._early_hint_headers') LIKE '%shopify%') AS early_hints_shopify,
  COUNTIF(JSON_EXTRACT(payload, '$._early_hint_headers') LIKE '%shopify%') / COUNT(DISTINCT page) AS early_hints_shopify_pct
FROM
  `httparchive.all.requests`
WHERE
  date = '2024-06-01' AND
  is_main_document AND
  is_root_page
GROUP BY
  client
# Explicit ordering keeps the output deterministic, as in the sibling queries.
ORDER BY
  client
75 changes: 75 additions & 0 deletions sql/2024/http/early_hints_usage_as_percentile_within_used.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
# Parses a JSON-encoded header collection and, for every entry starting with
# 'link:', counts hints whose rel is 'preconnect' or 'preload'. Preloads are
# additionally tallied by their `as` value (empty string when absent).
# Any parsing or type error yields an empty object.
CREATE TEMPORARY FUNCTION getEarlyHints(early_hints_header STRING)
RETURNS STRUCT<preconnects INT64, preloads INT64, asTypes ARRAY<STRUCT<key STRING, value INT64>>> LANGUAGE js AS '''
try {
  const headers = JSON.parse(early_hints_header);
  const stripQuotes = text => text.replaceAll('"', '').replaceAll("'", '');
  const asCounts = {};
  let preconnects = 0;
  let preloads = 0;

  for (const header of Object.values(headers)) {
    if (!header.startsWith('link:')) {
      continue;
    }
    for (const hint of header.split(',')) {
      let hintType = '';
      let fetchType = '';
      for (const rawAttribute of hint.split(';')) {
        const attribute = rawAttribute.trim();
        // slice(4) / slice(3) drop the leading "rel=" / "as=" prefix.
        if (attribute.startsWith('rel')) {
          hintType = stripQuotes(attribute.slice(4));
        }
        if (attribute.startsWith('as')) {
          fetchType = stripQuotes(attribute.slice(3));
        }
      }
      if (hintType === 'preconnect') {
        preconnects++;
      }
      if (hintType === 'preload') {
        preloads++;
        asCounts[fetchType] = (asCounts[fetchType] || 0) + 1;
      }
    }
  }

  return {
    preconnects,
    preloads,
    asTypes: Object.entries(asCounts).map(([key, value]) => ({key, value}))
  };
} catch (e) {
  return {};
}
''';

# Percentiles of the per-request count of early-hint preloads for each `as`
# type, per client and is_root_page, with up to five sample page URLs.
WITH preload_as_types AS (
  # One row per (main document request, `as` type) reported by getEarlyHints().
  SELECT
    client,
    is_root_page,
    page,
    as_type.key AS as_type_key,
    as_type.value AS as_type_count
  FROM
    `httparchive.all.requests`
  CROSS JOIN
    UNNEST(getEarlyHints(JSON_EXTRACT(payload, '$._early_hint_headers')).asTypes) AS as_type
  WHERE
    date = '2024-06-01' AND
    is_main_document AND
    JSON_QUERY(payload, '$._early_hint_headers') != '' AND
    as_type.key IS NOT NULL
)

SELECT
  client,
  is_root_page,
  percentile,
  as_type_key AS asType,
  # 1000 quantile buckets, so percentile N lives at offset N * 10.
  APPROX_QUANTILES(CAST(as_type_count AS INT64), 1000 IGNORE NULLS)[OFFSET(percentile * 10)] AS number,
  ARRAY_TO_STRING(ARRAY_AGG(DISTINCT page LIMIT 5), ' ') AS sample_urls
FROM
  preload_as_types
CROSS JOIN
  UNNEST([10, 25, 50, 75, 90, 100]) AS percentile
GROUP BY
  client,
  is_root_page,
  percentile,
  as_type_key
ORDER BY
  client,
  is_root_page,
  percentile,
  as_type_key
92 changes: 92 additions & 0 deletions sql/2024/http/early_hints_usage_as_usage.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
# Parses a JSON-encoded header collection and, for every entry starting with
# 'link:', counts hints whose rel is 'preconnect' or 'preload'. Preloads are
# additionally tallied by their `as` value (empty string when absent).
# Any parsing or type error yields an empty object.
CREATE TEMPORARY FUNCTION getEarlyHints(early_hints_header STRING)
RETURNS STRUCT<preconnects INT64, preloads INT64, asTypes ARRAY<STRUCT<key STRING, value INT64>>> LANGUAGE js AS '''
try {
  const headers = JSON.parse(early_hints_header);
  const stripQuotes = text => text.replaceAll('"', '').replaceAll("'", '');
  const asCounts = {};
  let preconnects = 0;
  let preloads = 0;

  for (const header of Object.values(headers)) {
    if (!header.startsWith('link:')) {
      continue;
    }
    for (const hint of header.split(',')) {
      let hintType = '';
      let fetchType = '';
      for (const rawAttribute of hint.split(';')) {
        const attribute = rawAttribute.trim();
        // slice(4) / slice(3) drop the leading "rel=" / "as=" prefix.
        if (attribute.startsWith('rel')) {
          hintType = stripQuotes(attribute.slice(4));
        }
        if (attribute.startsWith('as')) {
          fetchType = stripQuotes(attribute.slice(3));
        }
      }
      if (hintType === 'preconnect') {
        preconnects++;
      }
      if (hintType === 'preload') {
        preloads++;
        asCounts[fetchType] = (asCounts[fetchType] || 0) + 1;
      }
    }
  }

  return {
    preconnects,
    preloads,
    asTypes: Object.entries(asCounts).map(([key, value]) => ({key, value}))
  };
} catch (e) {
  return {};
}
''';

# Number and share of pages whose main document early hints preload each
# `as` type, per client and is_root_page, with up to five sample page URLs.
WITH totals AS (
  # Denominator: all pages in the crawl per client and is_root_page.
  SELECT
    date,
    client,
    is_root_page,
    COUNT(0) AS total
  FROM
    `httparchive.all.pages`
  WHERE
    date = '2024-06-01'
  GROUP BY
    date,
    client,
    is_root_page
),

preload_as_types AS (
  # One row per (main document request, `as` type) reported by getEarlyHints().
  SELECT
    date,
    client,
    is_root_page,
    page,
    as_type.key AS as_type_key
  FROM
    `httparchive.all.requests`
  CROSS JOIN
    UNNEST(getEarlyHints(JSON_EXTRACT(payload, '$._early_hint_headers')).asTypes) AS as_type
  WHERE
    date = '2024-06-01' AND
    is_main_document AND
    JSON_QUERY(payload, '$._early_hint_headers') != '' AND
    as_type.key IS NOT NULL
)

SELECT
  client,
  is_root_page,
  as_type_key AS asType,
  COUNT(DISTINCT page) AS num_pages,
  COUNT(DISTINCT page) / total AS pct_pages,
  ARRAY_TO_STRING(ARRAY_AGG(DISTINCT page LIMIT 5), ' ') AS sample_urls
FROM
  preload_as_types
INNER JOIN
  totals
USING (date, client, is_root_page)
GROUP BY
  client,
  is_root_page,
  total,
  as_type_key
ORDER BY
  client,
  is_root_page,
  pct_pages DESC
Loading

0 comments on commit 5cea947

Please sign in to comment.