-
-
Notifications
You must be signed in to change notification settings - Fork 175
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
* 2024 HTTP queries * Revert README * Linting * Convert more queries * Linting * Bug fixes * More conversions * More conversions * Final conversions * Linting * More queries * Linting * More linting * Resource Hint and Fetch Priority queries * More queries * Linting * Linting
- Loading branch information
1 parent
6f4be9c
commit 5cea947
Showing
34 changed files
with
1,825 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,48 @@ | ||
#standardSQL

# Measure the distribution of TCP Connections per site.
#
# For each client and HTTP version category of the main document, reports the
# 10th/25th/50th/75th/90th percentile of the page-level `_connections` value.

WITH main_document_versions AS (
  # One row per main-document request of a root page, tagged with a coarse
  # protocol bucket derived from the `respHttpVersion` summary field.
  SELECT
    client,
    page,
    CASE
      WHEN
        LOWER(JSON_EXTRACT_SCALAR(summary, '$.respHttpVersion')) IN ('quic', 'http/2', 'http/3') OR
        LOWER(JSON_EXTRACT_SCALAR(summary, '$.respHttpVersion')) LIKE 'h3%'
        THEN 'HTTP/2+'
      WHEN JSON_EXTRACT_SCALAR(summary, '$.respHttpVersion') IS NULL THEN 'Unknown'
      ELSE 'Non-HTTP/2'
    END AS http_version_category
  FROM
    `httparchive.all.requests`
  WHERE
    date = '2024-06-01' AND
    is_root_page AND
    is_main_document
),

page_connections AS (
  # Connection count recorded in each root page's summary.
  SELECT
    client,
    page,
    CAST(JSON_EXTRACT_SCALAR(summary, '$._connections') AS INT64) AS _connections
  FROM
    `httparchive.all.pages`
  WHERE
    date = '2024-06-01' AND
    is_root_page
)

SELECT
  percentile,
  client,
  http_version_category,
  COUNT(0) AS num_pages,
  # 1000 buckets, so percentile N lives at offset N * 10.
  APPROX_QUANTILES(_connections, 1000)[OFFSET(percentile * 10)] AS connections
FROM
  main_document_versions
INNER JOIN
  page_connections
USING
  (client, page)
CROSS JOIN
  UNNEST([10, 25, 50, 75, 90]) AS percentile
GROUP BY
  percentile,
  client,
  http_version_category
ORDER BY
  percentile,
  client,
  num_pages DESC,
  http_version_category
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,24 @@ | ||
# Counts and shares of root pages, per client, whose `_origin_dns.https` /
# `_origin_dns.svcb` payload entry is something other than an empty array,
# and of those whose entry contains an alpn value mentioning h3.

WITH dns_flags AS (
  # Evaluate each JSON/regex test once per page; flags may be NULL when the
  # payload entry is absent, which COUNTIF treats as not matching.
  SELECT
    client,
    JSON_EXTRACT(payload, '$._origin_dns.https') != '[]' AS has_https,
    REGEXP_EXTRACT(JSON_EXTRACT(payload, '$._origin_dns.https'), r'alpn=\\"[^"]*h3[^"]*\\"') IS NOT NULL AS https_alpn_h3,
    JSON_EXTRACT(payload, '$._origin_dns.svcb') != '[]' AS has_svcb,
    REGEXP_EXTRACT(JSON_EXTRACT(payload, '$._origin_dns.svcb'), r'alpn=\\"[^"]*h3[^"]*\\"') IS NOT NULL AS svcb_alpn_h3
  FROM
    `httparchive.all.pages`
  WHERE
    date = '2024-06-01' AND
    is_root_page
)

SELECT
  client,
  COUNT(0) AS total_pages,
  COUNTIF(has_https) AS dns_https,
  COUNTIF(has_https) / COUNT(0) AS pct_dns_https,
  COUNTIF(https_alpn_h3) AS dns_https_alpn,
  COUNTIF(https_alpn_h3) / COUNT(0) AS pct_dns_https_alpn,
  COUNTIF(has_svcb) AS dns_svcb,
  COUNTIF(has_svcb) / COUNT(0) AS pct_dns_svcb,
  COUNTIF(svcb_alpn_h3) AS dns_svcb_alpn,
  COUNTIF(svcb_alpn_h3) / COUNT(0) AS pct_dns_svcb_alpn,
  COUNTIF(has_https OR has_svcb) AS dns_https_or_svcb,
  COUNTIF(has_https OR has_svcb) / COUNT(0) AS pct_dns_https_or_svcb,
  COUNTIF(https_alpn_h3 OR svcb_alpn_h3) AS dns_https_or_svcb_alpn,
  COUNTIF(https_alpn_h3 OR svcb_alpn_h3) / COUNT(0) AS pct_dns_https_or_svcb_alpn
FROM
  dns_flags
GROUP BY
  client
ORDER BY
  client
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,31 @@ | ||
# Counts and shares of root pages, per client and per `_cdn_provider` of the
# main document, whose `_origin_dns.https` / `_origin_dns.svcb` payload entry
# is something other than an empty array, and of those whose entry contains
# an alpn value mentioning h3.

WITH dns_flags_by_cdn AS (
  # Pair every root page with its main-document request to pick up the CDN,
  # and evaluate each JSON/regex test once per page. Flags may be NULL when
  # the payload entry is absent; COUNTIF treats that as not matching.
  SELECT
    client,
    JSON_EXTRACT_SCALAR(r.summary, '$._cdn_provider') AS cdn,
    JSON_EXTRACT(p.payload, '$._origin_dns.https') != '[]' AS has_https,
    REGEXP_EXTRACT(JSON_EXTRACT(p.payload, '$._origin_dns.https'), r'alpn=\\"[^"]*h3[^"]*\\"') IS NOT NULL AS https_alpn_h3,
    JSON_EXTRACT(p.payload, '$._origin_dns.svcb') != '[]' AS has_svcb,
    REGEXP_EXTRACT(JSON_EXTRACT(p.payload, '$._origin_dns.svcb'), r'alpn=\\"[^"]*h3[^"]*\\"') IS NOT NULL AS svcb_alpn_h3
  FROM
    `httparchive.all.pages` AS p
  INNER JOIN
    `httparchive.all.requests` AS r
  USING (client, date, page, is_root_page)
  WHERE
    date = '2024-06-01' AND
    is_root_page AND
    is_main_document
)

SELECT
  client,
  COUNT(0) AS total_pages,
  cdn,
  COUNTIF(has_https) AS dns_https,
  COUNTIF(has_https) / COUNT(0) AS pct_dns_https,
  COUNTIF(https_alpn_h3) AS dns_https_alpn,
  COUNTIF(https_alpn_h3) / COUNT(0) AS pct_dns_https_alpn,
  COUNTIF(has_svcb) AS dns_svcb,
  COUNTIF(has_svcb) / COUNT(0) AS pct_dns_svcb,
  COUNTIF(svcb_alpn_h3) AS dns_svcb_alpn,
  COUNTIF(svcb_alpn_h3) / COUNT(0) AS pct_dns_svcb_alpn,
  COUNTIF(has_https OR has_svcb) AS dns_https_or_svcb,
  COUNTIF(has_https OR has_svcb) / COUNT(0) AS pct_dns_https_or_svcb,
  COUNTIF(https_alpn_h3 OR svcb_alpn_h3) AS dns_https_or_svcb_alpn,
  COUNTIF(https_alpn_h3 OR svcb_alpn_h3) / COUNT(0) AS pct_dns_https_or_svcb_alpn
FROM
  dns_flags_by_cdn
GROUP BY
  client,
  cdn
ORDER BY
  client,
  cdn
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,60 @@ | ||
#standardSQL

# Distribution of number of early hints resources
#
# For every percentile from 1 to 100, per client, reports how many `link:`
# early-hint headers a root page's main document carries and how many
# comma-separated entries those headers contain in total.

CREATE TEMPORARY FUNCTION getNumEarlyHints(early_hints_header STRING)
RETURNS STRUCT<num_hints INT, num_resources_hinted INT> LANGUAGE js AS '''
// Input: JSON text whose values are header strings.
// Output: number of values starting with "link:" and the total number of
// comma-separated pieces across those values. Any parse or type error
// yields zeros for both counters.
try {
  var linkHeaders = 0;
  var hintedResources = 0;
  for (var header of Object.values(JSON.parse(early_hints_header))) {
    if (!header.startsWith('link:')) {
      continue;
    }
    linkHeaders += 1;
    hintedResources += header.split(',').length;
  }
  return {
    num_hints: linkHeaders,
    num_resources_hinted: hintedResources
  };
} catch {
  return {
    num_hints: 0,
    num_resources_hinted: 0
  };
}
''';

WITH main_document_hints AS (
  # One row per main-document request of a root page with its hint counters.
  SELECT
    client,
    page,
    getNumEarlyHints(JSON_EXTRACT(payload, '$._early_hint_headers')) AS early_hints
  FROM
    `httparchive.all.requests`
  WHERE
    date = '2024-06-01' AND
    is_root_page AND
    is_main_document
)

SELECT
  client,
  percentile,
  COUNT(DISTINCT page) AS num_pages,
  # 1000 buckets, so percentile N lives at offset N * 10.
  APPROX_QUANTILES(early_hints.num_hints, 1000)[OFFSET(percentile * 10)] AS num_hints,
  APPROX_QUANTILES(early_hints.num_resources_hinted, 1000)[OFFSET(percentile * 10)] AS num_resources_hinted
FROM
  main_document_hints
CROSS JOIN
  UNNEST(GENERATE_ARRAY(1, 100)) AS percentile
GROUP BY
  client,
  percentile
ORDER BY
  client,
  percentile
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,19 @@ | ||
#standardSQL

# Adoption of early hints: number and share of root pages, per client, whose
# main document has an `_early_hint_headers` entry in its payload, plus the
# subset whose entry contains the text "shopify".

SELECT
  client,
  COUNT(DISTINCT page) AS num_pages,
  COUNTIF(JSON_EXTRACT(payload, '$._early_hint_headers') IS NOT NULL) AS early_hints,
  COUNTIF(JSON_EXTRACT(payload, '$._early_hint_headers') IS NOT NULL) / COUNT(DISTINCT page) AS early_hints_pct,
  COUNTIF(JSON_EXTRACT(payload, '$._early_hint_headers') LIKE '%shopify%') AS early_hints_shopify,
  COUNTIF(JSON_EXTRACT(payload, '$._early_hint_headers') LIKE '%shopify%') / COUNT(DISTINCT page) AS early_hints_shopify_pct
FROM
  `httparchive.all.requests`
WHERE
  date = '2024-06-01' AND
  is_main_document AND
  is_root_page
GROUP BY
  client
# Fixed row order, matching the other queries in this directory.
ORDER BY
  client
75 changes: 75 additions & 0 deletions
75
sql/2024/http/early_hints_usage_as_percentile_within_used.sql
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,75 @@ | ||
# Percentiles (10/25/50/75/90/100) of how many early-hint preloads a main
# document declares for each `as` type, restricted to documents that declare
# at least one preload, split by client and is_root_page.

CREATE TEMPORARY FUNCTION getEarlyHints(early_hints_header STRING)
RETURNS STRUCT<preconnects INT64, preloads INT64, asTypes ARRAY<STRUCT<key STRING, value INT64>>> LANGUAGE js AS '''
// Input: JSON text whose values are header strings. Only values starting
// with "link:" are inspected; each is split on "," into hints and each hint
// on ";" into attributes. Output: number of rel=preconnect hints, number of
// rel=preload hints, and the preload count per `as` value. Any parse or type
// error yields an empty object.

// Drops the first prefixLength characters and removes every quote character.
function attributeValue(attribute, prefixLength) {
  return attribute.slice(prefixLength).replaceAll('"', '').replaceAll("'", '');
}

try {
  var preconnectCount = 0;
  var preloadCount = 0;
  var preloadsByAs = {};
  var headers = JSON.parse(early_hints_header);
  for (var header of Object.values(headers)) {
    if (!header.startsWith('link:')) {
      continue;
    }
    for (var hint of header.split(',')) {
      var hintType = '';
      var fetchType = '';
      for (var rawAttribute of hint.split(';')) {
        var attribute = rawAttribute.trim();
        if (attribute.startsWith('rel')) {
          hintType = attributeValue(attribute, 4);
        }
        if (attribute.startsWith('as')) {
          fetchType = attributeValue(attribute, 3);
        }
      }
      if (hintType === 'preconnect') {
        preconnectCount++;
      }
      if (hintType === 'preload') {
        preloadCount++;
        preloadsByAs[fetchType] = preloadsByAs[fetchType] ? preloadsByAs[fetchType] + 1 : 1;
      }
    }
  }
  return {
    preconnects: preconnectCount,
    preloads: preloadCount,
    asTypes: Object.keys(preloadsByAs).map(asValue => ({key: asValue, value: preloadsByAs[asValue]}))
  };
} catch (e) {
  return {};
}
''';

SELECT
  client,
  is_root_page,
  percentile,
  as_type.key AS asType,
  # 1000 buckets, so percentile N lives at offset N * 10.
  APPROX_QUANTILES(CAST(as_type.value AS INT64), 1000 IGNORE NULLS)[OFFSET(percentile * 10)] AS number,
  ARRAY_TO_STRING(ARRAY_AGG(DISTINCT page LIMIT 5), ' ') AS sample_urls
FROM
  `httparchive.all.requests`
CROSS JOIN
  UNNEST(getEarlyHints(JSON_EXTRACT(payload, '$._early_hint_headers')).asTypes) AS as_type
CROSS JOIN
  UNNEST([10, 25, 50, 75, 90, 100]) AS percentile
WHERE
  date = '2024-06-01' AND
  is_main_document AND
  JSON_QUERY(payload, '$._early_hint_headers') != '' AND
  as_type.key IS NOT NULL
GROUP BY
  client,
  is_root_page,
  percentile,
  as_type.key
ORDER BY
  client,
  is_root_page,
  percentile,
  as_type.key
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,92 @@ | ||
# Number and share of pages, per client and is_root_page, whose main document
# declares at least one early-hint preload for each `as` type. The share is
# taken against all pages of the same client / is_root_page group.

CREATE TEMPORARY FUNCTION getEarlyHints(early_hints_header STRING)
RETURNS STRUCT<preconnects INT64, preloads INT64, asTypes ARRAY<STRUCT<key STRING, value INT64>>> LANGUAGE js AS '''
// Input: JSON text whose values are header strings. Only values starting
// with "link:" are inspected; each is split on "," into hints and each hint
// on ";" into attributes. Output: number of rel=preconnect hints, number of
// rel=preload hints, and the preload count per `as` value. Any parse or type
// error yields an empty object.

// Drops the first prefixLength characters and removes every quote character.
function attributeValue(attribute, prefixLength) {
  return attribute.slice(prefixLength).replaceAll('"', '').replaceAll("'", '');
}

try {
  var preconnectCount = 0;
  var preloadCount = 0;
  var preloadsByAs = {};
  var headers = JSON.parse(early_hints_header);
  for (var header of Object.values(headers)) {
    if (!header.startsWith('link:')) {
      continue;
    }
    for (var hint of header.split(',')) {
      var hintType = '';
      var fetchType = '';
      for (var rawAttribute of hint.split(';')) {
        var attribute = rawAttribute.trim();
        if (attribute.startsWith('rel')) {
          hintType = attributeValue(attribute, 4);
        }
        if (attribute.startsWith('as')) {
          fetchType = attributeValue(attribute, 3);
        }
      }
      if (hintType === 'preconnect') {
        preconnectCount++;
      }
      if (hintType === 'preload') {
        preloadCount++;
        preloadsByAs[fetchType] = preloadsByAs[fetchType] ? preloadsByAs[fetchType] + 1 : 1;
      }
    }
  }
  return {
    preconnects: preconnectCount,
    preloads: preloadCount,
    asTypes: Object.keys(preloadsByAs).map(asValue => ({key: asValue, value: preloadsByAs[asValue]}))
  };
} catch (e) {
  return {};
}
''';

WITH page_totals AS (
  # Denominator: number of pages per crawl date, client and is_root_page.
  SELECT
    date,
    client,
    is_root_page,
    COUNT(0) AS total
  FROM
    `httparchive.all.pages`
  WHERE
    date = '2024-06-01'
  GROUP BY
    date,
    client,
    is_root_page
)

SELECT
  client,
  is_root_page,
  as_type.key AS asType,
  COUNT(DISTINCT page) AS num_pages,
  COUNT(DISTINCT page) / total AS pct_pages,
  ARRAY_TO_STRING(ARRAY_AGG(DISTINCT page LIMIT 5), ' ') AS sample_urls
FROM
  `httparchive.all.requests`
CROSS JOIN
  UNNEST(getEarlyHints(JSON_EXTRACT(payload, '$._early_hint_headers')).asTypes) AS as_type
INNER JOIN
  page_totals
USING (date, client, is_root_page)
WHERE
  date = '2024-06-01' AND
  is_main_document AND
  JSON_QUERY(payload, '$._early_hint_headers') != '' AND
  as_type.key IS NOT NULL
GROUP BY
  client,
  is_root_page,
  total,
  as_type.key
ORDER BY
  client,
  is_root_page,
  pct_pages DESC
Oops, something went wrong.