diff --git a/sql/2024/http/connections_per_page_load_dist.sql b/sql/2024/http/connections_per_page_load_dist.sql new file mode 100644 index 00000000000..3d2d55cf768 --- /dev/null +++ b/sql/2024/http/connections_per_page_load_dist.sql @@ -0,0 +1,48 @@ +#standardSQL + +# Measure the distribution of TCP Connections per site. + +SELECT + percentile, + client, + http_version_category, + COUNT(0) AS num_pages, + APPROX_QUANTILES(_connections, 1000)[OFFSET(percentile * 10)] AS connections +FROM ( + SELECT + client, + page, + CASE + WHEN LOWER(JSON_EXTRACT_SCALAR(summary, '$.respHttpVersion')) = 'quic' OR LOWER(JSON_EXTRACT_SCALAR(summary, '$.respHttpVersion')) LIKE 'h3%' THEN 'HTTP/2+' + WHEN LOWER(JSON_EXTRACT_SCALAR(summary, '$.respHttpVersion')) = 'http/2' OR LOWER(JSON_EXTRACT_SCALAR(summary, '$.respHttpVersion')) = 'http/3' THEN 'HTTP/2+' + WHEN JSON_EXTRACT_SCALAR(summary, '$.respHttpVersion') IS NULL THEN 'Unknown' + ELSE 'Non-HTTP/2' + END AS http_version_category + FROM + `httparchive.all.requests` + WHERE + date = '2024-06-01' AND + is_root_page AND + is_main_document) +JOIN ( + SELECT + client, + page, + CAST(JSON_EXTRACT_SCALAR(summary, '$._connections') AS INT64) AS _connections + FROM + `httparchive.all.pages` + WHERE + date = '2024-06-01' AND + is_root_page) +USING + (client, page), + UNNEST([10, 25, 50, 75, 90]) AS percentile +GROUP BY + percentile, + client, + http_version_category +ORDER BY + percentile, + client, + num_pages DESC, + http_version_category diff --git a/sql/2024/http/dns_https_svcb_usage.sql b/sql/2024/http/dns_https_svcb_usage.sql new file mode 100644 index 00000000000..6cdd09c00cb --- /dev/null +++ b/sql/2024/http/dns_https_svcb_usage.sql @@ -0,0 +1,24 @@ +SELECT + client, + COUNT(0) AS total_pages, + COUNTIF(JSON_EXTRACT(payload, '$._origin_dns.https') != '[]') AS dns_https, + COUNTIF(JSON_EXTRACT(payload, '$._origin_dns.https') != '[]') / COUNT(0) AS pct_dns_https, + COUNTIF(REGEXP_EXTRACT(JSON_EXTRACT(payload, '$._origin_dns.https'), r'alpn=\\"[^"]*h3[^"]*\\"') IS NOT NULL) AS dns_https_alpn, + COUNTIF(REGEXP_EXTRACT(JSON_EXTRACT(payload, '$._origin_dns.https'), r'alpn=\\"[^"]*h3[^"]*\\"') IS NOT NULL) / COUNT(0) AS pct_dns_https_alpn, + COUNTIF(JSON_EXTRACT(payload, '$._origin_dns.svcb') != '[]') AS dns_svcb, + COUNTIF(JSON_EXTRACT(payload, '$._origin_dns.svcb') != '[]') / COUNT(0) AS pct_dns_svcb, + COUNTIF(REGEXP_EXTRACT(JSON_EXTRACT(payload, '$._origin_dns.svcb'), r'alpn=\\"[^"]*h3[^"]*\\"') IS NOT NULL) AS dns_svcb_alpn, + COUNTIF(REGEXP_EXTRACT(JSON_EXTRACT(payload, '$._origin_dns.svcb'), r'alpn=\\"[^"]*h3[^"]*\\"') IS NOT NULL) / COUNT(0) AS pct_dns_svcb_alpn, + COUNTIF(JSON_EXTRACT(payload, '$._origin_dns.https') != '[]' OR JSON_EXTRACT(payload, '$._origin_dns.svcb') != '[]') AS dns_https_or_svcb, + COUNTIF(JSON_EXTRACT(payload, '$._origin_dns.https') != '[]' OR JSON_EXTRACT(payload, '$._origin_dns.svcb') != '[]') / COUNT(0) AS pct_dns_https_or_svcb, + COUNTIF(REGEXP_EXTRACT(JSON_EXTRACT(payload, '$._origin_dns.https'), r'alpn=\\"[^"]*h3[^"]*\\"') IS NOT NULL OR REGEXP_EXTRACT(JSON_EXTRACT(payload, '$._origin_dns.svcb'), r'alpn=\\"[^"]*h3[^"]*\\"') IS NOT NULL) AS dns_https_or_svcb_alpn, + COUNTIF(REGEXP_EXTRACT(JSON_EXTRACT(payload, '$._origin_dns.https'), r'alpn=\\"[^"]*h3[^"]*\\"') IS NOT NULL OR REGEXP_EXTRACT(JSON_EXTRACT(payload, '$._origin_dns.svcb'), r'alpn=\\"[^"]*h3[^"]*\\"') IS NOT NULL) / COUNT(0) AS pct_dns_https_or_svcb_alpn +FROM + `httparchive.all.pages` +WHERE + date = '2024-06-01' AND + is_root_page +GROUP BY + client +ORDER BY + 
client diff --git a/sql/2024/http/dns_https_svcb_usage_cdn.sql b/sql/2024/http/dns_https_svcb_usage_cdn.sql new file mode 100644 index 00000000000..31bd3e8a45f --- /dev/null +++ b/sql/2024/http/dns_https_svcb_usage_cdn.sql @@ -0,0 +1,31 @@ +SELECT + client, + COUNT(0) AS total_pages, + JSON_EXTRACT_SCALAR(r.summary, '$._cdn_provider') AS cdn, + COUNTIF(JSON_EXTRACT(p.payload, '$._origin_dns.https') != '[]') AS dns_https, + COUNTIF(JSON_EXTRACT(p.payload, '$._origin_dns.https') != '[]') / COUNT(0) AS pct_dns_https, + COUNTIF(REGEXP_EXTRACT(JSON_EXTRACT(p.payload, '$._origin_dns.https'), r'alpn=\\"[^"]*h3[^"]*\\"') IS NOT NULL) AS dns_https_alpn, + COUNTIF(REGEXP_EXTRACT(JSON_EXTRACT(p.payload, '$._origin_dns.https'), r'alpn=\\"[^"]*h3[^"]*\\"') IS NOT NULL) / COUNT(0) AS pct_dns_https_alpn, + COUNTIF(JSON_EXTRACT(p.payload, '$._origin_dns.svcb') != '[]') AS dns_svcb, + COUNTIF(JSON_EXTRACT(p.payload, '$._origin_dns.svcb') != '[]') / COUNT(0) AS pct_dns_svcb, + COUNTIF(REGEXP_EXTRACT(JSON_EXTRACT(p.payload, '$._origin_dns.svcb'), r'alpn=\\"[^"]*h3[^"]*\\"') IS NOT NULL) AS dns_svcb_alpn, + COUNTIF(REGEXP_EXTRACT(JSON_EXTRACT(p.payload, '$._origin_dns.svcb'), r'alpn=\\"[^"]*h3[^"]*\\"') IS NOT NULL) / COUNT(0) AS pct_dns_svcb_alpn, + COUNTIF(JSON_EXTRACT(p.payload, '$._origin_dns.https') != '[]' OR JSON_EXTRACT(p.payload, '$._origin_dns.svcb') != '[]') AS dns_https_or_svcb, + COUNTIF(JSON_EXTRACT(p.payload, '$._origin_dns.https') != '[]' OR JSON_EXTRACT(p.payload, '$._origin_dns.svcb') != '[]') / COUNT(0) AS pct_dns_https_or_svcb, + COUNTIF(REGEXP_EXTRACT(JSON_EXTRACT(p.payload, '$._origin_dns.https'), r'alpn=\\"[^"]*h3[^"]*\\"') IS NOT NULL OR REGEXP_EXTRACT(JSON_EXTRACT(p.payload, '$._origin_dns.svcb'), r'alpn=\\"[^"]*h3[^"]*\\"') IS NOT NULL) AS dns_https_or_svcb_alpn, + COUNTIF(REGEXP_EXTRACT(JSON_EXTRACT(p.payload, '$._origin_dns.https'), r'alpn=\\"[^"]*h3[^"]*\\"') IS NOT NULL OR REGEXP_EXTRACT(JSON_EXTRACT(p.payload, '$._origin_dns.svcb'), r'alpn=\\"[^"]*h3[^"]*\\"') IS NOT NULL) / COUNT(0) AS pct_dns_https_or_svcb_alpn +FROM + `httparchive.all.pages` p +INNER JOIN + `httparchive.all.requests` r +USING (client, date, page, is_root_page) +WHERE + date = '2024-06-01' AND + is_root_page AND + is_main_document +GROUP BY + client, + cdn +ORDER BY + client, + cdn diff --git a/sql/2024/http/early_hints_per_page.sql b/sql/2024/http/early_hints_per_page.sql new file mode 100644 index 00000000000..db93f37c413 --- /dev/null +++ b/sql/2024/http/early_hints_per_page.sql @@ -0,0 +1,60 @@ +#standardSQL + + # Distribution of number of early hints resources + +CREATE TEMPORARY FUNCTION getNumEarlyHints(early_hints_header STRING) +RETURNS STRUCT<num_hints FLOAT64, num_resources_hinted FLOAT64> LANGUAGE js AS ''' +try { + + var num_hints = 0; + var num_resources_hinted = 0; + + theJSON = JSON.parse(early_hints_header); + + for (var key of Object.keys(theJSON)) { + if (theJSON[key].startsWith('link:')) { + num_hints++; + } else { + continue; + }; + num_resources_hinted = num_resources_hinted + theJSON[key].split(',').length; + } + + return { + num_hints, + num_resources_hinted + }; +} catch (e) { + return { + num_hints: 0, + num_resources_hinted: 0 + }; +} +'''; + +SELECT + client, + percentile, + COUNT(DISTINCT page) AS num_pages, + APPROX_QUANTILES(early_hints.num_hints, 1000)[OFFSET(percentile * 10)] AS num_hints, + APPROX_QUANTILES(early_hints.num_resources_hinted, 1000)[OFFSET(percentile * 10)] AS num_resources_hinted +FROM + ( + SELECT + client, + page, + getNumEarlyHints(JSON_EXTRACT(payload, '$._early_hint_headers')) AS early_hints + FROM
`httparchive.all.requests` + WHERE + date = '2024-06-01' AND + is_root_page AND + is_main_document + ), + UNNEST(GENERATE_ARRAY(1, 100)) AS percentile +GROUP BY + client, + percentile +ORDER BY + client, + percentile diff --git a/sql/2024/http/early_hints_usage.sql b/sql/2024/http/early_hints_usage.sql new file mode 100644 index 00000000000..c8028630bd2 --- /dev/null +++ b/sql/2024/http/early_hints_usage.sql @@ -0,0 +1,19 @@ +#standardSQL + + # Number and share of pages using early hints, including Shopify's share + + SELECT + client, + COUNT(DISTINCT page) AS num_pages, + COUNTIF(JSON_EXTRACT(payload, '$._early_hint_headers') IS NOT NULL) AS early_hints, + COUNTIF(JSON_EXTRACT(payload, '$._early_hint_headers') IS NOT NULL) / COUNT(DISTINCT page) AS early_hints_pct, + COUNTIF(JSON_EXTRACT(payload, '$._early_hint_headers') LIKE '%shopify%') AS early_hints_shopify, + COUNTIF(JSON_EXTRACT(payload, '$._early_hint_headers') LIKE '%shopify%') / COUNT(DISTINCT page) AS early_hints_shopify_pct +FROM + `httparchive.all.requests` +WHERE + date = '2024-06-01' AND + is_main_document AND + is_root_page +GROUP BY + client diff --git a/sql/2024/http/early_hints_usage_as_percentile_within_used.sql b/sql/2024/http/early_hints_usage_as_percentile_within_used.sql new file mode 100644 index 00000000000..13ee38a9a80 --- /dev/null +++ b/sql/2024/http/early_hints_usage_as_percentile_within_used.sql @@ -0,0 +1,75 @@ +CREATE TEMPORARY FUNCTION getEarlyHints(early_hints_header STRING) +RETURNS STRUCT<preconnects FLOAT64, preloads FLOAT64, asTypes ARRAY<STRUCT<key STRING, value FLOAT64>>> LANGUAGE js AS ''' +try { + var preconnects = 0; + var preloads = 0; + var as = {}; + + theJSON = JSON.parse(early_hints_header); + for (var key of Object.keys(theJSON)) { + if (!theJSON[key].startsWith('link:')) { + continue; + }; + var hints = theJSON[key].split(','); + hints.forEach(hint => { + + var attributes = hint.split(';'); + var fetchType=''; + var hintType=''; + attributes.forEach(attribute => { + if (attribute.trim().startsWith('rel')) { + hintType=attribute.trim().slice(4).replaceAll('"', '').replaceAll("'", ''); + } + if (attribute.trim().startsWith('as')) { + fetchType=attribute.trim().slice(3).replaceAll('"', '').replaceAll("'", ''); + } + }); + if (hintType === 'preconnect') { + preconnects++; + } + if (hintType === 'preload') { + preloads++; + as[fetchType] = as[fetchType] ?
as[fetchType] + 1 : 1; + } + }); + } + var asArray = []; + for (var key in as) { + asArray.push({key: key, value: as[key]}); + } + return { + preconnects: preconnects, + preloads: preloads, + asTypes: asArray + }; +} catch (e) { + return {}; +} +'''; + +SELECT + client, + is_root_page, + percentile, + asTypes.key AS asType, + APPROX_QUANTILES(CAST(asTypes.value AS INT64), 1000 IGNORE NULLS)[OFFSET(percentile * 10)] AS number, + ARRAY_TO_STRING(ARRAY_AGG(DISTINCT page LIMIT 5), ' ') AS sample_urls +FROM + `httparchive.all.requests`, + UNNEST(getEarlyHints(JSON_EXTRACT(payload, '$._early_hint_headers')).asTypes) AS asTypes, + UNNEST([10, 25, 50, 75, 90, 100]) AS percentile +WHERE + date = '2024-06-01' AND + is_main_document AND + JSON_QUERY(payload, '$._early_hint_headers') != '' AND + asTypes.key IS NOT NULL +GROUP BY + client, + is_root_page, + percentile, + asTypes.key +ORDER BY + client, + is_root_page, + percentile, + asTypes.key diff --git a/sql/2024/http/early_hints_usage_as_usage.sql b/sql/2024/http/early_hints_usage_as_usage.sql new file mode 100644 index 00000000000..88bdf45838e --- /dev/null +++ b/sql/2024/http/early_hints_usage_as_usage.sql @@ -0,0 +1,92 @@ +CREATE TEMPORARY FUNCTION getEarlyHints(early_hints_header STRING) +RETURNS STRUCT<preconnects FLOAT64, preloads FLOAT64, asTypes ARRAY<STRUCT<key STRING, value FLOAT64>>> LANGUAGE js AS ''' +try { + var preconnects = 0; + var preloads = 0; + var as = {}; + + theJSON = JSON.parse(early_hints_header); + for (var key of Object.keys(theJSON)) { + if (!theJSON[key].startsWith('link:')) { + continue; + }; + var hints = theJSON[key].split(','); + hints.forEach(hint => { + + var attributes = hint.split(';'); + var fetchType=''; + var hintType=''; + attributes.forEach(attribute => { + if (attribute.trim().startsWith('rel')) { + hintType=attribute.trim().slice(4).replaceAll('"', '').replaceAll("'", ''); + } + if (attribute.trim().startsWith('as')) { + fetchType=attribute.trim().slice(3).replaceAll('"', '').replaceAll("'", ''); + } + }); + if (hintType === 'preconnect') { + preconnects++; + } + if (hintType === 'preload') { + preloads++; + as[fetchType] = as[fetchType] ?
as[fetchType] + 1 : 1; + } + }); + } + var asArray = []; + for (var key in as) { + asArray.push({key: key, value: as[key]}); + } + return { + preconnects: preconnects, + preloads: preloads, + asTypes: asArray + }; +} catch (e) { + return {}; +} +'''; + +WITH totals AS ( + SELECT + date, + is_root_page, + client, + COUNT(0) AS total + FROM + `httparchive.all.pages` + WHERE + date = '2024-06-01' + GROUP BY + date, + client, + is_root_page +) + +SELECT + client, + is_root_page, + asTypes.key AS asType, + COUNT(DISTINCT page) AS num_pages, + COUNT(DISTINCT page) / total AS pct_pages, + ARRAY_TO_STRING(ARRAY_AGG(DISTINCT page LIMIT 5), ' ') AS sample_urls +FROM + `httparchive.all.requests`, + UNNEST(getEarlyHints(JSON_EXTRACT(payload, '$._early_hint_headers')).asTypes) AS asTypes +JOIN + totals +USING (date, client, is_root_page) +WHERE + date = '2024-06-01' AND + is_main_document AND + JSON_QUERY(payload, '$._early_hint_headers') != '' AND + asTypes.key IS NOT NULL +GROUP BY + client, + is_root_page, + total, + asTypes.key +ORDER BY + client, + is_root_page, + pct_pages DESC diff --git a/sql/2024/http/early_hints_usage_percentile_within_used.sql b/sql/2024/http/early_hints_usage_percentile_within_used.sql new file mode 100644 index 00000000000..c7c9ca50acd --- /dev/null +++ b/sql/2024/http/early_hints_usage_percentile_within_used.sql @@ -0,0 +1,71 @@ +CREATE TEMPORARY FUNCTION getEarlyHints(early_hints_header STRING) +RETURNS STRUCT<preconnects FLOAT64, preloads FLOAT64, asTypes ARRAY<STRUCT<key STRING, value FLOAT64>>> LANGUAGE js AS ''' +try { + var preconnects = 0; + var preloads = 0; + var as = {}; + + theJSON = JSON.parse(early_hints_header); + for (var key of Object.keys(theJSON)) { + if (!theJSON[key].startsWith('link:')) { + continue; + }; + var hints = theJSON[key].split(','); + hints.forEach(hint => { + + var attributes = hint.split(';'); + var fetchType=''; + var hintType=''; + attributes.forEach(attribute => { + if (attribute.trim().startsWith('rel')) { + hintType=attribute.trim().slice(4).replaceAll('"', '').replaceAll("'", ''); + } + if (attribute.trim().startsWith('as')) { + fetchType=attribute.trim().slice(3).replaceAll('"', '').replaceAll("'", ''); + } + }); + if (hintType === 'preconnect') { + preconnects++; + } + if (hintType === 'preload') { + preloads++; + as[fetchType] = as[fetchType] ?
as[fetchType] + 1 : 1; + } + }); + } + var asArray = []; + for (var key in as) { + asArray.push({key: key, value: as[key]}); + } + return { + preconnects: preconnects, + preloads: preloads, + asTypes: asArray + }; +} catch (e) { + return {}; +} +'''; + +SELECT + client, + is_root_page, + percentile, + APPROX_QUANTILES(getEarlyHints(JSON_EXTRACT(payload, '$._early_hint_headers')).preconnects, 1000 IGNORE NULLS)[OFFSET(percentile * 10)] AS preconnects, + APPROX_QUANTILES(getEarlyHints(JSON_EXTRACT(payload, '$._early_hint_headers')).preloads, 1000 IGNORE NULLS)[OFFSET(percentile * 10)] AS preloads, + ARRAY_TO_STRING(ARRAY_AGG(DISTINCT page LIMIT 5), ' ') AS sample_urls +FROM + `httparchive.all.requests`, + UNNEST([10, 25, 50, 75, 90, 100]) AS percentile +WHERE + date = '2024-06-01' AND + is_main_document AND + JSON_QUERY(payload, '$._early_hint_headers') != '' +GROUP BY + client, + is_root_page, + percentile +ORDER BY + client, + is_root_page, + percentile diff --git a/sql/2024/http/fetchpriority_usage.sql b/sql/2024/http/fetchpriority_usage.sql new file mode 100644 index 00000000000..5f65d8e0ffd --- /dev/null +++ b/sql/2024/http/fetchpriority_usage.sql @@ -0,0 +1,23 @@ +SELECT + client, + is_root_page, + COUNT(0) AS total_sites, + COUNTIF(num_priority_hints > 0) AS sites_using_priority_hints, + COUNTIF(num_priority_hints > 0) / COUNT(0) AS sites_using_priority_hints_pct +FROM ( + SELECT + client, + is_root_page, + page, + CAST(JSON_EXTRACT_SCALAR(custom_metrics, '$.almanac.priority-hints.total') AS INT64) AS num_priority_hints + FROM + `httparchive.all.pages` + WHERE + date = '2024-06-01' +) +GROUP BY + client, + is_root_page +ORDER BY + client, + is_root_page diff --git a/sql/2024/http/h2_adoption_by_cdn_pct.sql b/sql/2024/http/h2_adoption_by_cdn_pct.sql new file mode 100644 index 00000000000..913a2eb4e45 --- /dev/null +++ b/sql/2024/http/h2_adoption_by_cdn_pct.sql @@ -0,0 +1,37 @@ +#standardSQL + + # Percentage of requests using HTTP/2+ vs HTTP/1.1 broken down by whether the + # request was served from CDN. + + SELECT + client, + cdn, + CASE + WHEN LOWER(protocol) = 'quic' OR LOWER(protocol) LIKE 'h3%' THEN 'HTTP/2+' + WHEN LOWER(protocol) = 'http/2' OR LOWER(protocol) = 'http/3' THEN 'HTTP/2+' + WHEN protocol IS NULL THEN 'Unknown' + ELSE UPPER(protocol) + END AS http_version_category, + COUNT(0) AS num_reqs, + SUM(COUNT(0)) OVER (PARTITION BY client) AS total_reqs, + COUNT(0) / SUM(COUNT(0)) OVER (PARTITION BY client) AS pct_reqs +FROM ( + SELECT + client, + CASE + WHEN LENGTH(JSON_EXTRACT_SCALAR(summary, '$._cdn_provider')) > 0 THEN 'from-cdn' + ELSE 'non-cdn' + END AS cdn, + JSON_EXTRACT_SCALAR(summary, '$.respHttpVersion') AS protocol + FROM + `httparchive.all.requests` + WHERE + date = '2024-06-01' AND + is_root_page) +GROUP BY + client, + cdn, + http_version_category +ORDER BY + client ASC, + num_reqs DESC diff --git a/sql/2024/http/h2_adoption_each_cdn_breakdown.sql b/sql/2024/http/h2_adoption_each_cdn_breakdown.sql new file mode 100644 index 00000000000..3ceec32516d --- /dev/null +++ b/sql/2024/http/h2_adoption_each_cdn_breakdown.sql @@ -0,0 +1,35 @@ +#standardSQL + + # Percentage of requests using HTTP/2+ vs HTTP/1.1 broken down by CDN + # provider.
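+# A request is attributed to a CDN here via the summary's _cdn_provider field, +# which is empty when no CDN was detected; this query therefore only covers +# requests where a provider name was populated.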
+ + SELECT + client, + _cdn_provider, + CASE + WHEN LOWER(protocol) = 'quic' OR LOWER(protocol) LIKE 'h3%' THEN 'HTTP/2+' + WHEN LOWER(protocol) = 'http/2' OR LOWER(protocol) = 'http/3' THEN 'HTTP/2+' + WHEN protocol IS NULL THEN 'Unknown' + ELSE UPPER(protocol) + END AS http_version_category, + COUNT(0) AS num_reqs, + SUM(COUNT(0)) OVER (PARTITION BY client, _cdn_provider) AS total_reqs, + COUNT(0) / SUM(COUNT(0)) OVER (PARTITION BY client, _cdn_provider) AS pct_reqs +FROM ( + SELECT + client, + JSON_EXTRACT_SCALAR(summary, '$._cdn_provider') AS _cdn_provider, + JSON_EXTRACT_SCALAR(summary, '$.respHttpVersion') AS protocol + FROM + `httparchive.all.requests` + WHERE + date = '2024-06-01' AND + is_root_page AND + LENGTH(JSON_EXTRACT_SCALAR(summary, '$._cdn_provider')) > 0) +GROUP BY + client, + _cdn_provider, + http_version_category +ORDER BY + client ASC, + num_reqs DESC diff --git a/sql/2024/http/h2_adoption_pages_reqs.sql b/sql/2024/http/h2_adoption_pages_reqs.sql new file mode 100644 index 00000000000..3f497d55753 --- /dev/null +++ b/sql/2024/http/h2_adoption_pages_reqs.sql @@ -0,0 +1,41 @@ +#standardSQL + + # Percentage of websites using HTTP/2+ vs HTTP/1.1; this is based on the home + # page. + + SELECT + client, + protocol, + CASE + WHEN LOWER(protocol) = 'quic' OR LOWER(protocol) LIKE 'h3%' THEN 'HTTP/2+' + WHEN LOWER(protocol) = 'http/2' OR LOWER(protocol) = 'http/3' THEN 'HTTP/2+' + WHEN protocol IS NULL THEN 'Unknown' + ELSE UPPER(protocol) + END AS http_version_category, + CASE + WHEN LOWER(protocol) LIKE 'h3%' OR LOWER(protocol) = 'quic' THEN 'HTTP/3' + WHEN protocol IS NULL THEN 'Unknown' + ELSE UPPER(protocol) + END AS http_version, + COUNT(0) AS num_reqs, + SUM(COUNT(0)) OVER (PARTITION BY client) AS total_reqs, + COUNT(0) / SUM(COUNT(0)) OVER (PARTITION BY client) AS pct_reqs, + COUNTIF(is_main_document) AS num_pages, + SUM(COUNTIF(is_main_document)) OVER (PARTITION BY client) AS total_pages, + COUNTIF(is_main_document) / SUM(COUNTIF(is_main_document)) OVER (PARTITION BY client) AS pct_pages +FROM ( + SELECT + client, + is_main_document, + JSON_EXTRACT_SCALAR(summary, '$.respHttpVersion') AS protocol + FROM + `httparchive.all.requests` + WHERE + date = '2024-06-01' AND + is_root_page) +GROUP BY + client, + protocol +ORDER BY + client ASC, + num_reqs DESC diff --git a/sql/2024/http/h2_adoption_reqs_dist.sql b/sql/2024/http/h2_adoption_reqs_dist.sql new file mode 100644 index 00000000000..d1ade900e37 --- /dev/null +++ b/sql/2024/http/h2_adoption_reqs_dist.sql @@ -0,0 +1,50 @@ +#standardSQL + + # Distribution of requests being on HTTP/2 vs HTTP/1.1 + + SELECT + client, + http_version_category, + percentile, + APPROX_QUANTILES(num_reqs, 1000)[OFFSET(percentile * 10)] AS num_reqs, + APPROX_QUANTILES(total_reqs, 1000)[OFFSET(percentile * 10)] AS total_reqs, + APPROX_QUANTILES(pct_reqs, 1000)[OFFSET(percentile * 10)] AS pct_reqs +FROM + ( + SELECT + client, + page, + CASE + WHEN LOWER(protocol) = 'quic' OR LOWER(protocol) LIKE 'h3%' THEN 'HTTP/2+' + WHEN LOWER(protocol) = 'http/2' OR LOWER(protocol) = 'http/3' THEN 'HTTP/2+' + ELSE 'non-HTTP/2' + END AS http_version_category, + COUNT(0) AS num_reqs, + SUM(COUNT(0)) OVER (PARTITION BY client, page) AS total_reqs, + COUNT(0) / SUM(COUNT(0)) OVER (PARTITION BY client, page) AS pct_reqs + FROM ( + SELECT + client, + page, + JSON_EXTRACT_SCALAR(summary, '$.respHttpVersion') AS protocol + FROM + `httparchive.all.requests` + WHERE + date = '2024-06-01' AND + is_root_page) + GROUP BY + client, + page, + http_version_category + 
ORDER BY + client ASC, + num_reqs + ), + UNNEST(GENERATE_ARRAY(1, 100)) AS percentile +GROUP BY + client, + http_version_category, + percentile +ORDER BY + client, + percentile diff --git a/sql/2024/http/h3_support.sql b/sql/2024/http/h3_support.sql new file mode 100644 index 00000000000..1aa098d27db --- /dev/null +++ b/sql/2024/http/h3_support.sql @@ -0,0 +1,54 @@ +#standardSQL +# +# This query is based on the HTTP/3 support query on HTTP Archive time series. +# +# The number of requests either using HTTP/3 or able to use it. +# +# We measure "ability to use" as well as "actual use", as HTTP Archive is a +# cold crawl and so less likely to use HTTP/3 which requires prior visits. +# +# For "able to use" we look at the alt-svc response header. +# +# We also only measure official HTTP/3 (ALPN h3, h3-29) and not gQUIC or other +# prior versions. h3-29 was the final draft version before HTTP/3 was +# standardized as RFC 9114, so we include it as it is HTTP/3 in all but name. +# + +SELECT + date, + client, + CASE + WHEN protocol IN ('HTTP/3', 'h3', 'h3-29') OR + alt_svc LIKE '%h3=%' OR + alt_svc LIKE '%h3-29=%' THEN 'h3_supported' + ELSE 'h3_not_supported' + END AS h3_status, + COUNT(0) AS num_reqs, + SUM(COUNT(0)) OVER (PARTITION BY date, client) AS total_reqs, + COUNT(0) / SUM(COUNT(0)) OVER (PARTITION BY date, client) AS pct_reqs, + COUNTIF(is_main_document) AS num_pages, + SUM(COUNTIF(is_main_document)) OVER (PARTITION BY date, client) AS total_pages, + COUNTIF(is_main_document) / SUM(COUNTIF(is_main_document)) OVER (PARTITION BY date, client) AS pct_pages +FROM ( + SELECT + date, + client, + is_main_document, + JSON_EXTRACT_SCALAR(summary, '$.respHttpVersion') AS protocol, + resp_headers.value AS alt_svc + FROM + `httparchive.all.requests` + LEFT OUTER JOIN + UNNEST(response_headers) AS resp_headers ON LOWER(resp_headers.name) = 'alt-svc' + WHERE + date IN ('2022-06-01', '2023-06-01', '2024-06-01') AND + is_root_page) +GROUP BY + date, + client, + h3_status +ORDER BY + date, + client, + h3_status diff --git a/sql/2024/http/h3_support_each_cdn_breakdown.sql b/sql/2024/http/h3_support_each_cdn_breakdown.sql new file mode 100644 index 00000000000..507bd0bc3fc --- /dev/null +++ b/sql/2024/http/h3_support_each_cdn_breakdown.sql @@ -0,0 +1,39 @@ +#standardSQL + + # Percentage of requests with HTTP/3 support broken down by CDN + # provider.
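+# total_reqs and pct_reqs below are computed per (client, _cdn_provider) +# window partition, so pct_reqs is the supported/not-supported split within +# each provider rather than a share of all requests.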
+ + SELECT + client, + _cdn_provider, + CASE + WHEN protocol IN ('HTTP/3', 'h3', 'h3-29') OR + alt_svc LIKE '%h3=%' OR + alt_svc LIKE '%h3-29=%' THEN 'h3_supported' + ELSE 'h3_not_supported' + END AS h3_status, + COUNT(0) AS num_reqs, + SUM(COUNT(0)) OVER (PARTITION BY client, _cdn_provider) AS total_reqs, + COUNT(0) / SUM(COUNT(0)) OVER (PARTITION BY client, _cdn_provider) AS pct_reqs +FROM ( + SELECT + client, + JSON_EXTRACT_SCALAR(summary, '$._cdn_provider') AS _cdn_provider, + JSON_EXTRACT_SCALAR(summary, '$.respHttpVersion') AS protocol, + resp_headers.value AS alt_svc + FROM + `httparchive.all.requests` + LEFT OUTER JOIN + UNNEST(response_headers) AS resp_headers ON LOWER(resp_headers.name) = 'alt-svc' + WHERE + date = '2024-06-01' AND + is_root_page AND + LENGTH(JSON_EXTRACT_SCALAR(summary, '$._cdn_provider')) > 0) +GROUP BY + client, + _cdn_provider, + h3_status +ORDER BY + client ASC, + num_reqs DESC diff --git a/sql/2024/http/h3_support_from_cdn.sql b/sql/2024/http/h3_support_from_cdn.sql new file mode 100644 index 00000000000..3ebfd74b3e8 --- /dev/null +++ b/sql/2024/http/h3_support_from_cdn.sql @@ -0,0 +1,52 @@ +#standardSQL +# +# This query is based on the HTTP/3 support query on HTTP Archive time series. +# +# The number of requests either using HTTP/3 or able to use it. +# +# We measure "ability to use" as well as "actual use", as HTTP Archive is a +# cold crawl and so less likely to use HTTP/3 which requires prior visits. +# +# For "able to use" we look at the alt-svc response header. +# +# We also only measure official HTTP/3 (ALPN h3, h3-29) and not gQUIC or other +# prior versions. h3-29 was the final draft version before HTTP/3 was +# standardized as RFC 9114, so we include it as it is HTTP/3 in all but name.
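+# As a worked example (hypothetical header value): a response carrying +# alt-svc: h3=":443"; ma=86400, h3-29=":443"; ma=86400 +# advertises HTTP/3 endpoints and would match both the '%h3=%' and +# '%h3-29=%' LIKE patterns used below.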
+# + +SELECT + client, + CASE + WHEN LENGTH(_cdn_provider) > 0 THEN 'from-cdn' + ELSE 'non-cdn' + END AS cdn, + CASE + WHEN protocol IN ('HTTP/3', 'h3', 'h3-29') OR + alt_svc LIKE '%h3=%' OR + alt_svc LIKE '%h3-29=%' THEN 'h3_supported' + ELSE 'h3_not_supported' + END AS h3_status, + COUNT(0) AS num_reqs, + SUM(COUNT(0)) OVER (PARTITION BY client) AS total_reqs, + COUNT(0) / SUM(COUNT(0)) OVER (PARTITION BY client) AS pct_reqs +FROM ( + SELECT + client, + JSON_EXTRACT_SCALAR(summary, '$._cdn_provider') AS _cdn_provider, + JSON_EXTRACT_SCALAR(summary, '$.respHttpVersion') AS protocol, + resp_headers.value AS alt_svc + FROM + `httparchive.all.requests` + LEFT OUTER JOIN + UNNEST(response_headers) AS resp_headers ON LOWER(resp_headers.name) = 'alt-svc' + WHERE + date = '2024-06-01' AND + is_root_page) +GROUP BY + client, + cdn, + h3_status +ORDER BY + cdn, + client diff --git a/sql/2024/http/h3_switches.sql b/sql/2024/http/h3_switches.sql new file mode 100644 index 00000000000..868dffa66bd --- /dev/null +++ b/sql/2024/http/h3_switches.sql @@ -0,0 +1,39 @@ +#standardSQL +# Pages which had resources from domains with both h2 and h3 requests +# + +SELECT + date, + client, + page, + url_host, + COUNTIF(protocol IN ('HTTP/2', 'h2')) AS h2_requests, + COUNTIF(protocol IN ('HTTP/3', 'h3', 'h3-29')) AS h3_requests +FROM ( + SELECT + date, + client, + page, + NET.HOST(url) AS url_host, + JSON_EXTRACT_SCALAR(summary, '$.respHttpVersion') AS protocol + FROM + `httparchive.all.requests` + WHERE + date = '2024-06-01' AND + is_root_page) +GROUP BY + date, + client, + page, + url_host +HAVING + h2_requests > 0 AND + h3_requests > 0 +ORDER BY + date, + client, + page, + url_host diff --git a/sql/2024/http/h3_usage_site.sql b/sql/2024/http/h3_usage_site.sql new file mode 100644 index 00000000000..02881c4dc02 --- /dev/null +++ b/sql/2024/http/h3_usage_site.sql @@ -0,0 +1,50 @@ +#standardSQL +# Whether sites use HTTP/3 +# As HTTP Archive does a cold crawl, immediate usage suggests either support advertised via DNS +# or home page redirect and browser upgrades +# + +SELECT + date, + client, + CASE + WHEN protocol IN ('HTTP/3', 'h3', 'h3-29') THEN 'h3_used' + ELSE 'h3_not_used' + END AS h3_used, + CASE + WHEN protocol IN ('HTTP/3', 'h3', 'h3-29') OR + alt_svc LIKE '%h3=%' OR + alt_svc LIKE '%h3-29=%' THEN 'h3_supported' + ELSE 'h3_not_supported' + END AS h3_supported, + COUNT(DISTINCT page) AS num_pages, + SUM(COUNT(DISTINCT page)) OVER (PARTITION BY date, client) AS total_pages, + COUNT(DISTINCT page) / SUM(COUNT(DISTINCT page)) OVER (PARTITION BY date, client) AS pct_pages +FROM ( + SELECT + date, + client, + page, + is_main_document, + JSON_EXTRACT_SCALAR(summary, '$.respHttpVersion') AS protocol, + resp_headers.value AS alt_svc + FROM + `httparchive.all.requests` + LEFT OUTER JOIN + UNNEST(response_headers) AS resp_headers ON LOWER(resp_headers.name) = 'alt-svc' + WHERE + date = '2024-06-01' AND + is_root_page AND + is_main_document) +GROUP BY + date, + client, + h3_used, + h3_supported +ORDER BY + date, + client, + h3_used, + h3_supported diff --git a/sql/2024/http/lcp_element_data_by_type.sql b/sql/2024/http/lcp_element_data_by_type.sql new file mode 100644 index 00000000000..256ff460328 --- /dev/null +++ b/sql/2024/http/lcp_element_data_by_type.sql @@ -0,0 +1,142 @@ +CREATE TEMP FUNCTION getLoadingAttr(attributes
STRING) RETURNS STRING LANGUAGE js AS ''' + try { + const data = JSON.parse(attributes); + const loadingAttr = data.find(attr => attr['name'] === 'loading') + return loadingAttr.value + } catch (e) { + return ''; + } +'''; + +CREATE TEMP FUNCTION getDecodingAttr(attributes STRING) RETURNS STRING LANGUAGE js AS ''' + try { + const data = JSON.parse(attributes); + const decodingAttr = data.find(attr => attr['name'] === 'decoding') + return decodingAttr.value + } catch (e) { + return ''; + } +'''; + +CREATE TEMP FUNCTION getFetchPriorityAttr(attributes STRING) RETURNS STRING LANGUAGE js AS ''' + try { + const data = JSON.parse(attributes); + const fetchPriorityAttr = data.find(attr => attr['name'] === 'fetchpriority') + return fetchPriorityAttr.value + } catch (e) { + return ''; + } +'''; + +CREATE TEMP FUNCTION getLoadingClasses(attributes STRING) RETURNS STRING LANGUAGE js AS ''' + try { + const data = JSON.parse(attributes); + const classes = data.find(attr => attr['name'] === 'class').value + if (classes.indexOf('lazyload') !== -1) { + return classes + } else { + return '' + } + } catch (e) { + return ''; + } +'''; + +CREATE TEMPORARY FUNCTION getResourceHints(linkNodes STRING) +RETURNS STRUCT<preload BOOL, prefetch BOOL, preconnect BOOL, prerender BOOL, dns_prefetch BOOL, modulepreload BOOL> +LANGUAGE js AS ''' +var hints = ['preload', 'prefetch', 'preconnect', 'prerender', 'dns-prefetch', 'modulepreload']; +try { + var linkNodes = JSON.parse(linkNodes); + return hints.reduce((results, hint) => { + results[hint] = !!linkNodes.nodes.find(link => link.rel.toLowerCase() == hint); + return results; + }, {}); +} catch (e) { + return hints.reduce((results, hint) => { + results[hint] = false; + return results; + }, {}); +} +'''; + +WITH lcp_stats AS ( + SELECT + client, + page, + JSON_EXTRACT_SCALAR(custom_metrics, '$.performance.lcp_elem_stats.nodeName') AS nodeName, + JSON_EXTRACT_SCALAR(custom_metrics, '$.performance.lcp_elem_stats.url') AS elementUrl, + CAST(JSON_EXTRACT_SCALAR(custom_metrics, '$.performance.lcp_elem_stats.size') AS INT64) AS size, + CAST(JSON_EXTRACT_SCALAR(custom_metrics, '$.performance.lcp_elem_stats.loadTime') AS FLOAT64) AS loadTime, + CAST(JSON_EXTRACT_SCALAR(custom_metrics, '$.performance.lcp_elem_stats.startTime') AS FLOAT64) AS startTime, + CAST(JSON_EXTRACT_SCALAR(custom_metrics, '$.performance.lcp_elem_stats.renderTime') AS FLOAT64) AS renderTime, + JSON_EXTRACT(custom_metrics, '$.performance.lcp_elem_stats.attributes') AS attributes, + getLoadingAttr(JSON_EXTRACT(custom_metrics, '$.performance.lcp_elem_stats.attributes')) AS loading, + getDecodingAttr(JSON_EXTRACT(custom_metrics, '$.performance.lcp_elem_stats.attributes')) AS decoding, + getLoadingClasses(JSON_EXTRACT(custom_metrics, '$.performance.lcp_elem_stats.attributes')) AS classWithLazyload, + getFetchPriorityAttr(JSON_EXTRACT(custom_metrics, '$.performance.lcp_elem_stats.attributes')) AS fetchPriority, + LOWER(JSON_EXTRACT_SCALAR(custom_metrics, '$.performance.lcp_resource.initialPriority')) AS initialPriority, + LOWER(JSON_EXTRACT_SCALAR(custom_metrics, '$.performance.lcp_resource.priority')) AS priority, + getResourceHints(JSON_EXTRACT(custom_metrics, '$.almanac.link-nodes')) AS hints + FROM + `httparchive.all.pages` + WHERE + date = '2024-06-01' AND + is_root_page +) + +SELECT + client, + nodeName, + COUNT(DISTINCT page) AS pages, + ANY_VALUE(total) AS total, + COUNT(DISTINCT page) / ANY_VALUE(total) AS pct, + COUNTIF(elementUrl != '') AS haveImages, + COUNTIF(elementUrl != '') / COUNT(DISTINCT page) AS pct_haveImages, + COUNTIF(loading = 'eager') AS native_eagerload, + COUNTIF(loading = 'lazy') AS
native_lazyload, + COUNTIF(classWithLazyload != '') AS lazyload_class, + COUNTIF(classWithLazyload != '' OR loading = 'lazy') AS probably_lazyLoaded, + COUNTIF(classWithLazyload != '' OR loading = 'lazy') / COUNT(DISTINCT page) AS pct_prob_lazyloaded, + COUNTIF(decoding = 'async') AS async_decoding, + COUNTIF(decoding = 'sync') AS sync_decoding, + COUNTIF(decoding = 'auto') AS auto_decoding, + COUNTIF(fetchPriority = 'low') AS fetchpriority_low, + COUNTIF(fetchPriority = 'high') AS fetchpriority_high, + COUNTIF(fetchPriority = 'low') / COUNT(DISTINCT page) AS pct_fetchpriority_low, + COUNTIF(fetchPriority = 'high') / COUNT(DISTINCT page) AS pct_fetchpriority_high, + COUNTIF(loading = 'lazy' AND fetchPriority = 'high') AS native_lazyload_and_fetch_priority, + COUNTIF(loading = 'lazy' AND fetchPriority = 'high') / COUNT(DISTINCT page) AS pct_native_lazyload_and_fetch_priority, + COUNTIF(initialPriority = 'low') AS initial_priority_low, + COUNTIF(initialPriority = 'medium') AS initial_priority_medium, + COUNTIF(initialPriority = 'high') AS initial_priority_high, + COUNTIF(initialPriority = 'high' AND fetchPriority = 'high') AS initial_priority_high_and_fetchpriority, + COUNTIF(initialPriority = 'low') / COUNT(DISTINCT page) AS pct_initial_priority_low, + COUNTIF(initialPriority = 'medium') / COUNT(DISTINCT page) AS pct_initial_priority_medium, + COUNTIF(initialPriority = 'high') / COUNT(DISTINCT page) AS pct_initial_priority_high, + COUNTIF(initialPriority = 'high' AND fetchPriority = 'high') / COUNT(DISTINCT page) AS pct_initial_priority_high_and_fetchpriority, + COUNTIF(hints.preload) AS preload, + COUNTIF(hints.preload) / COUNT(DISTINCT page) AS pct_preload +FROM + lcp_stats +JOIN ( + SELECT + client, + COUNT(0) AS total + FROM + `httparchive.all.pages` + WHERE + date = '2024-06-01' AND + is_root_page + GROUP BY + client) +USING + (client) +GROUP BY + client, + nodeName +HAVING + pages > 1000 +ORDER BY + pct DESC diff --git a/sql/2024/http/lcp_element_data_with_urls.sql b/sql/2024/http/lcp_element_data_with_urls.sql new file mode 100644 index 00000000000..4af565098ca --- /dev/null +++ b/sql/2024/http/lcp_element_data_with_urls.sql @@ -0,0 +1,142 @@ +CREATE TEMP FUNCTION getLoadingAttr(attributes STRING) RETURNS STRING LANGUAGE js AS ''' + try { + const data = JSON.parse(attributes); + const loadingAttr = data.find(attr => attr['name'] === 'loading') + return loadingAttr.value + } catch (e) { + return ''; + } +'''; + +CREATE TEMP FUNCTION getDecodingAttr(attributes STRING) RETURNS STRING LANGUAGE js AS ''' + try { + const data = JSON.parse(attributes); + const decodingAttr = data.find(attr => attr['name'] === 'decoding') + return decodingAttr.value + } catch (e) { + return ''; + } +'''; + +CREATE TEMP FUNCTION getFetchPriorityAttr(attributes STRING) RETURNS STRING LANGUAGE js AS ''' + try { + const data = JSON.parse(attributes); + const fetchPriorityAttr = data.find(attr => attr['name'] === 'fetchpriority') + return fetchPriorityAttr.value + } catch (e) { + return ''; + } +'''; + +CREATE TEMP FUNCTION getLoadingClasses(attributes STRING) RETURNS STRING LANGUAGE js AS ''' + try { + const data = JSON.parse(attributes); + const classes = data.find(attr => attr['name'] === 'class').value + if (classes.indexOf('lazyload') !== -1) { + return classes + } else { + return '' + } + } catch (e) { + return ''; + } +'''; + +CREATE TEMPORARY FUNCTION getResourceHints(linkNodes STRING) +RETURNS STRUCT<preload BOOL, prefetch BOOL, preconnect BOOL, prerender BOOL, dns_prefetch BOOL, modulepreload BOOL>
+LANGUAGE js AS ''' +var hints = ['preload', 'prefetch', 'preconnect', 'prerender', 'dns-prefetch', 'modulepreload']; +try { + var linkNodes = JSON.parse(linkNodes); + return hints.reduce((results, hint) => { + results[hint] = !!linkNodes.nodes.find(link => link.rel.toLowerCase() == hint); + return results; + }, {}); +} catch (e) { + return hints.reduce((results, hint) => { + results[hint] = false; + return results; + }, {}); +} +'''; + +WITH lcp_stats AS ( + SELECT + client, + page, + JSON_EXTRACT_SCALAR(custom_metrics, '$.performance.lcp_elem_stats.nodeName') AS nodeName, + JSON_EXTRACT_SCALAR(custom_metrics, '$.performance.lcp_elem_stats.url') AS elementUrl, + CAST(JSON_EXTRACT_SCALAR(custom_metrics, '$.performance.lcp_elem_stats.size') AS INT64) AS size, + CAST(JSON_EXTRACT_SCALAR(custom_metrics, '$.performance.lcp_elem_stats.loadTime') AS FLOAT64) AS loadTime, + CAST(JSON_EXTRACT_SCALAR(custom_metrics, '$.performance.lcp_elem_stats.startTime') AS FLOAT64) AS startTime, + CAST(JSON_EXTRACT_SCALAR(custom_metrics, '$.performance.lcp_elem_stats.renderTime') AS FLOAT64) AS renderTime, + JSON_EXTRACT(custom_metrics, '$.performance.lcp_elem_stats.attributes') AS attributes, + getLoadingAttr(JSON_EXTRACT(custom_metrics, '$.performance.lcp_elem_stats.attributes')) AS loading, + getDecodingAttr(JSON_EXTRACT(custom_metrics, '$.performance.lcp_elem_stats.attributes')) AS decoding, + getLoadingClasses(JSON_EXTRACT(custom_metrics, '$.performance.lcp_elem_stats.attributes')) AS classWithLazyload, + getFetchPriorityAttr(JSON_EXTRACT(custom_metrics, '$.performance.lcp_elem_stats.attributes')) AS fetchPriority, + LOWER(JSON_EXTRACT_SCALAR(custom_metrics, '$.performance.lcp_resource.initialPriority')) AS initialPriority, + LOWER(JSON_EXTRACT_SCALAR(custom_metrics, '$.performance.lcp_resource.priority')) AS priority, + getResourceHints(JSON_EXTRACT(custom_metrics, '$.almanac.link-nodes')) AS hints + FROM + `httparchive.all.pages` + WHERE + date = '2024-06-01' AND + is_root_page AND + JSON_EXTRACT_SCALAR(custom_metrics, '$.performance.lcp_elem_stats.url') IS NOT NULL AND + JSON_EXTRACT_SCALAR(custom_metrics, '$.performance.lcp_elem_stats.url') != '' +) + +SELECT + client, + COUNT(DISTINCT page) AS pages, + ANY_VALUE(total) AS total, + COUNT(DISTINCT page) / ANY_VALUE(total) AS pct, + COUNTIF(elementUrl != '') AS haveImages, + COUNTIF(elementUrl != '') / COUNT(DISTINCT page) AS pct_haveImages, + COUNTIF(loading = 'eager') AS native_eagerload, + COUNTIF(loading = 'lazy') AS native_lazyload, + COUNTIF(classWithLazyload != '') AS lazyload_class, + COUNTIF(classWithLazyload != '' OR loading = 'lazy') AS probably_lazyLoaded, + COUNTIF(classWithLazyload != '' OR loading = 'lazy') / COUNT(DISTINCT page) AS pct_prob_lazyloaded, + COUNTIF(decoding = 'async') AS async_decoding, + COUNTIF(decoding = 'sync') AS sync_decoding, + COUNTIF(decoding = 'auto') AS auto_decoding, + COUNTIF(fetchPriority = 'low') AS fetchpriority_low, + COUNTIF(fetchPriority = 'high') AS fetchpriority_high, + COUNTIF(fetchPriority = 'low') / COUNT(DISTINCT page) AS pct_fetchpriority_low, + COUNTIF(fetchPriority = 'high') / COUNT(DISTINCT page) AS pct_fetchpriority_high, + COUNTIF(loading = 'lazy' AND fetchPriority = 'high') AS native_lazyload_and_fetch_priority, + COUNTIF(loading = 'lazy' AND fetchPriority = 'high') / COUNT(DISTINCT page) AS pct_native_lazyload_and_fetch_priority, + COUNTIF(initialPriority = 'low') AS initial_priority_low, + COUNTIF(initialPriority = 'medium') AS initial_priority_medium, + COUNTIF(initialPriority = 'high') AS
initial_priority_high, + COUNTIF(initialPriority = 'high' AND fetchPriority = 'high') AS initial_priority_high_and_fetchpriority, + COUNTIF(initialPriority = 'low') / COUNT(DISTINCT page) AS pct_initial_priority_low, + COUNTIF(initialPriority = 'medium') / COUNT(DISTINCT page) AS pct_initial_priority_medium, + COUNTIF(initialPriority = 'high') / COUNT(DISTINCT page) AS pct_initial_priority_high, + COUNTIF(initialPriority = 'high' AND fetchPriority = 'high') / COUNT(DISTINCT page) AS pct_initial_priority_high_and_fetchpriority, + COUNTIF(hints.preload) AS preload, + COUNTIF(hints.preload) / COUNT(DISTINCT page) AS pct_preload +FROM + lcp_stats +JOIN ( + SELECT + client, + COUNT(0) AS total + FROM + `httparchive.all.pages` + WHERE + date = '2024-06-01' AND + is_root_page + GROUP BY + client) +USING + (client) +GROUP BY + client +HAVING + pages > 1000 +ORDER BY + pct DESC diff --git a/sql/2024/http/preload_as_values.sql b/sql/2024/http/preload_as_values.sql new file mode 100644 index 00000000000..c70b4c3c329 --- /dev/null +++ b/sql/2024/http/preload_as_values.sql @@ -0,0 +1,85 @@ +#standardSQL +CREATE TEMPORARY FUNCTION getResourceHintAttrs(link_nodes STRING) +RETURNS ARRAY<STRUCT<name STRING, attribute STRING, value STRING>> +LANGUAGE js AS ''' +var hints = new Set(['preload']); +var attributes = ['as']; +try { + var linkNodes = JSON.parse(link_nodes); + return linkNodes.nodes.reduce((results, link) => { + var hint = link.rel.toLowerCase(); + if (!hints.has(hint)) { + return results; + } + attributes.forEach(attribute => { + var value = link[attribute]; + results.push({ + name: hint, + attribute: attribute, + // Support empty strings. + value: typeof value == 'string' ? value : null + }); + }); + return results; + }, []); +} catch (e) { + return []; +} +'''; + +WITH total_pages AS ( + SELECT + client, + is_root_page, + COUNT(DISTINCT page) AS total + FROM + `httparchive.all.pages` + WHERE + date = '2024-06-01' + GROUP BY + client, + is_root_page +) + +SELECT + client, + is_root_page, + as_value, + total, + COUNT(DISTINCT IF(preload_hints > 0, page, NULL)) AS preload_pages, + COUNT(DISTINCT IF(preload_hints > 0, page, NULL)) / total AS preload_pages_pct +FROM ( + SELECT + client, + is_root_page, + page, + as_value, + COUNT(0) AS preload_hints + FROM ( + SELECT + client, + is_root_page, + page, + hint.value AS as_value + FROM + `httparchive.all.pages` + JOIN + UNNEST(getResourceHintAttrs(JSON_QUERY(custom_metrics, '$.almanac.link-nodes'))) AS hint + WHERE + date = '2024-06-01' + ) + GROUP BY + client, + is_root_page, + page, + as_value +) +JOIN + total_pages USING (client, is_root_page) +GROUP BY + client, + is_root_page, + as_value, + total +ORDER BY + preload_pages_pct DESC diff --git a/sql/2024/http/preload_as_values_fetchpriority.sql b/sql/2024/http/preload_as_values_fetchpriority.sql new file mode 100644 index 00000000000..c4b1b087598 --- /dev/null +++ b/sql/2024/http/preload_as_values_fetchpriority.sql @@ -0,0 +1,91 @@ +#standardSQL +CREATE TEMPORARY FUNCTION getResourceHintAttrs(link_nodes STRING) +RETURNS ARRAY<STRUCT<name STRING, attribute STRING, value STRING, fetchpriority STRING>> +LANGUAGE js AS ''' +var hints = new Set(['preload']); +var attributes = ['as']; +try { + var linkNodes = JSON.parse(link_nodes); + return linkNodes.nodes.reduce((results, link) => { + var hint = link.rel.toLowerCase(); + if (!hints.has(hint) || !link['fetchpriority']) { + return results; + } + attributes.forEach(attribute => { + var value = link[attribute]; + results.push({ + name: hint, + attribute: attribute, + //
Support empty strings. + value: typeof value == 'string' ? value : null, + fetchpriority: link['fetchpriority'] + }); + }); + return results; + }, []); +} catch (e) { + return []; +} +'''; + +WITH total_pages AS ( + SELECT + client, + is_root_page, + COUNT(DISTINCT page) AS total + FROM + `httparchive.all.pages` + WHERE + date = '2024-06-01' + GROUP BY + client, + is_root_page +) + +SELECT + client, + is_root_page, + as_value, + fetchpriority, + total, + COUNT(DISTINCT IF(preload_hints > 0, page, NULL)) AS preload_pages, + COUNT(DISTINCT IF(preload_hints > 0, page, NULL)) / total AS preload_pages_pct +FROM ( + SELECT + client, + is_root_page, + page, + as_value, + fetchpriority, + COUNT(0) AS preload_hints + FROM ( + SELECT + client, + is_root_page, + page, + hint.value AS as_value, + hint.fetchpriority AS fetchpriority + FROM + `httparchive.all.pages` + JOIN + UNNEST(getResourceHintAttrs(JSON_QUERY(custom_metrics, '$.almanac.link-nodes'))) AS hint + WHERE + date = '2024-06-01' + ) + GROUP BY + client, + is_root_page, + page, + as_value, + fetchpriority +) +JOIN + total_pages USING (client, is_root_page) +GROUP BY + client, + is_root_page, + as_value, + fetchpriority, + total +ORDER BY + preload_pages_pct DESC diff --git a/sql/2024/http/priority_hints_per_page.sql b/sql/2024/http/priority_hints_per_page.sql new file mode 100644 index 00000000000..f8801312472 --- /dev/null +++ b/sql/2024/http/priority_hints_per_page.sql @@ -0,0 +1,27 @@ +#standardSQL + + # Distribution of the number of resources with priority hints per page. + + SELECT + client, + is_root_page, + percentile, + APPROX_QUANTILES(num_priority_hints, 1000)[OFFSET(percentile * 10)] AS num_priority_hints +FROM ( + SELECT + client, + is_root_page, + CAST(JSON_EXTRACT_SCALAR(custom_metrics, '$.almanac.priority-hints.total') AS INT64) AS num_priority_hints + FROM + `httparchive.all.pages` + WHERE + date = '2024-06-01'), + UNNEST([10, 25, 50, 75, 90, 95, 100]) AS percentile +GROUP BY + client, + is_root_page, + percentile +ORDER BY + client, + is_root_page, + percentile diff --git a/sql/2024/http/priority_hints_usage.sql b/sql/2024/http/priority_hints_usage.sql new file mode 100644 index 00000000000..46ae92b0093 --- /dev/null +++ b/sql/2024/http/priority_hints_usage.sql @@ -0,0 +1,26 @@ +#standardSQL + + # Number and share of sites using priority hints.
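+# A site counts as using priority hints when the almanac custom metric's +# priority-hints.total reports at least one hinted resource for its page.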
+ + SELECT + client, + is_root_page, + COUNT(0) AS total_sites, + COUNTIF(num_priority_hints > 0) AS sites_using_priority_hints, + COUNTIF(num_priority_hints > 0) / COUNT(0) AS sites_using_priority_hints_pct +FROM ( + SELECT + client, + is_root_page, + CAST(JSON_EXTRACT_SCALAR(custom_metrics, '$.almanac.priority-hints.total') AS INT64) AS num_priority_hints + FROM + `httparchive.all.pages` + WHERE + date = '2024-06-01' +) +GROUP BY + client, + is_root_page +ORDER BY + client, + is_root_page diff --git a/sql/2024/http/protocol_advertised_via_altsvc.sql b/sql/2024/http/protocol_advertised_via_altsvc.sql new file mode 100644 index 00000000000..257f9fec8d0 --- /dev/null +++ b/sql/2024/http/protocol_advertised_via_altsvc.sql @@ -0,0 +1,54 @@ +#standardSQL +# Protocol advertised via alt-svc breakdown + +CREATE TEMPORARY FUNCTION extractHTTPHeader(altsvcHeaderValue STRING) +RETURNS ARRAY<STRING> LANGUAGE js AS ''' +try { + const result = []; + const splitAltSvcHeaderValue = altsvcHeaderValue.split(','); + for (let altsvcToken of splitAltSvcHeaderValue) { + const protocolPortToken = altsvcToken.trim().split(';')[0]; + const protocolToken = protocolPortToken.split('=')[0]; + result.push(protocolToken); + } + return result; +} catch (e) { + return []; +} +'''; + +WITH altsvcTable AS ( + SELECT + client, + url, + extractHTTPHeader(resp_headers.value) AS protocol + FROM + `httparchive.all.requests` + LEFT OUTER JOIN + UNNEST(response_headers) AS resp_headers ON LOWER(resp_headers.name) = 'alt-svc' + WHERE + date = '2024-06-01' AND + is_root_page AND + is_main_document +) +SELECT + client, + protocol, + COUNT(0) AS protocol_count, + SUM(COUNT(0)) OVER (PARTITION BY client) AS total_advertised, + COUNT(0) / SUM(COUNT(0)) OVER (PARTITION BY client) AS pct_advertised +FROM + ( + SELECT + client, + url, + flattened_protocol AS protocol + FROM + altsvcTable, altsvcTable.protocol AS flattened_protocol + ) +GROUP BY + client, + protocol +ORDER BY + client ASC, + pct_advertised DESC diff --git a/sql/2024/http/resource_hint_overuse_examples.sql b/sql/2024/http/resource_hint_overuse_examples.sql new file mode 100644 index 00000000000..7cb07db7c38 --- /dev/null +++ b/sql/2024/http/resource_hint_overuse_examples.sql @@ -0,0 +1,71 @@ +#standardSQL +CREATE TEMPORARY FUNCTION getResourceHints(link_nodes STRING) +RETURNS ARRAY<STRUCT<name STRING, attribute STRING, value STRING>> +LANGUAGE js AS ''' +var hints = new Set(['dns-prefetch', 'preconnect', 'preload', 'prefetch', 'modulepreload']); +var attributes = ['as']; +try { + var linkNodes = JSON.parse(link_nodes); + return linkNodes.nodes.reduce((results, link) => { + var hint = link.rel.toLowerCase(); + if (!hints.has(hint)) { + return results; + } + attributes.forEach(attribute => { + var value = link[attribute]; + results.push({ + name: hint, + attribute: attribute, + // Support empty strings. + value: typeof value == 'string' ?
value : null + }); + }); + return results; + }, []); +} catch (e) { + return []; +} +'''; + +SELECT + client, + is_root_page, + page, + SUM(dnsprefetch_hint) AS dnsprefetch_hints, + SUM(preconnect_hint) AS preconnect_hints, + SUM(prefetch_hint) AS prefetch_hints, + SUM(preload_hint) AS preload_hints, + SUM(modulepreload_hint) AS modulepreload_hints +FROM ( + SELECT + client, + is_root_page, + page, + IF(hint.name = 'dns-prefetch', 1, 0) AS dnsprefetch_hint, + IF(hint.name = 'preconnect', 1, 0) AS preconnect_hint, + IF(hint.name = 'prefetch', 1, 0) AS prefetch_hint, + IF(hint.name = 'preload', 1, 0) AS preload_hint, + IF(hint.name = 'modulepreload', 1, 0) AS modulepreload_hint + FROM + `httparchive.all.pages` + LEFT JOIN + UNNEST(getResourceHints(JSON_QUERY(custom_metrics, '$.almanac.link-nodes'))) AS hint + WHERE + date = '2024-06-01' + ) +GROUP BY + client, + is_root_page, + page +HAVING + dnsprefetch_hints > 1000 OR + preconnect_hints > 1000 OR + prefetch_hints > 1000 OR + preload_hints > 1000 OR + modulepreload_hints > 1000 +ORDER BY + preload_hints DESC, + modulepreload_hints DESC, + prefetch_hints DESC, + preconnect_hints DESC, + dnsprefetch_hints DESC diff --git a/sql/2024/http/resource_hint_usage.sql b/sql/2024/http/resource_hint_usage.sql new file mode 100644 index 00000000000..e5e9880b05d --- /dev/null +++ b/sql/2024/http/resource_hint_usage.sql @@ -0,0 +1,72 @@ +#standardSQL +CREATE TEMPORARY FUNCTION getResourceHints(link_nodes STRING) +RETURNS ARRAY<STRUCT<name STRING>> +LANGUAGE js AS ''' +var hints = new Set(['dns-prefetch', 'preconnect', 'preload', 'prefetch', 'modulepreload']); +try { + var linkNodes = JSON.parse(link_nodes); + return linkNodes.nodes.reduce((results, link) => { + var hint = link.rel.toLowerCase(); + if (!hints.has(hint)) { + return results; + } + results.push({ + name: hint + }); + return results; + }, []); +} catch (e) { + return []; +} +'''; + +SELECT + client, + is_root_page, + COUNT(DISTINCT page) AS total, + COUNT(DISTINCT IF(dnsprefetch_hints > 0, page, NULL)) AS dnsprefetch_pages, + COUNT(DISTINCT IF(dnsprefetch_hints > 0, page, NULL)) / COUNT(DISTINCT page) AS dnsprefetch_pct, + COUNT(DISTINCT IF(preconnect_hints > 0, page, NULL)) AS preconnect_pages, + COUNT(DISTINCT IF(preconnect_hints > 0, page, NULL)) / COUNT(DISTINCT page) AS preconnect_pct, + COUNT(DISTINCT IF(prefetch_hints > 0, page, NULL)) AS prefetch_pages, + COUNT(DISTINCT IF(prefetch_hints > 0, page, NULL)) / COUNT(DISTINCT page) AS prefetch_pct, + COUNT(DISTINCT IF(preload_hints > 0, page, NULL)) AS preload_pages, + COUNT(DISTINCT IF(preload_hints > 0, page, NULL)) / COUNT(DISTINCT page) AS preload_pct, + COUNT(DISTINCT IF(modulepreload_hints > 0, page, NULL)) AS modulepreload_pages, + COUNT(DISTINCT IF(modulepreload_hints > 0, page, NULL)) / COUNT(DISTINCT page) AS modulepreload_pct +FROM ( + SELECT + client, + is_root_page, + page, + SUM(dnsprefetch_hint) AS dnsprefetch_hints, + SUM(preconnect_hint) AS preconnect_hints, + SUM(prefetch_hint) AS prefetch_hints, + SUM(preload_hint) AS preload_hints, + SUM(modulepreload_hint) AS modulepreload_hints + FROM ( + SELECT + client, + is_root_page, + page, + IF(hint.name = 'dns-prefetch', 1, 0) AS dnsprefetch_hint, + IF(hint.name = 'preconnect', 1, 0) AS preconnect_hint, + IF(hint.name = 'prefetch', 1, 0) AS prefetch_hint, + IF(hint.name = 'preload', 1, 0) AS preload_hint, + IF(hint.name = 'modulepreload', 1, 0) AS modulepreload_hint + FROM + `httparchive.all.pages` + LEFT JOIN + UNNEST(getResourceHints(JSON_EXTRACT(custom_metrics,
'$.almanac.link-nodes'))) AS hint + WHERE + date = '2024-06-01' + ) + GROUP BY + client, + is_root_page, + page +) +GROUP BY + client, + is_root_page diff --git a/sql/2024/http/resource_hint_usage_percentile.sql b/sql/2024/http/resource_hint_usage_percentile.sql new file mode 100644 index 00000000000..13ce7a42e0d --- /dev/null +++ b/sql/2024/http/resource_hint_usage_percentile.sql @@ -0,0 +1,72 @@ +#standardSQL +CREATE TEMPORARY FUNCTION getResourceHints(link_nodes STRING) +RETURNS ARRAY<STRUCT<name STRING>> +LANGUAGE js AS ''' +var hints = new Set(['dns-prefetch', 'preconnect', 'preload', 'prefetch', 'modulepreload']); +try { + var linkNodes = JSON.parse(link_nodes); + return linkNodes.nodes.reduce((results, link) => { + var hint = link.rel.toLowerCase(); + if (!hints.has(hint)) { + return results; + } + results.push({ + name: hint + }); + return results; + }, []); +} catch (e) { + return []; +} +'''; + +SELECT + client, + is_root_page, + percentile, + APPROX_QUANTILES(dnsprefetch_hints, 1000 IGNORE NULLS)[OFFSET(percentile * 10)] AS dnsprefetches, + APPROX_QUANTILES(preconnect_hints, 1000 IGNORE NULLS)[OFFSET(percentile * 10)] AS preconnects, + APPROX_QUANTILES(prefetch_hints, 1000 IGNORE NULLS)[OFFSET(percentile * 10)] AS prefetches, + APPROX_QUANTILES(preload_hints, 1000 IGNORE NULLS)[OFFSET(percentile * 10)] AS preloads, + APPROX_QUANTILES(modulepreload_hints, 1000 IGNORE NULLS)[OFFSET(percentile * 10)] AS modulepreloads +FROM ( + SELECT + client, + is_root_page, + page, + SUM(dnsprefetch_hint) AS dnsprefetch_hints, + SUM(preconnect_hint) AS preconnect_hints, + SUM(prefetch_hint) AS prefetch_hints, + SUM(preload_hint) AS preload_hints, + SUM(modulepreload_hint) AS modulepreload_hints + FROM ( + SELECT + client, + is_root_page, + page, + IF(hint.name = 'dns-prefetch', 1, 0) AS dnsprefetch_hint, + IF(hint.name = 'preconnect', 1, 0) AS preconnect_hint, + IF(hint.name = 'prefetch', 1, 0) AS prefetch_hint, + IF(hint.name = 'preload', 1, 0) AS preload_hint, + IF(hint.name = 'modulepreload', 1, 0) AS modulepreload_hint + FROM + `httparchive.all.pages` + LEFT JOIN + UNNEST(getResourceHints(JSON_QUERY(custom_metrics, '$.almanac.link-nodes'))) AS hint + WHERE + date = '2024-06-01' + ) + GROUP BY + client, + is_root_page, + page + ), + UNNEST([10, 25, 50, 75, 90, 100]) AS percentile +GROUP BY + client, + is_root_page, + percentile +ORDER BY + client, + is_root_page, + percentile diff --git a/sql/2024/http/sites_cdn_with_altsvc.sql b/sql/2024/http/sites_cdn_with_altsvc.sql new file mode 100644 index 00000000000..3602ee98648 --- /dev/null +++ b/sql/2024/http/sites_cdn_with_altsvc.sql @@ -0,0 +1,35 @@ +#standardSQL + + # Percentage of main document requests with an alt-svc header, broken down by CDN use + + WITH requests AS ( + SELECT + client, + CASE + WHEN LENGTH(JSON_EXTRACT_SCALAR(summary, '$._cdn_provider')) > 0 THEN 'from-cdn' + ELSE 'non-cdn' + END AS cdn, + resp_headers.value AS altsvc + FROM + `httparchive.all.requests` + LEFT OUTER JOIN + UNNEST(response_headers) AS resp_headers ON LOWER(resp_headers.name) = 'alt-svc' + WHERE + date = '2024-06-01' AND + is_root_page AND + is_main_document +) + +SELECT + client, + cdn, + COUNTIF(altsvc IS NOT NULL) AS sites_with_altsvc, + SUM(COUNT(0)) OVER (PARTITION BY client) AS total, + COUNTIF(altsvc IS NOT NULL) / SUM(COUNT(0)) OVER (PARTITION BY client) AS pct_sites_with_altsvc +FROM + requests +GROUP BY + client, + cdn +ORDER BY + pct_sites_with_altsvc DESC diff --git a/sql/2024/http/sites_using_link_preload_header.sql b/sql/2024/http/sites_using_link_preload_header.sql new file mode
100644 index 00000000000..1ef93b75499 --- /dev/null +++ b/sql/2024/http/sites_using_link_preload_header.sql @@ -0,0 +1,54 @@ +#standardSQL + + # Number of sites using a Link: rel=preload response header. + +CREATE TEMPORARY FUNCTION getNumLinkRelPreloadRespHeader(linkHeader STRING) +RETURNS FLOAT64 LANGUAGE js AS ''' +try { + let numPreload = 0; + + const linkHeaderTokens = linkHeader.split(','); + for (let linkHeaderToken of linkHeaderTokens) { + const linkTokenProps = linkHeaderToken.split(';').map(prop => prop.trim()); + if (linkTokenProps.includes('rel=preload')) { + numPreload++; + } + } + return numPreload; +} catch (e) { + return 0; +} +'''; + +SELECT + client, + is_root_page, + COUNTIF(num_link_rel_preload_resp_header > 0) AS num_sites_using_link_preload_resp_header, + COUNT(0) AS total_sites, + COUNTIF(num_link_rel_preload_resp_header > 0) / COUNT(0) AS pct_sites_using_link_preload_resp_header +FROM ( + SELECT + client, + is_root_page, + page, + SUM(getNumLinkRelPreloadRespHeader(resp_headers.value)) AS num_link_rel_preload_resp_header + FROM + `httparchive.all.requests` + LEFT OUTER JOIN + UNNEST(response_headers) AS resp_headers ON LOWER(resp_headers.name) = 'link' + WHERE + date = '2024-06-01' AND + is_main_document + GROUP BY + client, + is_root_page, + page +) +GROUP BY + client, + is_root_page +ORDER BY + client, + is_root_page diff --git a/sql/2024/http/sites_using_link_preload_tag.sql b/sql/2024/http/sites_using_link_preload_tag.sql new file mode 100644 index 00000000000..4f4335612b8 --- /dev/null +++ b/sql/2024/http/sites_using_link_preload_tag.sql @@ -0,0 +1,37 @@ +#standardSQL + + # Number of sites using <link rel=preload> tags. + +CREATE TEMPORARY FUNCTION getNumLinkRelPreload(almanac_custom_metric STRING) +RETURNS FLOAT64 LANGUAGE js AS ''' +try { + const almanac = JSON.parse(almanac_custom_metric); + const link_preload_nodes = almanac['link-nodes']['nodes'].filter(link_node => link_node['rel'] === 'preload') + return link_preload_nodes.length; +} catch (e) { + return -1; +} +'''; + +SELECT + client, + is_root_page, + COUNTIF(num_link_rel_preload_tag > 0) AS num_sites_using_link_preload_tag, + COUNT(0) AS total_sites, + COUNTIF(num_link_rel_preload_tag > 0) / COUNT(0) AS pct_sites_using_link_preload_tag +FROM ( + SELECT + client, + is_root_page, + getNumLinkRelPreload(JSON_EXTRACT(custom_metrics, '$.almanac')) AS num_link_rel_preload_tag + FROM + `httparchive.all.pages` + WHERE + date = '2024-06-01' +) +GROUP BY + client, + is_root_page +ORDER BY + client, + is_root_page diff --git a/sql/2024/http/sites_with_altsvc.sql b/sql/2024/http/sites_with_altsvc.sql new file mode 100644 index 00000000000..685e9c60271 --- /dev/null +++ b/sql/2024/http/sites_with_altsvc.sql @@ -0,0 +1,20 @@ +#standardSQL +# Fraction of sites with alt-svc header + +SELECT + client, + COUNTIF(resp_headers.value IS NOT NULL) AS sites_with_altsvc, + SUM(COUNT(0)) OVER (PARTITION BY client) AS total, + COUNTIF(resp_headers.value IS NOT NULL) / SUM(COUNT(0)) OVER (PARTITION BY client) AS pct_sites_with_altsvc +FROM + `httparchive.all.requests` +LEFT OUTER JOIN + UNNEST(response_headers) AS resp_headers ON LOWER(resp_headers.name) = 'alt-svc' +WHERE + date = '2024-06-01' AND + is_root_page AND + is_main_document +GROUP BY + client +ORDER BY + pct_sites_with_altsvc DESC diff --git a/sql/2024/http/third_party_origins_with_altsvc.sql b/sql/2024/http/third_party_origins_with_altsvc.sql new file mode 100644 index 00000000000..b5b167efcf7 --- /dev/null +++ b/sql/2024/http/third_party_origins_with_altsvc.sql @@ -0,0 +1,22 @@ +#standardSQL +# Fraction of sites with
alt-svc header by request origin + +SELECT + client, + NET.HOST(url) AS origin, + COUNT(DISTINCT page) AS sites_using_origin, + COUNT(DISTINCT IF(resp_headers.value IS NOT NULL, page, NULL)) AS sites_with_altsvc, + COUNT(DISTINCT IF(resp_headers.value IS NOT NULL, page, NULL)) / COUNT(DISTINCT page) AS pct_sites_with_altsvc +FROM + `httparchive.all.requests` +LEFT OUTER JOIN + UNNEST(response_headers) AS resp_headers ON LOWER(resp_headers.name) = 'alt-svc' +WHERE + date = '2024-06-01' AND + is_root_page +GROUP BY + client, + NET.HOST(url) +ORDER BY + sites_using_origin DESC +LIMIT 10000 diff --git a/sql/2024/http/unused_preloads.sql b/sql/2024/http/unused_preloads.sql new file mode 100644 index 00000000000..5905b36c0a8 --- /dev/null +++ b/sql/2024/http/unused_preloads.sql @@ -0,0 +1,35 @@ +#standardSQL +SELECT + client, + is_root_page, + unused_preloads, + COUNT(0) AS freq, + SUM(COUNT(0)) OVER (PARTITION BY client, is_root_page) AS total, + COUNT(0) / SUM(COUNT(0)) OVER (PARTITION BY client, is_root_page) AS pct, + ARRAY_TO_STRING(ARRAY_AGG(DISTINCT page LIMIT 5), ' ') AS sample_urls +FROM ( + SELECT + client, + is_root_page, + COALESCE(ARRAY_LENGTH(REGEXP_EXTRACT_ALL(consoleLog, r'was preloaded using link preload but not used within a few seconds')), 0) AS unused_preloads, + page + FROM ( + SELECT + client, + is_root_page, + JSON_QUERY(payload, '$._consoleLog') AS consoleLog, + page + FROM + `httparchive.all.pages` + WHERE + date = '2024-06-01' + ) +) +GROUP BY + client, + is_root_page, + unused_preloads +ORDER BY + client, + is_root_page, + unused_preloads
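+# Note: unused_preloads is, per page, the number of occurrences of Chrome's +# "was preloaded using link preload but not used within a few seconds" console +# warning in the crawl's captured console log, which is what the regex above matches.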