Skip to content

Commit

Permalink
Fonts 2021 sql (#2338)
Browse files Browse the repository at this point in the history
* Create 05_01.web_fonts_usage.sql

* Create 05_02.web_fonts_usage_by_country.sql

* Rename 05_01.web_fonts_usage.sql to web_fonts_usage.sql

* Rename 05_02.web_fonts_usage_by_country.sql to .web_fonts_usage_by_country.sql

* Rename .web_fonts_usage_by_country.sql to web_fonts_usage_by_country.sql

* Update web_fonts_usage_by_country.sql

* Create fonts_format.sql

* Create impact_on_core_web_vitals.sql

* Update and rename impact_on_core_web_vitals.sql to self_hosted_vs_hosted_with_fcp_lcp.sql

* Create web_font_usage_breakdown_with_fcp_lcp.sql

* Delete self_hosted_vs_hosted_with_fcp_lcp.sql

* Create popular_typeface.sql

2020 query with 2021 date

* Create variable_font_adoption.sql

2020 queries with 2021 date

* Create color_fonts.sql

2020 Queries with 2021 dates

* Create variable_font_axis.sql

2020 query with 2021 date

* Rename variable_font_axis.sql to variable_font_axes_used.sql

2020 query with 2021 date

* Create self_hosted_vs_hosted_with_fcp_lcp.sql

2020 query with 2021 date

* Update self_hosted_vs_hosted_with_fcp_lcp.sql

* Create font_display.sql

2020 query with 2021 date

* Update self_hosted_vs_hosted_with_fcp_lcp.sql

indenting change from 2020 query with 2021 date

* Update self_hosted_vs_hosted_with_fcp_lcp.sql

2020 query w.2021 date + indentation fix.

* Update self_hosted_vs_hosted_with_fcp_lcp.sql

indentation fix

* Update self_hosted_vs_hosted_with_fcp_lcp.sql

:( indentation fix

* Update self_hosted_vs_hosted_with_fcp_lcp.sql

whitespace issues.

* Update self_hosted_vs_hosted_with_fcp_lcp.sql

space after comma

* Update self_hosted_vs_hosted_with_fcp_lcp.sql

:) new indentation issue fix

* Update self_hosted_vs_hosted_with_fcp_lcp.sql

* Update sql/2021/fonts/fonts_format.sql

Co-authored-by: Rick Viscomi <[email protected]>

* Update sql/2021/fonts/fonts_format.sql

Co-authored-by: Rick Viscomi <[email protected]>

* Update sql/2021/fonts/variable_font_axes_used.sql

Co-authored-by: Rick Viscomi <[email protected]>

Co-authored-by: Rick Viscomi <[email protected]>
  • Loading branch information
konfirmed and rviscomi authored Oct 4, 2021
1 parent a892f6d commit 7dce229
Show file tree
Hide file tree
Showing 10 changed files with 400 additions and 0 deletions.
37 changes: 37 additions & 0 deletions sql/2021/fonts/color_fonts.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
#standardSQL
#color_fonts
# Pages per client that load at least one color font, broken down by the color
# table detected in the font, with the share relative to the per-client row
# count of summary_pages.
SELECT
  client,
  format,
  COUNT(DISTINCT page) AS pages_color,
  total_page,
  COUNT(DISTINCT page) / total_page AS pct_color
FROM (
  # Font requests only. The request's own `format` column is intentionally not
  # projected: it shares its name with the UNNEST alias below, which would make
  # the outer `format` reference ambiguous (or point at the wrong column).
  SELECT
    client,
    page,
    payload
  FROM
    `httparchive.almanac.requests`
  WHERE
    date = '2021-07-01' AND
    type = 'font')
JOIN (
  # Denominator: number of summary_pages rows per client.
  SELECT
    _TABLE_SUFFIX AS client,
    COUNT(0) AS total_page
  FROM
    `httparchive.summary_pages.2021_07_01_*`
  GROUP BY
    _TABLE_SUFFIX)
USING
  (client),
  # Color fonts have any of sbix, cbdt, svg or colr tables.
  UNNEST(REGEXP_EXTRACT_ALL(JSON_EXTRACT(payload, '$._font_details.table_sizes'), '(?i)(sbix|CBDT|SVG|COLR)')) AS format
GROUP BY
  client,
  total_page,
  format
ORDER BY
  pages_color DESC
70 changes: 70 additions & 0 deletions sql/2021/fonts/font_display.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
#standardSQL
#font_display
# getFontDisplay(css): takes a JSON-encoded parsed stylesheet and returns every
# `font-display` value declared inside `font-face` rules, recursing into rules
# that contain nested `rules`. Returns [null] when the input cannot be parsed
# or walked.
CREATE TEMPORARY FUNCTION getFontDisplay(css STRING)
RETURNS ARRAY<STRING> LANGUAGE js AS '''
try {
  var reduceValues = (values, rule) => {
    // Container rules: descend into their children.
    if ('rules' in rule) {
      return rule.rules.reduce(reduceValues, values);
    }
    if (!('declarations' in rule)) {
      return values;
    }
    if (rule.type != 'font-face') {
      return values;
    }
    rule.declarations.forEach(d => {
      // Declaration lists may hold nodes without a property (for example
      // comment nodes). Skip them instead of throwing, which would make the
      // catch below throw away every value found in this stylesheet.
      if (d.property && d.property.toLowerCase() == 'font-display') {
        values.push(d.value);
      }
    });
    return values;
  };
  var $ = JSON.parse(css);
  return $.stylesheet.rules.reduce(reduceValues, []);
} catch (e) {
  return [null];
}
''';

# Pages per client for each font-display value, with the median FCP and LCP of
# those pages.
WITH page_font_display AS (
  # One row per (client, page, font-display value). The LEFT JOIN keeps
  # stylesheets that yield no value, as a NULL font_display.
  SELECT DISTINCT
    client,
    page,
    font_display
  FROM
    `httparchive.almanac.parsed_css`
  LEFT JOIN
    UNNEST(getFontDisplay(css)) AS font_display
  WHERE
    date = '2021-07-01'
),

page_paint_timings AS (
  # Paint timings read from each page's payload.
  SELECT
    _TABLE_SUFFIX AS client,
    url AS page,
    CAST(JSON_EXTRACT_SCALAR(payload, "$['_chromeUserTiming.firstContentfulPaint']") AS INT64) AS fcp,
    CAST(JSON_EXTRACT_SCALAR(payload, "$['_chromeUserTiming.LargestContentfulPaint']") AS INT64) AS lcp
  FROM
    `httparchive.pages.2021_07_01_*`
  GROUP BY
    _TABLE_SUFFIX,
    url,
    payload
)

SELECT
  client,
  font_display,
  COUNT(DISTINCT page) AS pages,
  SUM(COUNT(DISTINCT page)) OVER (PARTITION BY client) AS total,
  COUNT(DISTINCT page) / SUM(COUNT(DISTINCT page)) OVER (PARTITION BY client) AS pct_display,
  APPROX_QUANTILES(fcp, 1000)[OFFSET(500)] AS median_fcp,
  APPROX_QUANTILES(lcp, 1000)[OFFSET(500)] AS median_lcp
FROM
  page_font_display
JOIN
  page_paint_timings
USING
  (client, page)
GROUP BY
  client,
  font_display
ORDER BY
  pages DESC
20 changes: 20 additions & 0 deletions sql/2021/fonts/fonts_format.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
#standardSQL
#font_formats
# Distribution of font request formats per client. The format is taken from the
# MIME subtype (with any "x-" / "font-" prefix removed), falling back to the
# request's `ext` column when the MIME type does not match the pattern.
WITH font_requests AS (
  SELECT
    client,
    LOWER(COALESCE(REGEXP_EXTRACT(mimeType, '/(?:x-)?(?:font-)?(.*)'), ext)) AS mime_type
  FROM
    `httparchive.almanac.requests`
  WHERE
    date = '2021-07-01' AND
    type = 'font' AND
    mimeType != ''
)

SELECT
  client,
  mime_type,
  COUNT(0) AS freq,
  SUM(COUNT(0)) OVER (PARTITION BY client) AS total,
  COUNT(0) / SUM(COUNT(0)) OVER (PARTITION BY client) AS pct
FROM
  font_requests
GROUP BY
  client,
  mime_type
ORDER BY
  client,
  pct DESC
48 changes: 48 additions & 0 deletions sql/2021/fonts/popular_typeface.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
#standardSQL
#popular_typeface
# getFontFamilies(css): takes a JSON-encoded parsed stylesheet and returns the
# `font-family` value of each top-level `font-face` rule, with single and
# double quotes removed. Rules without a font-family are dropped. Returns an
# empty array when the input cannot be parsed.
CREATE TEMPORARY FUNCTION getFontFamilies(css STRING)
RETURNS ARRAY<STRING> LANGUAGE js AS '''
try {
  var $ = JSON.parse(css);
  return $.stylesheet.rules.filter(rule => rule.type == 'font-face').map(rule => {
    // CSS property names are case-insensitive, so compare in lower case (as the
    // other font UDFs in this directory do). Nodes without a property are skipped.
    var family = rule.declarations && rule.declarations.find(d => d.property && d.property.toLowerCase() == 'font-family');
    return family && family.value.replace(/[\'"]/g, '');
  }).filter(family => family);
} catch (e) {
  return [];
}
''';

# Font families declared via @font-face on at least 0.4% of a client's pages.
WITH family_pages AS (
  # Distinct pages per client on which each font family is declared.
  SELECT
    client,
    font_family,
    COUNT(DISTINCT page) AS pages
  FROM
    `httparchive.almanac.parsed_css`,
    UNNEST(getFontFamilies(css)) AS font_family
  WHERE
    date = '2021-07-01'
  GROUP BY
    client,
    font_family
),

client_totals AS (
  # Denominator: number of summary_pages rows per client.
  SELECT
    _TABLE_SUFFIX AS client,
    COUNT(0) AS total
  FROM
    `httparchive.summary_pages.2021_07_01_*`
  GROUP BY
    client
)

SELECT
  client,
  font_family,
  pages,
  total,
  pages / total AS pct
FROM
  family_pages
JOIN
  client_totals
USING
  (client)
WHERE
  pages / total >= 0.004
ORDER BY
  pct DESC
42 changes: 42 additions & 0 deletions sql/2021/fonts/self_hosted_vs_hosted_with_fcp_lcp.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
#standardSQL
#self_hosted_vs_hosted_with_fcp
# Classifies each page with font requests by where its fonts come from
# (all from the page's own host, none, or a mix) and reports page counts,
# shares and median FCP / LCP per client and class.
WITH page_font_hosting AS (
  # Fraction of a page's font requests whose host equals the page's host.
  SELECT
    client,
    page,
    COUNTIF(NET.HOST(page) = NET.HOST(url)) / COUNT(0) AS pct_self_hosted_hosted
  FROM
    `httparchive.almanac.requests`
  WHERE
    date = '2021-07-01' AND
    type = 'font'
  GROUP BY
    client,
    page
),

page_paint_timings AS (
  # Paint timings read from each page's payload.
  SELECT
    _TABLE_SUFFIX AS client,
    url AS page,
    CAST(JSON_EXTRACT_SCALAR(payload, "$['_chromeUserTiming.firstContentfulPaint']") AS INT64) AS fcp,
    CAST(JSON_EXTRACT_SCALAR(payload, "$['_chromeUserTiming.LargestContentfulPaint']") AS INT64) AS lcp
  FROM
    `httparchive.pages.2021_07_01_*`
)

SELECT
  client,
  CASE pct_self_hosted_hosted
    WHEN 1 THEN 'self-hosted'
    WHEN 0 THEN 'external'
    ELSE 'both'
  END AS font_host,
  COUNT(DISTINCT page) AS pages,
  SUM(COUNT(DISTINCT page)) OVER (PARTITION BY client) AS total,
  COUNT(DISTINCT page) / SUM(COUNT(DISTINCT page)) OVER (PARTITION BY client) AS pct,
  APPROX_QUANTILES(fcp, 1000)[OFFSET(500)] AS median_fcp,
  APPROX_QUANTILES(lcp, 1000)[OFFSET(500)] AS median_lcp
FROM
  page_font_hosting
JOIN
  page_paint_timings
USING
  (client, page)
GROUP BY
  client,
  font_host
ORDER BY
  font_host,
  client
53 changes: 53 additions & 0 deletions sql/2021/fonts/variable_font_adoption.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
#standardSQL
#variable_font_with_fcp
# getName(font_details): takes the JSON-encoded font details of a request and
# returns the first entry of its `names` collection that looks like a real font
# name: longer than two characters after trimming, not numeric, and not
# matching the metadata pattern (licence text, copyright notices, version
# strings, style names, hex ids, designer / foundry names). Returns NULL when
# nothing qualifies or the input cannot be parsed.
CREATE TEMP FUNCTION getName(font_details STRING) RETURNS STRING LANGUAGE js AS '''
try {
  // A regex literal is used so escapes need only one level of doubling (for
  // the SQL string). "(c)" is escaped to match the literal copyright marker
  // rather than any letter c, and the version alternative really is v + digits.
  const metadata = /(not to be used for anything other than web font use!|web use only|web_use_only|:|;|^google$|copyright|©|\\(c\\)|rights reserved|published by|generated by|property of|trademark|version|v\\d+|release|untitled|^bold$|^light$|^semibold$|^defaults$|^normal$|^regular$|^[a-f0-9]+$|Vernon Adams|Jan Kovarik|Mark Simonson|Paul D. Hunt|Kai Bernau|Kris Sowersby|Joshua Darden|Jos Buivenga|Yugo Kajiwara|Moslem Ebrahimi|Hadrien Boyer|Russell Benson|Ryan Martinson|Joen Asmussen|Olivier Gourvat|Hannes von Doehren|René Bieder|House Industries|GoDaddy|TypeSquare|Dalton Maag Ltd|_null_name_substitute_|^font$|Moveable Type)/i;
  return Object.values(JSON.parse(font_details).names).find(name => {
    name = name.trim();
    return name.length > 2 &&
      !metadata.test(name) &&
      isNaN(Number(name));
  });
} catch (e) {
  return null;
}
''';
# Variable fonts (font files containing a gvar table) by font name: number of
# pages using each font per client and its share of that client's pages.
# Only fonts seen on more than 100 pages are reported.
SELECT
  client,
  name,
  COUNT(DISTINCT page) AS freq_vf,
  total_page,
  COUNT(DISTINCT page) / total_page AS pct_vf
FROM (
  SELECT
    client,
    page,
    getName(JSON_EXTRACT(payload, '$._font_details')) AS name
  FROM
    `httparchive.almanac.requests`
  WHERE
    date = '2021-07-01' AND
    type = 'font' AND
    REGEXP_CONTAINS(JSON_EXTRACT(payload, '$._font_details.table_sizes'), '(?i)gvar'))
JOIN (
  # Denominator: number of summary_pages rows per client. Counting per
  # (client, page) instead would make total_page 1 for every row and pct_vf
  # would just repeat freq_vf.
  SELECT
    _TABLE_SUFFIX AS client,
    COUNT(0) AS total_page
  FROM
    `httparchive.summary_pages.2021_07_01_*`
  GROUP BY
    _TABLE_SUFFIX)
USING
  (client)
WHERE
  name IS NOT NULL
GROUP BY
  client,
  name,
  total_page
HAVING
  freq_vf > 100
ORDER BY
  freq_vf DESC
41 changes: 41 additions & 0 deletions sql/2021/fonts/variable_font_axes_used.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
#standardSQL
#VF_axis_value
# getFontVariationSettings(css): takes a JSON-encoded parsed stylesheet and
# returns the value of every `font-variation-settings` declaration, recursing
# into rules that contain nested `rules`. Returns an empty array when the input
# cannot be parsed or walked.
CREATE TEMPORARY FUNCTION getFontVariationSettings(css STRING)
RETURNS ARRAY<STRING> LANGUAGE js AS '''
try {
  var reduceValues = (values, rule) => {
    // Container rules: descend into their children.
    if ('rules' in rule) {
      return rule.rules.reduce(reduceValues, values);
    }
    if (!('declarations' in rule)) {
      return values;
    }
    // Declaration lists may hold nodes without a property (for example comment
    // nodes). Skip them instead of throwing, which would make the catch below
    // throw away every value found in this stylesheet.
    return values.concat(rule.declarations.filter(d => d.property && d.property.toLowerCase() == 'font-variation-settings').map(d => d.value));
  };
  var $ = JSON.parse(css);
  return $.stylesheet.rules.reduce(reduceValues, []);
} catch (e) {
  return [];
}
''';
# Pages per client for each (axis tag, numeric value) pair found in
# font-variation-settings declarations. Each declaration value is split on
# commas so every axis setting is analysed on its own.
SELECT
  client,
  # Four-character quoted axis tag, lower-cased.
  REGEXP_EXTRACT(LOWER(axis_setting), '[\'"]([\\w]{4})[\'"]') AS axis,
  # First run of digits in this axis setting. It is read from the individual
  # setting, not the whole declaration, so that in a multi-axis declaration
  # each axis gets its own number rather than the first number overall.
  CAST(REGEXP_EXTRACT(axis_setting, '\\d+') AS NUMERIC) AS num_axis,
  COUNT(DISTINCT page) AS pages,
  SUM(COUNT(DISTINCT page)) OVER (PARTITION BY client) AS total,
  COUNT(DISTINCT page) / SUM(COUNT(DISTINCT page)) OVER (PARTITION BY client) AS pct
FROM
  `httparchive.almanac.parsed_css`,
  UNNEST(getFontVariationSettings(css)) AS value,
  UNNEST(SPLIT(value, ',')) AS axis_setting
WHERE
  date = '2021-07-01'
GROUP BY
  client,
  axis,
  num_axis
HAVING
  axis IS NOT NULL
ORDER BY
  pages DESC
41 changes: 41 additions & 0 deletions sql/2021/fonts/web_font_usage_breakdown_with_fcp_lcp.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
#standardSQL
#web_font_usage_breakdown_with_fcp_lcp
# Hosts serving fonts to pages on a different host: number of pages per client
# and host, share among the reported hosts, and median FCP / LCP of those
# pages. Hosts with fewer than 1000 pages are omitted.
WITH third_party_font_requests AS (
  # Distinct (client, page, font URL) triples where the font's host differs
  # from the page's host.
  SELECT DISTINCT
    client,
    page,
    url
  FROM
    `httparchive.almanac.requests`
  WHERE
    date = '2021-07-01' AND
    type = 'font' AND
    NET.HOST(page) != NET.HOST(url)
),

page_paint_timings AS (
  # Paint timings read from each page's payload.
  SELECT
    _TABLE_SUFFIX AS client,
    url AS page,
    CAST(JSON_EXTRACT_SCALAR(payload, "$['_chromeUserTiming.firstContentfulPaint']") AS INT64) AS fcp,
    CAST(JSON_EXTRACT_SCALAR(payload, "$['_chromeUserTiming.LargestContentfulPaint']") AS INT64) AS lcp
  FROM
    `httparchive.pages.2021_07_01_*`
)

SELECT
  client,
  NET.HOST(url) AS host,
  COUNT(DISTINCT page) AS pages,
  SUM(COUNT(DISTINCT page)) OVER (PARTITION BY client) AS total,
  COUNT(DISTINCT page) / SUM(COUNT(DISTINCT page)) OVER (PARTITION BY client) AS pct,
  APPROX_QUANTILES(fcp, 1000)[OFFSET(500)] AS median_fcp,
  APPROX_QUANTILES(lcp, 1000)[OFFSET(500)] AS median_lcp
FROM
  third_party_font_requests
JOIN
  page_paint_timings
USING
  (client, page)
GROUP BY
  client,
  host
HAVING
  pages >= 1000
ORDER BY
  pct DESC
19 changes: 19 additions & 0 deletions sql/2021/fonts/web_fonts_usage.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
#standardSQL
#web_fonts_usage
# Share of pages with at least one font request, per client and crawl date,
# across every summary_pages table. Client and date are both derived from the
# table suffix.
WITH crawled_pages AS (
  SELECT
    CASE
      WHEN ENDS_WITH(_TABLE_SUFFIX, 'desktop') THEN 'desktop'
      ELSE 'mobile'
    END AS client,
    REGEXP_REPLACE(_TABLE_SUFFIX, r'(\d+)_(\d+)_(\d+).*', r'\1-\2-\3') AS date,
    reqFont
  FROM
    `httparchive.summary_pages.*`
  WHERE
    reqFont IS NOT NULL AND
    bytesFont IS NOT NULL
)

SELECT
  client,
  date,
  COUNTIF(reqFont > 0) AS freq_fonts,
  COUNT(0) AS total,
  COUNTIF(reqFont > 0) / COUNT(0) AS pct_fonts
FROM
  crawled_pages
GROUP BY
  client,
  date
ORDER BY
  date DESC,
  client
Loading

0 comments on commit 7dce229

Please sign in to comment.