Skip to content

Commit

Permalink
Media 2021 queries - video (#2392)
Browse files Browse the repository at this point in the history
* video initial queries

* Run sqlfluff fix on the queries

* more formatting

* more formatting

* Final linting fixes?

* git is hard

* updated tables

* fixed last query

* linter fixes- i hope

* linter fixes- i hope

* linter fixes- i hope

* linter fixes- i hope

Co-authored-by: Barry <[email protected]>
  • Loading branch information
dougsillars and tunetheweb authored Nov 8, 2021
1 parent b67766b commit 0416e8f
Show file tree
Hide file tree
Showing 13 changed files with 458 additions and 0 deletions.
16 changes: 16 additions & 0 deletions sql/2021/media/siteswithvideo_desktop.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
#standardSQL
# Number of desktop pages whose `num_video_nodes` media metric is greater than zero.
#
# `$._media` is read from `payload` as a scalar string and that string is then
# parsed as JSON a second time to reach `num_video_nodes`.
# The #standardSQL prefix is required because the backtick-quoted table name is
# Standard SQL syntax.
WITH media_metrics AS (
  SELECT
    url AS pageURL,
    # Only the video-node counter is needed to decide whether a page is counted.
    CAST(JSON_VALUE(JSON_VALUE(payload, '$._media'), '$.num_video_nodes') AS INT64) AS num_video_nodes
  FROM
    `httparchive.pages.2021_07_01_desktop`
)

SELECT
  COUNT(pageURL)
FROM
  media_metrics
WHERE
  num_video_nodes > 0
16 changes: 16 additions & 0 deletions sql/2021/media/siteswithvideo_mobile.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
#standardSQL
# Number of mobile pages whose `num_video_nodes` media metric is greater than zero.
#
# `$._media` is read from `payload` as a scalar string and that string is then
# parsed as JSON a second time to reach `num_video_nodes`.
# The #standardSQL prefix is required because the backtick-quoted table name is
# Standard SQL syntax.
WITH media_metrics AS (
  SELECT
    url AS pageURL,
    # Only the video-node counter is needed to decide whether a page is counted.
    CAST(JSON_VALUE(JSON_VALUE(payload, '$._media'), '$.num_video_nodes') AS INT64) AS num_video_nodes
  FROM
    `httparchive.pages.2021_07_01_mobile`
)

SELECT
  COUNT(pageURL)
FROM
  media_metrics
WHERE
  num_video_nodes > 0
41 changes: 41 additions & 0 deletions sql/2021/media/video_attribute_count_desktop.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
#standardSQL
# Total count for every (attribute, value) pair listed in the
# `video_attributes_values_counts` array of the `_media` metric, over desktop
# pages whose `num_video_nodes` is greater than zero.
# NOTE(review): assumes each array entry is an object with `attribute`, `value`
# and `count` keys, as the JSON paths below imply -- confirm against the metric.

WITH videonotes AS (
  # One row per page: the media metric as a JSON string plus its video-node counter.
  SELECT
    url AS pageURL,
    JSON_VALUE(payload, '$._media') AS media,
    CAST(JSON_VALUE(JSON_VALUE(payload, '$._media'), '$.num_video_nodes') AS INT64) AS num_video_nodes
  FROM
    `httparchive.pages.2021_07_01_desktop`
),

video_attributes AS (
  # One row per array entry. The array is unnested instead of being joined into
  # a single space-separated string: a joined string of several objects is not
  # one JSON document, so JSON_VALUE could read at most the first entry.
  SELECT
    pageURL,
    JSON_VALUE(attribute_entry, '$.attribute') AS attribute,
    JSON_VALUE(attribute_entry, '$.value') AS value,
    CAST(JSON_VALUE(attribute_entry, '$.count') AS INT64) AS cnt
  FROM
    videonotes
  CROSS JOIN
    UNNEST(JSON_QUERY_ARRAY(media, '$.video_attributes_values_counts')) AS attribute_entry
  WHERE
    num_video_nodes > 0
)

SELECT
  attribute,
  value,
  SUM(cnt) AS freq
FROM
  video_attributes
GROUP BY
  attribute,
  value
ORDER BY
  # `value` breaks ties so the row order is deterministic.
  attribute ASC,
  value ASC
44 changes: 44 additions & 0 deletions sql/2021/media/video_attribute_count_mobile.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
#standardSQL
# Total count for every (attribute, value) pair listed in the
# `video_attributes_values_counts` array of the `_media` metric, over mobile
# pages whose `num_video_nodes` is greater than zero.
# NOTE(review): assumes each array entry is an object with `attribute`, `value`
# and `count` keys, as the JSON paths below imply -- confirm against the metric.

WITH videonotes AS (
  # One row per page: the media metric as a JSON string plus its video-node counter.
  SELECT
    url AS pageURL,
    JSON_VALUE(payload, '$._media') AS media,
    CAST(JSON_VALUE(JSON_VALUE(payload, '$._media'), '$.num_video_nodes') AS INT64) AS num_video_nodes
  FROM
    `httparchive.pages.2021_07_01_mobile`
),

video_attributes AS (
  # One row per array entry. The array is unnested instead of being joined into
  # a single space-separated string: a joined string of several objects is not
  # one JSON document, so JSON_VALUE could read at most the first entry.
  SELECT
    pageURL,
    JSON_VALUE(attribute_entry, '$.attribute') AS attribute,
    JSON_VALUE(attribute_entry, '$.value') AS value,
    CAST(JSON_VALUE(attribute_entry, '$.count') AS INT64) AS cnt
  FROM
    videonotes
  CROSS JOIN
    UNNEST(JSON_QUERY_ARRAY(media, '$.video_attributes_values_counts')) AS attribute_entry
  WHERE
    num_video_nodes > 0
)

SELECT
  attribute,
  value,
  SUM(cnt) AS freq
FROM
  video_attributes
GROUP BY
  attribute,
  value
ORDER BY
  # `value` breaks ties so the row order is deterministic.
  attribute ASC,
  value ASC

52 changes: 52 additions & 0 deletions sql/2021/media/video_durations_desktop.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
#standardSQL
# Histogram of the values in the `video_durations` array of the `_media` metric,
# over desktop pages whose `num_video_nodes` is greater than zero.
# NOTE(review): the bucket labels suggest the durations are in seconds -- confirm.

WITH page_media AS (
  # One row per page: the duration array and the video-node counter.
  SELECT
    CAST(JSON_VALUE(JSON_VALUE(payload, '$._media'), '$.num_video_nodes') AS INT64) AS num_video_nodes,
    JSON_QUERY_ARRAY(JSON_VALUE(payload, '$._media'), '$.video_durations') AS video_duration
  FROM
    `httparchive.pages.2021_07_01_desktop`
),

videonotes AS (
  # One row per recorded duration, tagged with its bucket.
  SELECT
    CAST(raw_duration AS FLOAT64) AS durations,
    # WHEN branches are tested in order, so each one only needs its upper bound.
    CASE
      WHEN CAST(raw_duration AS FLOAT64) <= 1 THEN 'under1'
      WHEN CAST(raw_duration AS FLOAT64) <= 5 THEN 'under5'
      WHEN CAST(raw_duration AS FLOAT64) <= 10 THEN 'under10'
      WHEN CAST(raw_duration AS FLOAT64) <= 20 THEN 'under20'
      WHEN CAST(raw_duration AS FLOAT64) <= 30 THEN 'under30'
      WHEN CAST(raw_duration AS FLOAT64) <= 45 THEN 'under45'
      WHEN CAST(raw_duration AS FLOAT64) <= 60 THEN 'under60'
      WHEN CAST(raw_duration AS FLOAT64) <= 90 THEN 'under90'
      WHEN CAST(raw_duration AS FLOAT64) <= 120 THEN 'under120'
      WHEN CAST(raw_duration AS FLOAT64) <= 180 THEN 'under180'
      WHEN CAST(raw_duration AS FLOAT64) <= 300 THEN 'under300'
      WHEN CAST(raw_duration AS FLOAT64) <= 600 THEN 'under600'
      ELSE 'over600'
    END AS duration_bucket
  FROM
    page_media
  CROSS JOIN
    UNNEST(video_duration) AS raw_duration
  WHERE
    num_video_nodes > 0 AND
    # Array elements are JSON-formatted strings, so a JSON null arrives as the
    # text 'null' and must be skipped before the FLOAT64 cast.
    raw_duration != 'null'
)

SELECT
  duration_bucket,
  COUNT(duration_bucket) AS freq
FROM
  videonotes
GROUP BY
  duration_bucket
ORDER BY
  # Buckets cover disjoint, increasing ranges, so ordering by the smallest
  # duration in each bucket lists them shortest-first. Sorting on the label
  # itself would be alphabetical (over600, under1, under10, under120, ...).
  MIN(durations) ASC
53 changes: 53 additions & 0 deletions sql/2021/media/video_durations_mobile.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
#standardSQL
# Histogram of the values in the `video_durations` array of the `_media` metric,
# over mobile pages whose `num_video_nodes` is greater than zero.
# NOTE(review): the bucket labels suggest the durations are in seconds -- confirm.

WITH page_media AS (
  # One row per page: the duration array and the video-node counter.
  SELECT
    CAST(JSON_VALUE(JSON_VALUE(payload, '$._media'), '$.num_video_nodes') AS INT64) AS num_video_nodes,
    JSON_QUERY_ARRAY(JSON_VALUE(payload, '$._media'), '$.video_durations') AS video_duration
  FROM
    `httparchive.pages.2021_07_01_mobile`
),

videonotes AS (
  # One row per recorded duration, tagged with its bucket.
  SELECT
    CAST(raw_duration AS FLOAT64) AS durations,
    # WHEN branches are tested in order, so each one only needs its upper bound.
    CASE
      WHEN CAST(raw_duration AS FLOAT64) <= 1 THEN 'under1'
      WHEN CAST(raw_duration AS FLOAT64) <= 5 THEN 'under5'
      WHEN CAST(raw_duration AS FLOAT64) <= 10 THEN 'under10'
      WHEN CAST(raw_duration AS FLOAT64) <= 20 THEN 'under20'
      WHEN CAST(raw_duration AS FLOAT64) <= 30 THEN 'under30'
      WHEN CAST(raw_duration AS FLOAT64) <= 45 THEN 'under45'
      WHEN CAST(raw_duration AS FLOAT64) <= 60 THEN 'under60'
      WHEN CAST(raw_duration AS FLOAT64) <= 90 THEN 'under90'
      WHEN CAST(raw_duration AS FLOAT64) <= 120 THEN 'under120'
      WHEN CAST(raw_duration AS FLOAT64) <= 180 THEN 'under180'
      WHEN CAST(raw_duration AS FLOAT64) <= 300 THEN 'under300'
      WHEN CAST(raw_duration AS FLOAT64) <= 600 THEN 'under600'
      ELSE 'over600'
    END AS duration_bucket
  FROM
    page_media
  CROSS JOIN
    UNNEST(video_duration) AS raw_duration
  WHERE
    num_video_nodes > 0 AND
    # Array elements are JSON-formatted strings, so a JSON null arrives as the
    # text 'null' and must be skipped before the FLOAT64 cast.
    raw_duration != 'null'
)

SELECT
  duration_bucket,
  COUNT(duration_bucket) AS freq
FROM
  videonotes
GROUP BY
  duration_bucket
ORDER BY
  # Buckets cover disjoint, increasing ranges, so ordering by the smallest
  # duration in each bucket lists them shortest-first. Sorting on the label
  # itself would be alphabetical (over600, under1, under10, under120, ...).
  MIN(durations) ASC
5 changes: 5 additions & 0 deletions sql/2021/media/video_ext.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
#standardSQL
# Requests whose MIME type contains "video", counted per client (the wildcard
# table suffix) and file extension, most frequent first.
# The #standardSQL prefix is required: wildcard tables and _TABLE_SUFFIX are
# Standard SQL features.
SELECT
  _TABLE_SUFFIX AS client,
  ext,
  # COUNT(ext) skips rows where ext is NULL.
  COUNT(ext) AS cnt
FROM
  `httparchive.summary_requests.2021_07_01_*`
WHERE
  mimetype LIKE '%video%'
GROUP BY
  client,
  ext
ORDER BY
  cnt DESC
39 changes: 39 additions & 0 deletions sql/2021/media/video_number_of_sources_desktop.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
#standardSQL
# For desktop pages whose `num_video_nodes` is greater than zero: how often each
# value appears in the `video_source_format_count` array of the `_media` metric.

WITH page_media AS (
  # `$._media` is read as a scalar string and parsed as JSON again below.
  SELECT
    JSON_VALUE(payload, '$._media') AS media
  FROM
    `httparchive.pages.2021_07_01_desktop`
),

source_counts AS (
  # One row per element of the count array.
  SELECT
    count_entry AS source_count
  FROM
    page_media
  CROSS JOIN
    UNNEST(JSON_QUERY_ARRAY(media, '$.video_source_format_count')) AS count_entry
  WHERE
    CAST(JSON_VALUE(media, '$.num_video_nodes') AS INT64) > 0
)

SELECT
  CAST(source_count AS INT64) AS source_counter,
  COUNT(CAST(source_count AS INT64)) AS numberofoccurances
FROM
  source_counts
GROUP BY
  source_count
ORDER BY
  source_counter DESC
37 changes: 37 additions & 0 deletions sql/2021/media/video_number_of_sources_mobile.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
#standardSQL
# For mobile pages whose `num_video_nodes` is greater than zero: how often each
# value appears in the `video_source_format_count` array of the `_media` metric.

WITH page_media AS (
  # `$._media` is read as a scalar string and parsed as JSON again below.
  SELECT
    JSON_VALUE(payload, '$._media') AS media
  FROM
    `httparchive.pages.2021_07_01_mobile`
),

source_counts AS (
  # One row per element of the count array.
  SELECT
    count_entry AS source_count
  FROM
    page_media
  CROSS JOIN
    UNNEST(JSON_QUERY_ARRAY(media, '$.video_source_format_count')) AS count_entry
  WHERE
    CAST(JSON_VALUE(media, '$.num_video_nodes') AS INT64) > 0
)

SELECT
  CAST(source_count AS INT64) AS source_counter,
  COUNT(CAST(source_count AS INT64)) AS numberofoccurances
FROM
  source_counts
GROUP BY
  source_count
ORDER BY
  source_counter DESC
42 changes: 42 additions & 0 deletions sql/2021/media/video_source_formats_desktop.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
#standardSQL
# Occurrences of each entry of the `video_source_format_type` array of the
# `_media` metric, over desktop pages whose `num_video_nodes` is greater than
# zero, most frequent first.
# NOTE(review): `source_formats` values are the JSON-formatted array elements
# returned by JSON_QUERY_ARRAY, so string entries keep their JSON quoting.

WITH videonotes AS (
  SELECT
    pageURL,
    num_video_nodes,
    source_formats
  FROM (
    SELECT
      url AS pageURL,
      CAST(JSON_VALUE(JSON_VALUE(payload, '$._media'), '$.num_video_nodes') AS INT64) AS num_video_nodes,
      JSON_QUERY_ARRAY(JSON_VALUE(payload, '$._media'), '$.video_source_format_type') AS video_source_format_type
    FROM
      `httparchive.pages.2021_07_01_desktop`
  )
  # Only the type array is unnested. Unnesting `video_source_format_count` in the
  # same FROM clause would build a per-page cartesian product, counting every
  # format once per count entry and dropping pages whose count array is empty.
  CROSS JOIN
    UNNEST(video_source_format_type) AS source_formats
)

SELECT
  source_formats,
  COUNT(source_formats) AS numberofoccurances
FROM
  videonotes
WHERE
  num_video_nodes > 0
GROUP BY
  source_formats
ORDER BY
  numberofoccurances DESC

43 changes: 43 additions & 0 deletions sql/2021/media/video_source_formats_mobile.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
#standardSQL
# Occurrences of each entry of the `video_source_format_type` array of the
# `_media` metric, over mobile pages whose `num_video_nodes` is greater than
# zero, most frequent first.
# NOTE(review): `source_formats` values are the JSON-formatted array elements
# returned by JSON_QUERY_ARRAY, so string entries keep their JSON quoting.

WITH videonotes AS (
  SELECT
    pageURL,
    num_video_nodes,
    source_formats
  FROM (
    SELECT
      url AS pageURL,
      CAST(JSON_VALUE(JSON_VALUE(payload, '$._media'), '$.num_video_nodes') AS INT64) AS num_video_nodes,
      JSON_QUERY_ARRAY(JSON_VALUE(payload, '$._media'), '$.video_source_format_type') AS video_source_format_type
    FROM
      `httparchive.pages.2021_07_01_mobile`
  )
  # Only the type array is unnested. Unnesting `video_source_format_count` in the
  # same FROM clause would build a per-page cartesian product, counting every
  # format once per count entry and dropping pages whose count array is empty.
  CROSS JOIN
    UNNEST(video_source_format_type) AS source_formats
)

SELECT
  source_formats,
  COUNT(source_formats) AS numberofoccurances
FROM
  videonotes
WHERE
  num_video_nodes > 0
GROUP BY
  source_formats
ORDER BY
  numberofoccurances DESC

Loading

0 comments on commit 0416e8f

Please sign in to comment.