From bd373e90b0079f57af0213b2c0d7344d08ca41c7 Mon Sep 17 00:00:00 2001 From: Daniel Bachler Date: Tue, 16 Jul 2024 01:48:09 +0200 Subject: [PATCH] =?UTF-8?q?=F0=9F=9A=A7=20(csv=20download):=20WIP=20set=20?= =?UTF-8?q?up=20readme=20and=20metadata?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- functions/_common/grapherRenderer.ts | 110 +++++++++++++++++++++++++++ functions/grapher/[slug].ts | 84 +++++++++++++++++--- functions/package.json | 1 + yarn.lock | 83 +++++++++++++++++++- 4 files changed, 266 insertions(+), 12 deletions(-) diff --git a/functions/_common/grapherRenderer.ts b/functions/_common/grapherRenderer.ts index 37d19951b3..fe59bef20b 100644 --- a/functions/_common/grapherRenderer.ts +++ b/functions/_common/grapherRenderer.ts @@ -174,6 +174,71 @@ async function initGrapher( return grapher } +export async function fetchMetadataForGrapher( + slug: string, + env: Env, + searchParams?: URLSearchParams +) { + const grapherLogger = new TimeLogger("grapher") + console.log("Initializing grapher") + const grapher = await initGrapher( + { + slug, + options: TWITTER_OPTIONS, + searchParams: searchParams ?? new URLSearchParams(""), + env, + }, + grapherLogger + ) + console.log("Downloading data") + await grapher.downloadLegacyDataFromOwidVariableIds() + console.log("Getting defs") + const defs = grapher.inputTable + .getColumns(grapher.inputTable.columnNames) + .map((col) => col.def) + console.log("Returning response") + return new Response(JSON.stringify(defs), { + headers: { + "Content-Type": "application/json", + }, + }) +} + +export async function fetchZipForGrapher( + slug: string, + env: Env, + searchParams?: URLSearchParams +) { + const grapherLogger = new TimeLogger("grapher") + const grapher = await initGrapher( + { + slug, + options: TWITTER_OPTIONS, + searchParams: searchParams ?? 
new URLSearchParams(""), + env, + }, + grapherLogger + ) + await grapher.downloadLegacyDataFromOwidVariableIds() + const defs = grapher.inputTable + .getColumns(grapher.inputTable.columnNames) + .map((col) => col.def) + const table = + searchParams?.get("csvType") === "filtered" + ? grapher.transformedTable + : grapher.inputTable + const json = JSON.stringify(defs) + const zip = new JSZip() + zip.file("metadata.json", json) + zip.file("data.csv", table.toPrettyCsv()) + const content = await zip.generateAsync({ type: "blob" }) + return new Response(content, { + headers: { + "Content-Type": "application/zip", + }, + }) +} + export async function fetchCsvForGrapher( slug: string, env: Env, @@ -201,6 +266,51 @@ export async function fetchCsvForGrapher( }) } +export async function fetchReadmeForGrapher( + slug: string, + env: Env, + searchParams?: URLSearchParams +) { + console.log("Initializing grapher") + const grapherLogger = new TimeLogger("grapher") + const grapher = await initGrapher( + { + slug, + options: TWITTER_OPTIONS, + searchParams: searchParams ?? new URLSearchParams(""), + env, + }, + grapherLogger + ) + console.log("Downloading data") + await grapher.downloadLegacyDataFromOwidVariableIds() + console.log("Getting defs") + const sources = grapher.inputTable + .getColumns(grapher.inputTable.columnNames) + .map( + (col) => `## ${col.def.name} +${col.def.description} + ` + ) + console.log("Returning response") + + const readme = `# ${grapher.title} - Data package + +This data package contains the data that powers the chart ["${grapher.title}"](${grapher.originUrl}) on the Our World in Data website. +The source of this data is ${grapher.sourceDesc}. 
+ +## Individual time series information + +${sources.join("\n")} + + ` + return new Response(readme, { + headers: { + "Content-Type": "text/markdown", + }, + }) +} + async function fetchAndRenderGrapherToSvg({ slug, options, diff --git a/functions/grapher/[slug].ts b/functions/grapher/[slug].ts index 13b568e6d7..72f8bed558 100644 --- a/functions/grapher/[slug].ts +++ b/functions/grapher/[slug].ts @@ -1,6 +1,19 @@ import { IRequestStrict, Router, error } from "itty-router" -import { fetchCsvForGrapher } from "../_common/grapherRenderer.js" +import { + fetchCsvForGrapher, + fetchMetadataForGrapher, + fetchZipForGrapher, + fetchReadmeForGrapher, +} from "../_common/grapherRenderer.js" import { Env } from "./thumbnail/[slug].js" + +enum PageType { + grapher = "grapher", + csv = "csv", + metadata = "metadata", + readme = "readme", + zip = "zip", +} export const onRequestGet: PagesFunction = async (context) => { // Makes it so that if there's an error, we will just deliver the original page before the HTML rewrite. // Only caveat is that redirects will not be taken into account for some reason; but on the other hand the worker is so simple that it's unlikely to fail. 
@@ -28,14 +41,42 @@ export const onRequestGet: PagesFunction = async (context) => { const { request, env, params } = context const url = new URL(request.url) - const isCsvRequest = url.pathname.endsWith(".csv") - const createRedirectResponse = (redirSlug: string, currentUrl: URL) => - new Response(null, { + let pageType = PageType.grapher + if (url.pathname.endsWith(".csv")) { + pageType = PageType.csv + } + if (url.pathname.endsWith(".zip")) { + pageType = PageType.zip + } + if (url.pathname.endsWith(".metadata.json")) { + pageType = PageType.metadata + } + if (url.pathname.endsWith(".readme.md")) { + // eventually this should not be accessible outside the zip file + pageType = PageType.readme + } + + const createRedirectResponse = (redirSlug: string, currentUrl: URL) => { + let extension = "" + if (pageType === PageType.csv) { + extension = ".csv" + } + if (pageType === PageType.zip) { + extension = ".zip" + } + if (pageType === PageType.metadata) { + extension = ".metadata.json" + } + if (pageType === PageType.readme) { + extension = ".readme.md" + } + return new Response(null, { status: 302, headers: { - Location: `/grapher/${redirSlug}${isCsvRequest ? 
".csv" : ""}${currentUrl.search}`, + Location: `/grapher/${redirSlug}${extension}${currentUrl.search}`, }, }) + } const originalSlug = params.slug as string @@ -69,8 +110,19 @@ export const onRequestGet: PagesFunction = async (context) => { const grapherUrl = new URL(request.url) // if we have a csv url, then create a new url without the csv extension but keeping the query params // this is to check if the page exists and to redirect to the correct page if it does - if (isCsvRequest) { - grapherUrl.pathname = url.pathname.replace(/\.csv$/, "") + if (pageType !== PageType.grapher) { + if (pageType === PageType.csv) { + grapherUrl.pathname = url.pathname.replace(/\.csv$/, "") + } + if (pageType === PageType.zip) { + grapherUrl.pathname = url.pathname.replace(/\.zip$/, "") + } + if (pageType === PageType.metadata) { + grapherUrl.pathname = url.pathname.replace(/\.metadata\.json$/, "") + } + if (pageType === PageType.readme) { + grapherUrl.pathname = url.pathname.replace(/\.readme\.md$/, "") + } } const grapherPageResp = await env.ASSETS.fetch(grapherUrl, { @@ -135,8 +187,7 @@ export const onRequestGet: PagesFunction = async (context) => { return rewriter.transform(grapherPageResp) } - const shouldCache = - !url.searchParams.has("nocache") && context.request.url.endsWith(".csv") + const shouldCache = !url.searchParams.has("nocache") const cache = caches.default if (shouldCache) { @@ -153,6 +204,21 @@ export const onRequestGet: PagesFunction = async (context) => { async ({ params: { slug } }, { searchParams }, env) => fetchCsvForGrapher(slug, env, searchParams) // pass undefined if we want the full csv ) + .get( + "/grapher/:slug.metadata.json", + async ({ params: { slug } }, { searchParams }, env) => + fetchMetadataForGrapher(slug, env, searchParams) // query params are forwarded to grapher initialization + ) + .get( + "/grapher/:slug.zip", + async ({ params: { slug } }, { searchParams }, env) => + fetchZipForGrapher(slug, env, searchParams) // csvType=filtered selects the filtered table for data.csv 
+ ) + .get( + "/grapher/:slug.readme.md", + async ({ params: { slug } }, { searchParams }, env) => + fetchReadmeForGrapher(slug, env, searchParams) // pass undefined if we want the full csv + ) .get( "/grapher/:slug", async ({ params: { slug } }, { searchParams }, env) => diff --git a/functions/package.json b/functions/package.json index ee9979ff2f..a4f1240e46 100644 --- a/functions/package.json +++ b/functions/package.json @@ -4,6 +4,7 @@ "@ourworldindata/grapher": "workspace:^", "@ourworldindata/utils": "workspace:^", "itty-router": "^5.0.17", + "jszip": "^3.10.1", "stripe": "^14.20.0", "svg2png-wasm": "^1.4.1" }, diff --git a/yarn.lock b/yarn.lock index 275b17579f..5535693d4d 100644 --- a/yarn.lock +++ b/yarn.lock @@ -2949,7 +2949,7 @@ __metadata: languageName: node linkType: hard -"@npmcli/package-json@npm:5.2.0, @npmcli/package-json@npm:^5.0.0, @npmcli/package-json@npm:^5.1.0": +"@npmcli/package-json@npm:5.2.0, @npmcli/package-json@npm:^5.1.0": version: 5.2.0 resolution: "@npmcli/package-json@npm:5.2.0" dependencies: @@ -2964,6 +2964,21 @@ __metadata: languageName: node linkType: hard +"@npmcli/package-json@npm:^5.0.0": + version: 5.1.0 + resolution: "@npmcli/package-json@npm:5.1.0" + dependencies: + "@npmcli/git": "npm:^5.0.0" + glob: "npm:^10.2.2" + hosted-git-info: "npm:^7.0.0" + json-parse-even-better-errors: "npm:^3.0.0" + normalize-package-data: "npm:^6.0.0" + proc-log: "npm:^4.0.0" + semver: "npm:^7.5.3" + checksum: 10/0e5cb5eff32cf80234525160a702c91a38e4b98ab74e34e2632b43c4350dbad170bd835989cc7d6e18d24798e3242e45b60f3d5e26bd128fe1c4529931105f8e + languageName: node + linkType: hard + "@npmcli/promise-spawn@npm:^7.0.0": version: 7.0.2 resolution: "@npmcli/promise-spawn@npm:7.0.2" @@ -8533,7 +8548,7 @@ __metadata: languageName: node linkType: hard -"dedent@npm:1.5.3, dedent@npm:^1.0.0": +"dedent@npm:1.5.3": version: 1.5.3 resolution: "dedent@npm:1.5.3" peerDependencies: @@ -8545,6 +8560,18 @@ __metadata: languageName: node linkType: hard 
+"dedent@npm:^1.0.0": + version: 1.5.1 + resolution: "dedent@npm:1.5.1" + peerDependencies: + babel-plugin-macros: ^3.1.0 + peerDependenciesMeta: + babel-plugin-macros: + optional: true + checksum: 10/fc00a8bc3dfb7c413a778dc40ee8151b6c6ff35159d641f36ecd839c1df5c6e0ec5f4992e658c82624a1a62aaecaffc23b9c965ceb0bbf4d698bfc16469ac27d + languageName: node + linkType: hard + "deep-extend@npm:^0.6.0": version: 0.6.0 resolution: "deep-extend@npm:0.6.0" @@ -8818,7 +8845,14 @@ __metadata: languageName: node linkType: hard -"dotenv@npm:^16.0.1, dotenv@npm:^16.0.3, dotenv@npm:^16.4.4, dotenv@npm:~16.4.5": +"dotenv@npm:^16.0.1, dotenv@npm:^16.0.3": + version: 16.3.1 + resolution: "dotenv@npm:16.3.1" + checksum: 10/dbb778237ef8750e9e3cd1473d3c8eaa9cc3600e33a75c0e36415d0fa0848197f56c3800f77924c70e7828f0b03896818cd52f785b07b9ad4d88dba73fbba83f + languageName: node + linkType: hard + +"dotenv@npm:^16.4.4, dotenv@npm:~16.4.5": version: 16.4.5 resolution: "dotenv@npm:16.4.5" checksum: 10/55a3134601115194ae0f924e54473459ed0d9fc340ae610b676e248cca45aa7c680d86365318ea964e6da4e2ea80c4514c1adab5adb43d6867fb57ff068f95c8 @@ -11511,6 +11545,13 @@ __metadata: languageName: node linkType: hard +"immediate@npm:~3.0.5": + version: 3.0.6 + resolution: "immediate@npm:3.0.6" + checksum: 10/f9b3486477555997657f70318cc8d3416159f208bec4cca3ff3442fd266bc23f50f0c9bd8547e1371a6b5e82b821ec9a7044a4f7b944798b25aa3cc6d5e63e62 + languageName: node + linkType: hard + "immutable@npm:^4.0.0, immutable@npm:^4.3.6": version: 4.3.6 resolution: "immutable@npm:4.3.6" @@ -13121,6 +13162,18 @@ __metadata: languageName: node linkType: hard +"jszip@npm:^3.10.1": + version: 3.10.1 + resolution: "jszip@npm:3.10.1" + dependencies: + lie: "npm:~3.3.0" + pako: "npm:~1.0.2" + readable-stream: "npm:~2.3.6" + setimmediate: "npm:^1.0.5" + checksum: 10/bfbfbb9b0a27121330ac46ab9cdb3b4812433faa9ba4a54742c87ca441e31a6194ff70ae12acefa5fe25406c432290e68003900541d948a169b23d30c34dd984 + languageName: node + linkType: hard + 
"just-diff-apply@npm:^5.2.0": version: 5.5.0 resolution: "just-diff-apply@npm:5.5.0" @@ -13386,6 +13439,15 @@ __metadata: languageName: node linkType: hard +"lie@npm:~3.3.0": + version: 3.3.0 + resolution: "lie@npm:3.3.0" + dependencies: + immediate: "npm:~3.0.5" + checksum: 10/f335ce67fe221af496185d7ce39c8321304adb701e122942c495f4f72dcee8803f9315ee572f5f8e8b08b9e8d7195da91b9fad776e8864746ba8b5e910adf76e + languageName: node + linkType: hard + "lilconfig@npm:3.0.0": version: 3.0.0 resolution: "lilconfig@npm:3.0.0" @@ -15191,6 +15253,7 @@ __metadata: "@ourworldindata/grapher": "workspace:^" "@ourworldindata/utils": "workspace:^" itty-router: "npm:^5.0.17" + jszip: "npm:^3.10.1" stripe: "npm:^14.20.0" svg2png-wasm: "npm:^1.4.1" languageName: unknown @@ -15366,6 +15429,13 @@ __metadata: languageName: node linkType: hard +"pako@npm:~1.0.2": + version: 1.0.11 + resolution: "pako@npm:1.0.11" + checksum: 10/1ad07210e894472685564c4d39a08717e84c2a68a70d3c1d9e657d32394ef1670e22972a433cbfe48976cb98b154ba06855dcd3fcfba77f60f1777634bec48c0 + languageName: node + linkType: hard + "papaparse@npm:^5.3.1": version: 5.3.1 resolution: "papaparse@npm:5.3.1" @@ -17967,6 +18037,13 @@ __metadata: languageName: node linkType: hard +"setimmediate@npm:^1.0.5": + version: 1.0.5 + resolution: "setimmediate@npm:1.0.5" + checksum: 10/76e3f5d7f4b581b6100ff819761f04a984fa3f3990e72a6554b57188ded53efce2d3d6c0932c10f810b7c59414f85e2ab3c11521877d1dea1ce0b56dc906f485 + languageName: node + linkType: hard + "setprototypeof@npm:1.2.0": version: 1.2.0 resolution: "setprototypeof@npm:1.2.0"