diff --git a/package.json b/package.json index 2639f1b..df34fb4 100644 --- a/package.json +++ b/package.json @@ -8,6 +8,7 @@ "bugs": { "url": "https://github.com/fensak-io/reng/issues" }, + "type": "module", "source": "src/index.ts", "main": "./dist/index.cjs", "module": "./dist/module.mjs", @@ -44,10 +45,15 @@ "@babel/preset-typescript": "^7.23.2", "@fensak-io/front-matter": "^1.0.0", "@octokit/rest": "^20.0.2", - "babel-preset-minify": "^0.5.2" + "babel-preset-minify": "^0.5.2", + "json5": "^2.2.3", + "microdiff": "^1.3.2", + "toml": "^3.0.0", + "yaml": "^2.3.3" }, "devDependencies": { "@jest/globals": "^29.7.0", + "@octokit/types": "^12.1.0", "@parcel/config-default": "2.9.3", "@parcel/packager-ts": "2.9.3", "@parcel/transformer-typescript-types": "2.9.3", diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 7567f78..41fb661 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -23,11 +23,26 @@ dependencies: babel-preset-minify: specifier: ^0.5.2 version: 0.5.2 + json5: + specifier: ^2.2.3 + version: 2.2.3 + microdiff: + specifier: ^1.3.2 + version: 1.3.2 + toml: + specifier: ^3.0.0 + version: 3.0.0 + yaml: + specifier: ^2.3.3 + version: 2.3.3 devDependencies: '@jest/globals': specifier: ^29.7.0 version: 29.7.0 + '@octokit/types': + specifier: ^12.1.0 + version: 12.1.0 '@parcel/config-default': specifier: 2.9.3 version: 2.9.3(@parcel/core@2.9.3)(typescript@5.2.2) @@ -1750,7 +1765,7 @@ packages: '@octokit/graphql': 7.0.2 '@octokit/request': 8.1.2 '@octokit/request-error': 5.0.1 - '@octokit/types': 12.0.0 + '@octokit/types': 12.1.0 before-after-hook: 2.2.3 universal-user-agent: 6.0.0 @@ -1758,7 +1773,7 @@ packages: resolution: {integrity: sha512-hRlOKAovtINHQPYHZlfyFwaM8OyetxeoC81lAkBy34uLb8exrZB50SQdeW3EROqiY9G9yxQTpp5OHTV54QD+vA==} engines: {node: '>= 18'} dependencies: - '@octokit/types': 12.0.0 + '@octokit/types': 12.1.0 is-plain-object: 5.0.0 universal-user-agent: 6.0.0 @@ -1767,11 +1782,11 @@ packages: engines: {node: '>= 18'} dependencies: '@octokit/request': 8.1.2 - '@octokit/types': 12.0.0 + '@octokit/types': 12.1.0 universal-user-agent: 6.0.0 - /@octokit/openapi-types@19.0.0: - resolution: {integrity: sha512-PclQ6JGMTE9iUStpzMkwLCISFn/wDeRjkZFIKALpvJQNBGwDoYYi2fFvuHwssoQ1rXI5mfh6jgTgWuddeUzfWw==} + /@octokit/openapi-types@19.0.1: + resolution: {integrity: sha512-zC+73r2HIoRb9rWW5S3Y759hrpadlD5pNnya/QfZv0JZE7mvMu+FUa7nxHqTadi2hZc4BPZjJ8veDTuJnh8+8g==} /@octokit/plugin-paginate-rest@9.0.0(@octokit/core@5.0.1): resolution: {integrity: sha512-oIJzCpttmBTlEhBmRvb+b9rlnGpmFgDtZ0bB6nq39qIod6A5DP+7RkVLMOixIgRCYSHDTeayWqmiJ2SZ6xgfdw==} @@ -1780,7 +1795,7 @@ packages: '@octokit/core': '>=5' dependencies: '@octokit/core': 5.0.1 - '@octokit/types': 12.0.0 + '@octokit/types': 12.1.0 /@octokit/plugin-request-log@4.0.0(@octokit/core@5.0.1): resolution: {integrity: sha512-2uJI1COtYCq8Z4yNSnM231TgH50bRkheQ9+aH8TnZanB6QilOnx8RMD2qsnamSOXtDj0ilxvevf5fGsBhBBzKA==} @@ -1798,7 +1813,7 @@ packages: '@octokit/core': '>=5' dependencies: '@octokit/core': 5.0.1 - '@octokit/types': 12.0.0 + '@octokit/types': 12.1.0 dev: false /@octokit/plugin-retry@6.0.1(@octokit/core@5.0.1): @@ -1809,7 +1824,7 @@ packages: dependencies: '@octokit/core': 5.0.1 '@octokit/request-error': 5.0.1 - '@octokit/types': 12.0.0 + '@octokit/types': 12.1.0 bottleneck: 2.19.5 dev: true @@ -1820,7 +1835,7 @@ packages: '@octokit/core': ^5.0.0 dependencies: '@octokit/core': 5.0.1 - '@octokit/types': 12.0.0 + '@octokit/types': 12.1.0 bottleneck: 2.19.5 dev: true @@ -1828,7 +1843,7 @@ packages: resolution: {integrity: sha512-X7pnyTMV7MgtGmiXBwmO6M5kIPrntOXdyKZLigNfQWSEQzVxR4a4vo49vJjTWX70mPndj8KhfT4Dx+2Ng3vnBQ==} engines: {node: '>= 18'} dependencies: - '@octokit/types': 12.0.0 + '@octokit/types': 12.1.0 deprecation: 2.3.1 once: 1.4.0 @@ -1838,7 +1853,7 @@ packages: dependencies: '@octokit/endpoint': 9.0.1 '@octokit/request-error': 5.0.1 - '@octokit/types': 12.0.0 + '@octokit/types': 12.1.0 is-plain-object: 5.0.0 universal-user-agent: 6.0.0 @@ -1852,10 +1867,10 @@ packages: '@octokit/plugin-rest-endpoint-methods': 10.0.0(@octokit/core@5.0.1) dev: false - /@octokit/types@12.0.0: - resolution: {integrity: sha512-EzD434aHTFifGudYAygnFlS1Tl6KhbTynEWELQXIbTY8Msvb5nEqTZIm7sbPEt4mQYLZwu3zPKVdeIrw0g7ovg==} + /@octokit/types@12.1.0: + resolution: {integrity: sha512-JmjQr5ZbOnpnOLX5drI2O2I1N9suOYZAgINHXTlVVg4lRtUifMv2JssT+RhmNxQwXH153Pc8HaCMdTRkqI1oVQ==} dependencies: - '@octokit/openapi-types': 19.0.0 + '@octokit/openapi-types': 19.0.1 /@parcel/bundler-default@2.9.3(@parcel/core@2.9.3): resolution: {integrity: sha512-JjJK8dq39/UO/MWI/4SCbB1t/qgpQRFnFDetAAAezQ8oN++b24u1fkMDa/xqQGjbuPmGeTds5zxGgYs7id7PYg==} @@ -5711,6 +5726,10 @@ packages: engines: {node: '>= 8'} dev: true + /microdiff@1.3.2: + resolution: {integrity: sha512-pKy60S2febliZIbwdfEQKTtL5bLNxOyiRRmD400gueYl9XcHyNGxzHSlJWn9IMHwYXT0yohPYL08+bGozVk8cQ==} + dev: false + /micromatch@4.0.5: resolution: {integrity: sha512-DMy+ERcEW2q8Z2Po+WNXuw3c5YaUSFjAO5GsJqfEl7UjvtIuFKO6ZrKvcItdy98dwFI2N1tg3zNIdKaQT+aNdA==} engines: {node: '>=8.6'} diff --git a/src/engine/interpreter.test.ts b/src/engine/interpreter.test.ts index 57b6f7f..bfcee43 100644 --- a/src/engine/interpreter.test.ts +++ b/src/engine/interpreter.test.ts @@ -28,6 +28,7 @@ test("sanity check", async () => { additions: 0, deletions: 0, diff: [], + objectDiff: null, }, ], nullMeta, @@ -51,6 +52,7 @@ test("sanity check old version", async () => { additions: 0, deletions: 0, diff: [], + objectDiff: null, }, ], nullMeta, @@ -74,6 +76,7 @@ test("ES5 minify", async () => { additions: 0, deletions: 0, diff: [], + objectDiff: null, }, ], nullMeta, @@ -99,6 +102,7 @@ test("ES6 support", async () => { additions: 0, deletions: 0, diff: [], + objectDiff: null, }, ], nullMeta, @@ -129,6 +133,7 @@ function main(inp: IPatch[], metadata: IChangeSetMetadata) { additions: 0, deletions: 0, diff: [], + objectDiff: null, }, ], nullMeta, @@ -281,6 +286,7 @@ test("XMLHTTPRequest not supported", async () => { additions: 0, deletions: 0, diff: [], + objectDiff: null, }, ], nullMeta, @@ -307,6 +313,7 @@ test("fetch is not supported", async () => { additions: 0, deletions: 0, diff: [], + objectDiff: null, }, ], nullMeta, @@ -331,6 +338,7 @@ test("process is not supported", async () => { additions: 0, deletions: 0, diff: [], + objectDiff: null, }, ], nullMeta, @@ -355,6 +363,7 @@ test("Deno is not supported", async () => { additions: 0, deletions: 0, diff: [], + objectDiff: null, }, ], nullMeta, diff --git a/src/engine/patch_types.ts b/src/engine/patch_types.ts index 9f2ad04..bb02e2a 100644 --- a/src/engine/patch_types.ts +++ b/src/engine/patch_types.ts @@ -1,6 +1,8 @@ // Copyright (c) Fensak, LLC. // SPDX-License-Identifier: AGPL-3.0-or-later OR BUSL-1.1 +import type { Difference } from "microdiff"; + /** * The operation on a line in a hunk of a patch. * @property Unknown Unknown operation. @@ -66,7 +68,10 @@ export enum PatchOp { * :. * @property path The relative path (from the root of the repo) to the file that was updated in the patch. * @property op The operation that was done on the file in the patch. + * @property additions The number of lines that were added in this patch. + * @property deletions The number of lines that were removed in this patch. * @property diff The list of diffs, organized into hunks. + * @property objectDiff If the file represents a parsable data file (e.g., json, yaml, toml), this will contain the object level diff. */ export interface IPatch { contentsID: string; @@ -75,6 +80,24 @@ export interface IPatch { additions: number; deletions: number; diff: IHunk[]; + objectDiff: IObjectDiff | null; +} + +/** + * Represents a diff of the object representation of a file. The specific diff returns a list of object patches that + * contains the keys that were added, removed, or updated. Note that the difference is only populated for updated + * objects - if the file was inserted or deleted, then the diff will be empty. + * @property previous The object representation of the data in the file before the change. + * @property current The object representation of the data in the file after the change. + * @property diff The difference across the two objects. + */ +export interface IObjectDiff { + // eslint-disable-next-line no-var,@typescript-eslint/no-explicit-any + previous: any; + // eslint-disable-next-line no-var,@typescript-eslint/no-explicit-any + current: any; + // eslint-disable-next-line no-var,@typescript-eslint/no-explicit-any + diff: Difference[]; } /** diff --git a/src/sourcer/from_github.test.ts b/src/sourcer/from_github.test.ts index fe66637..b07e269 100644 --- a/src/sourcer/from_github.test.ts +++ b/src/sourcer/from_github.test.ts @@ -1,7 +1,7 @@ import { expect, test } from "@jest/globals"; import { Octokit } from "@octokit/rest"; -import { IPatch, PatchOp, LineOp } from "../engine/patch_types.ts"; +import { IPatch, PatchOp, LineOp, IObjectDiff } from "../engine/patch_types.ts"; import { IGitHubRepository, @@ -31,11 +31,13 @@ test("a single file change from GitHub is parsed correctly", async () => { }); expect(patches.patchList.length).toEqual(1); + // Check top level patch const patch = patches.patchList[0]; expect(patch.path).toEqual("appversions.json"); expect(patch.op).toEqual(PatchOp.Modified); expect(patch.diff.length).toEqual(1); + // Check patch hunks const hunk = patch.diff[0]; expect(hunk.originalStart).toEqual(1); expect(hunk.originalLength).toEqual(5); @@ -68,6 +70,29 @@ test("a single file change from GitHub is parsed correctly", async () => { newText: "", }, ]); + + // Check object diffs + const maybeObjDiff = patch.objectDiff; + expect(maybeObjDiff).not.toBeNull(); + const objDiff = maybeObjDiff as IObjectDiff; + expect(objDiff.previous).toEqual({ + coreapp: "v0.1.0", + subapp: "v1.1.0", + logapp: "v100.1.0", + }); + expect(objDiff.current).toEqual({ + coreapp: "v0.1.0", + subapp: "v1.2.0", + logapp: "v100.1.0", + }); + expect(objDiff.diff).toEqual([ + { + type: "CHANGE", + path: ["subapp"], + value: "v1.2.0", + oldValue: "v1.1.0", + }, + ]); }); test("multiple file changes from GitHub is parsed correctly", async () => { @@ -146,6 +171,29 @@ test("multiple file changes from GitHub is parsed correctly", async () => { newText: "", }, ]); + const maybeJSONObjDiff = jsonPatch.objectDiff; + expect(maybeJSONObjDiff).not.toBeNull(); + const jsonObjDiff = maybeJSONObjDiff as IObjectDiff; + expect(jsonObjDiff).toEqual({ + previous: { + coreapp: "v0.1.0", + subapp: "v1.1.0", + logapp: "v100.1.0", + }, + current: { + coreapp: "v0.1.0", + subapp: "v1.2.0", + logapp: "v100.1.0", + }, + diff: [ + { + type: "CHANGE", + path: ["subapp"], + value: "v1.2.0", + oldValue: "v1.1.0", + }, + ], + }); // Check tfvars patch expect(tfvarsPatch.op).toEqual(PatchOp.Modified); @@ -172,6 +220,7 @@ test("multiple file changes from GitHub is parsed correctly", async () => { newText: "", }, ]); + expect(tfvarsPatch.objectDiff).toBeNull(); // Check toml patch expect(tomlPatch.op).toEqual(PatchOp.Modified); @@ -198,6 +247,29 @@ test("multiple file changes from GitHub is parsed correctly", async () => { newText: "", }, ]); + const maybeTOMLObjDiff = tomlPatch.objectDiff; + expect(maybeTOMLObjDiff).not.toBeNull(); + const tomlObjDiff = maybeTOMLObjDiff as IObjectDiff; + expect(tomlObjDiff).toEqual({ + previous: { + coreapp: "v0.1.0", + subapp: "v1.1.0", + logapp: "v100.1.0", + }, + current: { + coreapp: "v0.2.0", + subapp: "v1.1.0", + logapp: "v100.1.0", + }, + diff: [ + { + type: "CHANGE", + path: ["coreapp"], + value: "v0.2.0", + oldValue: "v0.1.0", + }, + ], + }); }); test("extracts linked PRs in front matter", async () => { diff --git a/src/sourcer/from_github.ts b/src/sourcer/from_github.ts index 8a6a3c0..af95810 100644 --- a/src/sourcer/from_github.ts +++ b/src/sourcer/from_github.ts @@ -2,8 +2,13 @@ // SPDX-License-Identifier: AGPL-3.0-or-later OR BUSL-1.1 import * as nodecrypto from "crypto"; +import YAML from "yaml"; +import toml from "toml"; +import JSON5 from "json5"; +import diff from "microdiff"; import { Octokit } from "@octokit/rest"; +import { Endpoints } from "@octokit/types"; import { hasParsableFrontMatter, extract as extractFrontMatter, @@ -14,6 +19,7 @@ import { ILinkedPR, IChangeSetMetadata, IPatch, + IObjectDiff, PatchOp, } from "../engine/patch_types.ts"; @@ -21,6 +27,16 @@ import { SourcePlatform } from "./from.ts"; const crypto = nodecrypto.webcrypto; +// A type utility to unpack the element type from an array type +// See https://stackoverflow.com/questions/43537520/how-do-i-extract-a-type-from-an-array-in-typescript +type EleTypeUnpacked = T extends (infer U)[] ? U : T; + +type PRFile = EleTypeUnpacked< + Endpoints["GET /repos/{owner}/{repo}/pulls/{pull_number}/files"]["response"]["data"] +>; +type PullReq = + Endpoints["GET /repos/{owner}/{repo}/pulls/{pull_number}"]["response"]["data"]; + /** * Represents a repository hosted on GitHub. * @property owner The owner of the repository. @@ -97,69 +113,95 @@ export async function patchFromGitHubPullRequest( const fContentsURL = new URL(f.contents_url); const fContentsHash = await getGitHubPRFileID(fetchMapSalt, fContentsURL); out.patchFetchMap[fContentsHash] = fContentsURL; - const fid = `${SourcePlatform.GitHub}:${fContentsHash}`; - - let op = PatchOp.Unknown; - switch (f.status) { - // This should never happen, so we throw an error - default: - throw new Error( - `unknown status for file ${f.filename} in PR ${prNum} of repo ${repo.owner}/${repo.name}: ${f.status}`, - ); - - // A rename is a delete and then an insert, so special case it - case "renamed": - if (!f.previous_filename) { - // This shouldn't happen because of the way the GitHub API works, so we throw an error. - throw new Error("previous filename not available for a rename"); - } - out.patchList.push({ - contentsID: fid, - path: f.previous_filename, - op: PatchOp.Delete, - // TODO: this requires pulling down the file contents - additions: 0, - deletions: 0, - diff: [], - }); - out.patchList.push({ - contentsID: fid, - path: f.filename, - op: PatchOp.Insert, - // TODO: this requires pulling down the file contents - additions: 0, - deletions: 0, - diff: [], - }); - continue; - - // The rest only needs to set the op - - case "added": - case "copied": // a copy is the same as a file insert. - op = PatchOp.Insert; - break; - case "removed": - op = PatchOp.Delete; - break; - case "changed": - case "modified": - op = PatchOp.Modified; - break; - } - out.patchList.push({ - contentsID: fid, - path: f.filename, - op: op, - additions: f.additions, - deletions: f.deletions, - diff: parseUnifiedDiff(f.patch || ""), - }); + const patches = await getPatchesFromPRFile( + clt, + f, + fContentsHash, + pullReq, + `${repo.owner}/${repo.name}`, + ); + out.patchList.push(...patches); } } return out; } +async function getPatchesFromPRFile( + clt: Octokit, + f: PRFile, + fContentsHash: string, + pullReq: PullReq, + + // The following is only needed for error messaging + repoName: string, +): Promise { + const fid = `${SourcePlatform.GitHub}:${fContentsHash}`; + + let op = PatchOp.Unknown; + switch (f.status) { + // This should never happen, so we throw an error + default: + throw new Error( + `unknown status for file ${f.filename} in PR ${pullReq.number} of repo ${repoName}: ${f.status}`, + ); + + // A rename is a delete and then an insert, so special case it + case "renamed": + if (!f.previous_filename) { + // This shouldn't happen because of the way the GitHub API works, so we throw an error. + throw new Error("previous filename not available for a rename"); + } + return [ + { + contentsID: fid, + path: f.previous_filename, + op: PatchOp.Delete, + // TODO: this requires pulling down the file contents + additions: 0, + deletions: 0, + diff: [], + objectDiff: null, + }, + { + contentsID: fid, + path: f.filename, + op: PatchOp.Insert, + // TODO: this requires pulling down the file contents + additions: 0, + deletions: 0, + diff: [], + objectDiff: null, + }, + ]; + + // The rest only needs to set the op + + case "added": + case "copied": // a copy is the same as a file insert. + op = PatchOp.Insert; + break; + case "removed": + op = PatchOp.Delete; + break; + case "changed": + case "modified": + op = PatchOp.Modified; + break; + } + + return [ + { + contentsID: fid, + path: f.filename, + op: op, + additions: f.additions, + deletions: f.deletions, + diff: parseUnifiedDiff(f.patch || ""), + objectDiff: await getObjectDiff(clt, f, pullReq, op), + }, + ]; +} + async function getGitHubPRFileID(salt: string, url: URL): Promise { const toHash = `${salt}:${url}`; const digest = await crypto.subtle.digest( @@ -226,6 +268,130 @@ async function extractLinkedPRs( return out; } +/** + * Returns a diff of the object representation of the PR file if it can be parsed as a object. This representation is + * more ergonomical to work with than the textual patch representation, as you can traverse the keys of the object to + * see which data has changed. + * + * Currently we support pulling down the object representation for the following file types: + * - JSON + * - JSON5 + * - YAML + * - TOML + * + * Returns null if the file can not be turned into an object type. + */ +async function getObjectDiff( + clt: Octokit, + f: PRFile, + pullReq: PullReq, + op: PatchOp, + // eslint-disable-next-line no-var,@typescript-eslint/no-explicit-any +): Promise { + // Get the file extension to determine the file type + const m = /(?:\.([^.]+))?$/.exec(f.filename); + if (m === null) { + return null; + } + const ext = m[1]; + + const supportedObjectExtensions = ["json", "json5", "yaml", "yml", "toml"]; + if (!supportedObjectExtensions.includes(ext)) { + return null; + } + + // At this point, we know the object can be parsed out of the file so start to pull down the contents. + // eslint-disable-next-line no-var,@typescript-eslint/no-explicit-any + let parser: (s: string) => any; + switch (ext) { + default: + // Throw error becauset this should never happen given the check for supportedObjectExtensions. + throw new Error(`unsupported file extension ${ext} for ${f.filename}`); + + case "json": + parser = JSON.parse; + break; + + case "json5": + parser = JSON5.parse; + break; + + case "yaml": + case "yml": + parser = YAML.parse; + break; + + case "toml": + parser = toml.parse; + break; + } + + switch (op) { + default: + return null; + + case PatchOp.Insert: { + const curContents = await getPRFileContent(clt, f, pullReq, "head"); + const cur = parser(curContents); + return { + previous: null, + current: cur, + diff: [], + }; + } + + case PatchOp.Delete: { + const prevContents = await getPRFileContent(clt, f, pullReq, "base"); + const prev = parser(prevContents); + return { + previous: prev, + current: null, + diff: [], + }; + } + + case PatchOp.Modified: { + const prevContents = await getPRFileContent(clt, f, pullReq, "base"); + const prev = parser(prevContents); + const curContents = await getPRFileContent(clt, f, pullReq, "head"); + const cur = parser(curContents); + return { + previous: prev, + current: cur, + diff: diff(prev, cur), + }; + } + } +} + +async function getPRFileContent( + clt: Octokit, + f: PRFile, + pullReq: PullReq, + refSrc: "base" | "head", +): Promise { + let repoOwner = pullReq.base.repo.owner.login; + let repoName = pullReq.base.repo.name; + let ref = pullReq.base.ref; + if (refSrc === "head") { + const repo = pullReq.head.repo || pullReq.base.repo; + repoOwner = repo.owner.login; + repoName = repo.name; + ref = pullReq.head.ref; + } + + const { data: fileRep } = await clt.repos.getContent({ + owner: repoOwner, + repo: repoName, + path: f.filename, + ref: ref, + }); + if (Array.isArray(fileRep) || fileRep.type !== "file") { + throw new Error(`${f.filename} is not a file`); + } + return Buffer.from(fileRep.content, "base64").toString(); +} + function hexEncode(hb: Uint8Array): string { const hashArray = Array.from(hb); const hashHex = hashArray