From 306f3378f8dc1203ce19345af12a7b851853aefe Mon Sep 17 00:00:00 2001 From: Daniel Brooks Date: Fri, 8 Nov 2024 16:26:13 -0800 Subject: [PATCH] feat(eventBridge): moving corpus indexing to use event bridge types (#927) * feat(eventBridge): moving corpus indexing to use event bridge types * feat(eventBridge): removing empty objects * feat(eventBridge): updating corpus parser hydration to use event package * fix(nit): updating variable name --- .../package.json | 1 + .../src/commands/Collection.ts | 2 +- .../src/index.integration.ts | 32 +- .../src/index.ts | 22 +- .../src/types.ts | 142 +---- .../package.json | 3 +- .../src/index.ts | 12 +- .../src/types.ts | 138 +---- packages/event-bridge/package.json | 6 +- packages/event-bridge/src/events/events.ts | 7 + .../src/events/generated/schema.json | 546 ++++++++++++++++++ packages/event-bridge/src/events/index.ts | 16 +- .../src/events/types/collection.spec.ts | 66 +++ .../src/events/types/collection.ts | 106 ++++ .../event-bridge/src/events/types/corpus.ts | 62 ++ .../event-bridge/src/events/types/index.ts | 2 + packages/event-bridge/src/jsonUtils.spec.ts | 82 +++ packages/event-bridge/src/jsonUtils.ts | 36 ++ packages/event-bridge/src/utils.ts | 6 + pnpm-lock.yaml | 12 + servers/account-data-deleter/package.json | 2 +- servers/shares-api/package.json | 2 +- servers/user-list-search/package.json | 2 +- 23 files changed, 1040 insertions(+), 265 deletions(-) create mode 100644 packages/event-bridge/src/events/types/collection.spec.ts create mode 100644 packages/event-bridge/src/events/types/collection.ts create mode 100644 packages/event-bridge/src/events/types/corpus.ts create mode 100644 packages/event-bridge/src/jsonUtils.spec.ts create mode 100644 packages/event-bridge/src/jsonUtils.ts diff --git a/lambdas/user-list-search-corpus-indexing/package.json b/lambdas/user-list-search-corpus-indexing/package.json index d1850eef7..9e9467d1f 100644 --- a/lambdas/user-list-search-corpus-indexing/package.json +++ 
b/lambdas/user-list-search-corpus-indexing/package.json @@ -18,6 +18,7 @@ }, "dependencies": { "@opensearch-project/opensearch": "^2.10.0", + "@pocket-tools/event-bridge": "workspace:*", "@pocket-tools/ts-logger": "workspace:*", "@sentry/aws-serverless": "8.37.1", "fetch-retry": "^5.0.6", diff --git a/lambdas/user-list-search-corpus-indexing/src/commands/Collection.ts b/lambdas/user-list-search-corpus-indexing/src/commands/Collection.ts index 1ee12066f..58c4c1748 100644 --- a/lambdas/user-list-search-corpus-indexing/src/commands/Collection.ts +++ b/lambdas/user-list-search-corpus-indexing/src/commands/Collection.ts @@ -1,4 +1,4 @@ -import { CollectionPayload } from '../types'; +import { CollectionPayload } from '@pocket-tools/event-bridge'; import { config } from '../config'; import { buildCollectionUrl } from '../utils'; diff --git a/lambdas/user-list-search-corpus-indexing/src/index.integration.ts b/lambdas/user-list-search-corpus-indexing/src/index.integration.ts index b8c65db5c..c3fd9e7ea 100644 --- a/lambdas/user-list-search-corpus-indexing/src/index.integration.ts +++ b/lambdas/user-list-search-corpus-indexing/src/index.integration.ts @@ -4,6 +4,7 @@ import { config } from './config'; import * as oci from './queries/originalCorpusId'; import * as ci from './queries/collectionId'; import * as api from './commands/ApprovedItem'; +import { PocketEventType } from '@pocket-tools/event-bridge'; /** * Test cleanup: delete all documents in corpus indices @@ -159,9 +160,9 @@ describe('bulk indexer', () => { const payloads: EventPayload[] = [ { messageId: '123abc', - detailType: 'add-approved-item', + detailType: PocketEventType.CORPUS_ITEM_ADDED, detail: { - eventType: 'add-approved-item', + eventType: PocketEventType.CORPUS_ITEM_ADDED, url: 'http://some-url.com', approvedItemExternalId: 'aaaaa', language: 'en', @@ -184,9 +185,9 @@ describe('bulk indexer', () => { const payloads: EventPayload[] = [ { messageId: '123abc', - detailType: 'add-approved-item', + 
detailType: PocketEventType.CORPUS_ITEM_ADDED, detail: { - eventType: 'add-approved-item', + eventType: PocketEventType.CORPUS_ITEM_ADDED, url: 'http://some-url.com', approvedItemExternalId: 'bbbbbbb', language: 'en', @@ -194,9 +195,9 @@ describe('bulk indexer', () => { }, { messageId: '456def', - detailType: 'add-approved-item', + detailType: PocketEventType.CORPUS_ITEM_ADDED, detail: { - eventType: 'add-approved-item', + eventType: PocketEventType.CORPUS_ITEM_ADDED, url: 'http://eine-url.de', approvedItemExternalId: 'ccccccc', language: 'de', @@ -205,7 +206,7 @@ describe('bulk indexer', () => { // Collection { messageId: '456def', - detailType: 'add-collection', + detailType: PocketEventType.COLLECTION_CREATED, detail: { collection: { externalId: '999rsk', @@ -215,6 +216,7 @@ describe('bulk indexer', () => { language: 'de', createdAt: 123456, updatedAt: 123456, + collection_iab_child_category_id: '1', authors: [ { name: 'anonym anonym', @@ -287,9 +289,9 @@ describe('bulk indexer', () => { const payloads: EventPayload[] = [ { messageId: '123abc', - detailType: 'add-approved-item', + detailType: PocketEventType.CORPUS_ITEM_ADDED, detail: { - eventType: 'add-approved-item', + eventType: PocketEventType.CORPUS_ITEM_ADDED, url: 'http://some-url.com', approvedItemExternalId: 'dddddd', language: 'not-language-code', @@ -297,9 +299,9 @@ describe('bulk indexer', () => { }, { messageId: '456def', - detailType: 'add-approved-item', + detailType: PocketEventType.CORPUS_ITEM_ADDED, detail: { - eventType: 'add-approved-item', + eventType: PocketEventType.CORPUS_ITEM_ADDED, url: 'http://eine-url.de', approvedItemExternalId: 'eeeee', language: 'de', @@ -444,10 +446,10 @@ describe('bulk indexer', () => { const payloads: EventPayload[] = [ { messageId: '123abc', - detailType: 'add-approved-item', + detailType: PocketEventType.CORPUS_ITEM_ADDED, detail: { // Doesn't matter, overwritten by mock - eventType: 'add-approved-item', + eventType: PocketEventType.CORPUS_ITEM_ADDED, url: 
'http://some-url.com', approvedItemExternalId: 'aaaaa', language: 'en', @@ -455,10 +457,10 @@ describe('bulk indexer', () => { }, { messageId: 'xijk-kel', - detailType: 'add-approved-item', + detailType: PocketEventType.CORPUS_ITEM_ADDED, detail: { // Doesn't matter, overwritten by mock - eventType: 'add-approved-item', + eventType: PocketEventType.CORPUS_ITEM_ADDED, url: 'http://some-url.com', approvedItemExternalId: 'aaaaa', language: 'en', diff --git a/lambdas/user-list-search-corpus-indexing/src/index.ts b/lambdas/user-list-search-corpus-indexing/src/index.ts index 17fe8b835..be8ec664c 100644 --- a/lambdas/user-list-search-corpus-indexing/src/index.ts +++ b/lambdas/user-list-search-corpus-indexing/src/index.ts @@ -11,11 +11,11 @@ import type { SQSEvent, } from 'aws-lambda'; import { - EventPayload, ValidatedEventPayload, validDetailTypes, CollectionApprovedItemPayload, SyndicatedItemPayload, + EventPayload, } from './types'; import { upsertCollection } from './commands/Collection'; import { mergeCollection } from './commands/ApprovedItemCollection'; @@ -24,6 +24,10 @@ import { upsertApprovedItem } from './commands/ApprovedItem'; import { postRetry } from './postRetry'; import { serverLogger } from '@pocket-tools/ts-logger'; import { removeApprovedItem } from './commands/RemoveItem'; +import { + PocketEventType, + sqsEventBridgeEvent, +} from '@pocket-tools/event-bridge'; /** * The main handler function which will be wrapped by Sentry prior to export. 
@@ -34,13 +38,19 @@ import { removeApprovedItem } from './commands/RemoveItem'; */ export async function processor(event: SQSEvent): Promise { const validPayloads: Array = event.Records.map((record) => { - const message = JSON.parse(JSON.parse(record.body).Message); + const pocketEvent = sqsEventBridgeEvent(record); + if ( + pocketEvent == null || + !validDetailTypes.includes(pocketEvent['detail-type']) + ) { + return null; + } return { messageId: record.messageId, - detailType: message['detail-type'], - detail: message['detail'], + detailType: pocketEvent['detail-type'], + detail: pocketEvent.detail, }; - }).filter((message) => validDetailTypes.includes(message['detailType'])); + }).filter((message) => message != null); const result = await bulkIndex(validPayloads); return result; } @@ -79,7 +89,7 @@ export async function bulkIndex( for await (const validItem of validItems) { // Deleting if ( - validItem.detailType === 'remove-approved-item' && + validItem.detailType === PocketEventType.CORPUS_ITEM_REMOVED && // Not possible, but just for typescript... 
!('collection' in validItem.detail) ) { diff --git a/lambdas/user-list-search-corpus-indexing/src/types.ts b/lambdas/user-list-search-corpus-indexing/src/types.ts index 385e80708..1e5af471d 100644 --- a/lambdas/user-list-search-corpus-indexing/src/types.ts +++ b/lambdas/user-list-search-corpus-indexing/src/types.ts @@ -1,13 +1,31 @@ +import { + CorpusEvent, + CorpusItemPayload, + CollectionPayload, + CollectionEvent, + PocketEventType, + IncomingBaseEvent, +} from '@pocket-tools/event-bridge'; + +type BaseEventPayload = (CollectionEvent | CorpusEvent) & IncomingBaseEvent; + export type EventPayload = { messageId: string; - detailType: string; - detail: ApprovedItemPayload | CollectionPayload; + detail: BaseEventPayload['detail']; + detailType: PocketEventType; }; - export type ValidatedEventPayload = Omit & { detail: ValidLanguageApprovedItemPayload | CollectionPayload; }; +export const validDetailTypes: Array = [ + PocketEventType.CORPUS_ITEM_ADDED, + PocketEventType.CORPUS_ITEM_UPDATED, + PocketEventType.CORPUS_ITEM_REMOVED, + PocketEventType.COLLECTION_CREATED, + PocketEventType.COLLECTION_UPDATED, +]; + // See indices .docker/aws-resources/elasticsearch export type CorpusItemIndex = { meta: { _id: string; _index: string }; @@ -34,125 +52,13 @@ export type CorpusItemIndex = { }>; }; -// See infrastructure/pocket-event-bridge/src/event-rules/corpus-events/eventConfig.ts -// and infrastructure/pocket-event-bridge/src/event-rules/collection-events/eventConfig.ts -export const validDetailTypes = [ - 'add-approved-item', - 'update-approved-item', - 'collection-created', - 'collection-updated', - 'remove-approved-item', -]; - -type Author = { name: string; sortOrder: number }; - -// Types below are all copied from: -// https://github.com/Pocket/content-monorepo/blob/7342cb5468f11fc0b3ffdddf8693b6aeeb64f26e/servers/curated-corpus-api/src/events/types.ts#L95 -export type ApprovedItemPayload = { - eventType: string; - approvedItemExternalId: string; - url: string; - 
authors?: Author[]; - title?: string | null; - excerpt?: string | null; - language?: string | null; - publisher?: string | null; - imageUrl?: string | null; - topic?: string | null; - createdAt?: string | null; // UTC timestamp string - createdBy?: string | null; // UTC timestamp string - updatedAt?: string | null; // UTC timestamp string - datePublished?: string; // UTC timestamp string - isSyndicated?: boolean; - isCollection?: boolean; - domainName?: string; - isTimeSensitive?: boolean; - source?: string | null; - grade?: string | null; -}; - export type ValidLanguageApprovedItemPayload = Omit< - ApprovedItemPayload, + CorpusItemPayload, 'language' -> & { language: string }; - -// servers/shared-snowplow-consumer/src/eventConsumer/collectionEvents/types.ts -export type CollectionPayload = { - collection: { - externalId: string; - slug: string; - title: string; - status: string; - language: string; - authors: CollectionAuthor[]; - stories: CollectionStory[]; - createdAt: number; // in seconds - updatedAt: number; // in seconds - - imageUrl?: string; - labels?: Label[]; - intro?: string; - curationCategory?: CurationCategory; - excerpt?: string; - partnership?: CollectionPartnership; - publishedAt?: number; // in seconds - IABParentCategory?: IABParentCategory; - IABChildCategory?: IABChildCategory; - }; -}; - -export type CollectionStoryAuthor = { name: string; sort_order: number }; - -export type CurationCategory = { - collection_curation_category_id: string; - name: string; - slug: string; -}; - -export type CollectionPartnership = { - collection_partnership_id: string; - name: string; - blurb: string; - image_url: string; - type: string; - url: string; -}; - -export type CollectionAuthor = { - collection_author_id: string; - name: string; - active: boolean; - slug?: string; - bio?: string; - image_url?: string; -}; - -export type CollectionStory = { - collection_story_id: string; - url: string; - title: string; - excerpt: string; - image_url?: string; - 
publisher?: string; - authors: CollectionStoryAuthor[]; - is_from_partner: boolean; - sort_order?: number; -}; - -export type IABParentCategory = { - collection_iab_parent_category_id: string; - name: string; - slug: string; -}; - -export type IABChildCategory = { - collection_iab_child_category_id: string; - name: string; - slug: string; +> & { + language: string; }; -export type Label = { collection_label_id: string; name: string }; - // Corpus items which are not collections, to be indexed export type CollectionApprovedItemPayload = Omit< ValidLanguageApprovedItemPayload, diff --git a/lambdas/user-list-search-corpus-parser-hydration/package.json b/lambdas/user-list-search-corpus-parser-hydration/package.json index 2a3285fde..bd8f0368f 100644 --- a/lambdas/user-list-search-corpus-parser-hydration/package.json +++ b/lambdas/user-list-search-corpus-parser-hydration/package.json @@ -18,6 +18,7 @@ }, "dependencies": { "@aws-sdk/client-sagemaker-runtime": "3.679.0", + "@pocket-tools/event-bridge": "workspace:*", "@pocket-tools/ts-logger": "workspace:*", "@sentry/aws-serverless": "8.37.1", "@smithy/node-http-handler": "3.2.0", @@ -39,4 +40,4 @@ "tsconfig": "workspace:*", "typescript": "5.6.3" } -} +} \ No newline at end of file diff --git a/lambdas/user-list-search-corpus-parser-hydration/src/index.ts b/lambdas/user-list-search-corpus-parser-hydration/src/index.ts index 2545a69b6..e59ca26d2 100644 --- a/lambdas/user-list-search-corpus-parser-hydration/src/index.ts +++ b/lambdas/user-list-search-corpus-parser-hydration/src/index.ts @@ -16,6 +16,7 @@ import { parserRequest, parserResultToDoc } from './parserRequest'; import { bulkIndex } from './bulkIndex'; import { buildCollectionUrl, hasExcerptOrIsCollection } from './utils'; import { getEmbeddings } from './embeddingsRequest'; +import { sqsEventBridgeEvent } from '@pocket-tools/event-bridge'; /** * The main handler function which will be wrapped by Sentry prior to export. 
@@ -29,15 +30,18 @@ export async function processor(event: SQSEvent): Promise { const validPayloads: Array = event.Records.map( (record) => { - const message = JSON.parse(JSON.parse(record.body).Message); + const event = sqsEventBridgeEvent(record); + if (event == null || !validDetailTypes.includes(event['detail-type'])) { + return null; + } return { messageId: record.messageId, - detailType: message['detail-type'], - detail: message['detail'], + detailType: event['detail-type'], + detail: event['detail'], }; }, ) - .filter((message) => validDetailTypes.includes(message['detailType'])) + .filter((message) => message != null) .filter((message) => { const language = 'collection' in message.detail diff --git a/lambdas/user-list-search-corpus-parser-hydration/src/types.ts b/lambdas/user-list-search-corpus-parser-hydration/src/types.ts index 7d2013a70..138ebd2ac 100644 --- a/lambdas/user-list-search-corpus-parser-hydration/src/types.ts +++ b/lambdas/user-list-search-corpus-parser-hydration/src/types.ts @@ -1,18 +1,32 @@ // Note: These are reused (copied) from user-list-search-corpus-indexing/src/types // since they listen to the same event +import { + CorpusEvent, + CorpusItemPayload, + CollectionPayload, + CollectionEvent, + PocketEventType, + IncomingBaseEvent, +} from '@pocket-tools/event-bridge'; + +type BaseEventPayload = (CollectionEvent | CorpusEvent) & IncomingBaseEvent; + export type EventPayload = { messageId: string; - detailType: string; - detail: ApprovedItemPayload | CollectionPayload; + detail: BaseEventPayload['detail']; + detailType: PocketEventType; +}; +export type ValidatedEventPayload = Omit & { + detail: ValidLanguageApprovedItemPayload | CollectionPayload; }; export type ValidLangEventPayload = Omit & { - detail: ValidLangApprovedItemPayload | CollectionPayload; + detail: ValidLanguageApprovedItemPayload | CollectionPayload; }; -export type ValidLangApprovedItemPayload = Omit< - ApprovedItemPayload, +export type ValidLanguageApprovedItemPayload = 
Omit< + CorpusItemPayload, 'language' > & { language: string; @@ -73,113 +87,9 @@ export type ParserResult = { // See infrastructure/pocket-event-bridge/src/event-rules/corpus-events/eventConfig.ts // and infrastructure/pocket-event-bridge/src/event-rules/collection-events/eventConfig.ts -export const validDetailTypes = [ - 'add-approved-item', - 'update-approved-item', - 'collection-created', - 'collection-updated', +export const validDetailTypes: Array = [ + PocketEventType.CORPUS_ITEM_ADDED, + PocketEventType.CORPUS_ITEM_UPDATED, + PocketEventType.COLLECTION_CREATED, + PocketEventType.COLLECTION_UPDATED, ]; - -type Author = { name: string; sortOrder: number }; - -// Types below are all copied from: -// https://github.com/Pocket/content-monorepo/blob/7342cb5468f11fc0b3ffdddf8693b6aeeb64f26e/servers/curated-corpus-api/src/events/types.ts#L95 -export type ApprovedItemPayload = { - eventType: string; - approvedItemExternalId: string; - url: string; - authors?: Author[]; - title?: string | null; - excerpt?: string | null; - language?: string | null; - publisher?: string | null; - imageUrl?: string | null; - topic?: string | null; - createdAt?: string | null; // UTC timestamp string - createdBy?: string | null; // UTC timestamp string - updatedAt?: string | null; // UTC timestamp string - datePublished?: string; // UTC timestamp string - isSyndicated?: boolean; - isCollection?: boolean; - domainName?: string; - isTimeSensitive?: boolean; - source?: string | null; - grade?: string | null; -}; - -// servers/shared-snowplow-consumer/src/eventConsumer/collectionEvents/types.ts -export type CollectionPayload = { - collection: { - externalId: string; - slug: string; - title: string; - status: string; - language: string; - authors: CollectionAuthor[]; - stories: CollectionStory[]; - createdAt: number; // in seconds - updatedAt: number; // in seconds - - imageUrl?: string; - labels?: Label[]; - intro?: string; - curationCategory?: CurationCategory; - excerpt?: string; - 
partnership?: CollectionPartnership; - publishedAt?: number; // in seconds - IABParentCategory?: IABParentCategory; - IABChildCategory?: IABChildCategory; - }; -}; - -export type CollectionStoryAuthor = { name: string; sort_order: number }; - -export type CurationCategory = { - collection_curation_category_id: string; - name: string; - slug: string; -}; - -export type CollectionPartnership = { - collection_partnership_id: string; - name: string; - blurb: string; - image_url: string; - type: string; - url: string; -}; - -export type CollectionAuthor = { - collection_author_id: string; - name: string; - active: boolean; - slug?: string; - bio?: string; - image_url?: string; -}; - -export type CollectionStory = { - collection_story_id: string; - url: string; - title: string; - excerpt: string; - image_url?: string; - publisher?: string; - authors: CollectionStoryAuthor[]; - is_from_partner: boolean; - sort_order?: number; -}; - -export type IABParentCategory = { - collection_iab_parent_category_id: string; - name: string; - slug: string; -}; - -export type IABChildCategory = { - collection_iab_child_category_id: string; - name: string; - slug: string; -}; - -export type Label = { collection_label_id: string; name: string }; diff --git a/packages/event-bridge/package.json b/packages/event-bridge/package.json index 6d2c6161a..ff3bdb914 100644 --- a/packages/event-bridge/package.json +++ b/packages/event-bridge/package.json @@ -23,7 +23,7 @@ "dev": "pnpm run build --watch", "format": "eslint --fix", "lint": "eslint --fix-dry-run", - "prebuild": "pnpm ts-json-schema-generator --additional-properties --path './src/events/types/index.ts' > ./src/events/generated/schema.json", + "prebuild": "pnpm ts-json-schema-generator --additional-properties --validation-keywords removeEmptyObject --path './src/events/types/index.ts' > ./src/events/generated/schema.json", "semantic-release": "semantic-release", "test": "jest", "test:watch": "pnpm run test -- --watch" @@ -81,7 +81,8 @@ 
"@pocket-tools/ts-logger": "workspace:*", "@sentry/node": "8.37.1", "ajv": "8.17.1", - "ajv-formats": "3.0.1" + "ajv-formats": "3.0.1", + "lodash": "4.17.21" }, "devDependencies": { "@jest/globals": "29.7.0", @@ -89,6 +90,7 @@ "@semantic-release/exec": "6.0.3", "@types/aws-lambda": "8.10.145", "@types/jest": "29.5.14", + "@types/lodash": "4.17.13", "@types/node": "^22.8.2", "jest": "29.7.0", "jest-extended": "4.0.2", diff --git a/packages/event-bridge/src/events/events.ts b/packages/event-bridge/src/events/events.ts index 12f75e8c8..c25a2d322 100644 --- a/packages/event-bridge/src/events/events.ts +++ b/packages/event-bridge/src/events/events.ts @@ -43,4 +43,11 @@ export enum PocketEventType { SHARE_CONTEXT_UPDATED = 'pocket_share_context_updated', SEARCH_RESPONSE_GENERATED = 'search_response_generated', + + CORPUS_ITEM_ADDED = 'add-approved-item', + CORPUS_ITEM_UPDATED = 'update-approved-item', + CORPUS_ITEM_REMOVED = 'remove-approved-item', + + COLLECTION_CREATED = 'collection-created', + COLLECTION_UPDATED = 'collection-updated', } diff --git a/packages/event-bridge/src/events/generated/schema.json b/packages/event-bridge/src/events/generated/schema.json index fd224ac7e..9a2a657f2 100644 --- a/packages/event-bridge/src/events/generated/schema.json +++ b/packages/event-bridge/src/events/generated/schema.json @@ -1212,6 +1212,499 @@ ], "type": "object" }, + "CollectionAuthor": { + "properties": { + "active": { + "type": "boolean" + }, + "bio": { + "type": "string" + }, + "collection_author_id": { + "type": "string" + }, + "image_url": { + "type": "string" + }, + "name": { + "type": "string" + }, + "slug": { + "type": "string" + } + }, + "required": [ + "collection_author_id", + "name", + "active" + ], + "type": "object" + }, + "CollectionCreated": { + "properties": { + "detail": { + "$ref": "#/definitions/CollectionPayload" + }, + "detail-type": { + "const": "collection-created", + "type": "string" + }, + "source": { + "type": "string" + } + }, + "required": [ + 
"detail", + "detail-type", + "source" + ], + "type": "object" + }, + "CollectionEvent": { + "anyOf": [ + { + "$ref": "#/definitions/CollectionCreated" + }, + { + "$ref": "#/definitions/CollectionUpdated" + } + ] + }, + "CollectionPartnership": { + "properties": { + "blurb": { + "type": "string" + }, + "collection_partnership_id": { + "type": "string" + }, + "image_url": { + "type": "string" + }, + "name": { + "type": "string" + }, + "type": { + "type": "string" + }, + "url": { + "type": "string" + } + }, + "required": [ + "collection_partnership_id", + "name", + "blurb", + "image_url", + "type", + "url" + ], + "type": "object" + }, + "CollectionPayload": { + "description": "NOTE: The following is from the Content monorepo", + "properties": { + "collection": { + "properties": { + "IABChildCategory": { + "$ref": "#/definitions/IABChildCategory" + }, + "IABParentCategory": { + "$ref": "#/definitions/IABParentCategory" + }, + "authors": { + "items": { + "$ref": "#/definitions/CollectionAuthor" + }, + "type": "array" + }, + "createdAt": { + "type": "number" + }, + "curationCategory": { + "$ref": "#/definitions/CurationCategory" + }, + "excerpt": { + "type": "string" + }, + "externalId": { + "type": "string" + }, + "imageUrl": { + "type": "string" + }, + "intro": { + "type": "string" + }, + "labels": { + "items": { + "$ref": "#/definitions/Label" + }, + "type": "array" + }, + "language": { + "type": "string" + }, + "partnership": { + "$ref": "#/definitions/CollectionPartnership" + }, + "publishedAt": { + "type": "number" + }, + "slug": { + "type": "string" + }, + "status": { + "type": "string" + }, + "stories": { + "items": { + "$ref": "#/definitions/CollectionStory" + }, + "type": "array" + }, + "title": { + "type": "string" + }, + "updatedAt": { + "type": "number" + } + }, + "required": [ + "externalId", + "slug", + "title", + "status", + "language", + "authors", + "stories", + "createdAt", + "updatedAt" + ], + "type": "object" + } + }, + "required": [ + "collection" + 
], + "type": "object" + }, + "CollectionPocketEventType": { + "anyOf": [ + { + "const": "collection-created", + "type": "string" + }, + { + "const": "collection-updated", + "type": "string" + } + ] + }, + "CollectionStory": { + "properties": { + "authors": { + "items": { + "$ref": "#/definitions/CollectionStoryAuthor" + }, + "type": "array" + }, + "collection_story_id": { + "type": "string" + }, + "excerpt": { + "type": "string" + }, + "image_url": { + "type": "string" + }, + "is_from_partner": { + "type": "boolean" + }, + "publisher": { + "type": "string" + }, + "sort_order": { + "type": "number" + }, + "title": { + "type": "string" + }, + "url": { + "type": "string" + } + }, + "required": [ + "collection_story_id", + "url", + "title", + "excerpt", + "authors", + "is_from_partner" + ], + "type": "object" + }, + "CollectionStoryAuthor": { + "properties": { + "name": { + "type": "string" + }, + "sort_order": { + "type": "number" + } + }, + "required": [ + "name", + "sort_order" + ], + "type": "object" + }, + "CollectionUpdated": { + "properties": { + "detail": { + "$ref": "#/definitions/CollectionPayload" + }, + "detail-type": { + "const": "collection-updated", + "type": "string" + }, + "source": { + "type": "string" + } + }, + "required": [ + "detail", + "detail-type", + "source" + ], + "type": "object" + }, + "CorpusEvent": { + "anyOf": [ + { + "$ref": "#/definitions/CorpusItemAdded" + }, + { + "$ref": "#/definitions/CorpusItemUpdated" + }, + { + "$ref": "#/definitions/CorpusItemRemoved" + } + ] + }, + "CorpusItemAdded": { + "properties": { + "detail": { + "$ref": "#/definitions/CorpusItemPayload" + }, + "detail-type": { + "const": "add-approved-item", + "type": "string" + }, + "source": { + "type": "string" + } + }, + "required": [ + "detail", + "detail-type", + "source" + ], + "type": "object" + }, + "CorpusItemPayload": { + "properties": { + "approvedItemExternalId": { + "type": "string" + }, + "authors": { + "items": { + "properties": { + "name": { + "type": 
"string" + }, + "sortOrder": { + "type": "number" + } + }, + "required": [ + "name", + "sortOrder" + ], + "type": "object" + }, + "type": "array" + }, + "createdAt": { + "type": [ + "string", + "null" + ] + }, + "createdBy": { + "type": [ + "string", + "null" + ] + }, + "datePublished": { + "type": "string" + }, + "domainName": { + "type": "string" + }, + "eventType": { + "type": "string" + }, + "excerpt": { + "type": [ + "string", + "null" + ] + }, + "grade": { + "type": [ + "string", + "null" + ] + }, + "imageUrl": { + "type": [ + "string", + "null" + ] + }, + "isCollection": { + "type": "boolean" + }, + "isSyndicated": { + "type": "boolean" + }, + "isTimeSensitive": { + "type": "boolean" + }, + "language": { + "type": [ + "string", + "null" + ] + }, + "publisher": { + "type": [ + "string", + "null" + ] + }, + "source": { + "type": [ + "string", + "null" + ] + }, + "title": { + "type": [ + "string", + "null" + ] + }, + "topic": { + "type": [ + "string", + "null" + ] + }, + "updatedAt": { + "type": [ + "string", + "null" + ] + }, + "url": { + "type": "string" + } + }, + "required": [ + "eventType", + "approvedItemExternalId", + "url" + ], + "type": "object" + }, + "CorpusItemRemoved": { + "properties": { + "detail": { + "$ref": "#/definitions/CorpusItemPayload" + }, + "detail-type": { + "const": "remove-approved-item", + "type": "string" + }, + "source": { + "type": "string" + } + }, + "required": [ + "detail", + "detail-type", + "source" + ], + "type": "object" + }, + "CorpusItemUpdated": { + "properties": { + "detail": { + "$ref": "#/definitions/CorpusItemPayload" + }, + "detail-type": { + "const": "update-approved-item", + "type": "string" + }, + "source": { + "type": "string" + } + }, + "required": [ + "detail", + "detail-type", + "source" + ], + "type": "object" + }, + "CorpusPocketEventType": { + "anyOf": [ + { + "const": "add-approved-item", + "type": "string" + }, + { + "const": "update-approved-item", + "type": "string" + }, + { + "const": 
"remove-approved-item", + "type": "string" + } + ] + }, + "CurationCategory": { + "properties": { + "collection_curation_category_id": { + "type": "string" + }, + "name": { + "type": "string" + }, + "slug": { + "type": "string" + } + }, + "required": [ + "collection_curation_category_id", + "name", + "slug" + ], + "type": "object" + }, "DeleteItem": { "properties": { "detail": { @@ -2140,6 +2633,44 @@ ], "type": "object" }, + "IABChildCategory": { + "properties": { + "collection_iab_child_category_id": { + "type": "string" + }, + "name": { + "type": "string" + }, + "slug": { + "type": "string" + } + }, + "required": [ + "collection_iab_child_category_id", + "name", + "slug" + ], + "type": "object" + }, + "IABParentCategory": { + "properties": { + "collection_iab_parent_category_id": { + "type": "string" + }, + "name": { + "type": "string" + }, + "slug": { + "type": "string" + } + }, + "required": [ + "collection_iab_parent_category_id", + "name", + "slug" + ], + "type": "object" + }, "IncomingBaseEvent": { "description": "A base event that all events should extend from when they are Received only, not sent.", "properties": { @@ -2179,6 +2710,21 @@ ], "type": "object" }, + "Label": { + "properties": { + "collection_label_id": { + "type": "string" + }, + "name": { + "type": "string" + } + }, + "required": [ + "collection_label_id", + "name" + ], + "type": "object" + }, "ListEvent": { "anyOf": [ { diff --git a/packages/event-bridge/src/events/index.ts b/packages/event-bridge/src/events/index.ts index d5edcb6f7..2943f7dbd 100644 --- a/packages/event-bridge/src/events/index.ts +++ b/packages/event-bridge/src/events/index.ts @@ -39,6 +39,13 @@ import { AccountEvent, SearchEvent, SearchResponseGenerated, + CorpusItemAdded, + CorpusItemUpdated, + CorpusItemRemoved, + CorpusEvent, + CollectionEvent, + CollectionCreated, + CollectionUpdated, } from './types'; export * from './types'; export * from './events'; @@ -53,7 +60,9 @@ export type PocketEvent = | ShareableListEvent | 
ShareableListItemEvent | ShareEvent - | SearchEvent; + | SearchEvent + | CorpusEvent + | CollectionEvent; export type PocketEventTypeMap = { [PocketEventType.FORGOT_PASSWORD]: ForgotPasswordRequest; @@ -89,4 +98,9 @@ export type PocketEventTypeMap = { [PocketEventType.SHARE_CREATED]: ShareCreated; [PocketEventType.SHARE_CONTEXT_UPDATED]: ShareContextUpdated; [PocketEventType.SEARCH_RESPONSE_GENERATED]: SearchResponseGenerated; + [PocketEventType.CORPUS_ITEM_ADDED]: CorpusItemAdded; + [PocketEventType.CORPUS_ITEM_UPDATED]: CorpusItemUpdated; + [PocketEventType.CORPUS_ITEM_REMOVED]: CorpusItemRemoved; + [PocketEventType.COLLECTION_CREATED]: CollectionCreated; + [PocketEventType.COLLECTION_UPDATED]: CollectionUpdated; }; diff --git a/packages/event-bridge/src/events/types/collection.spec.ts b/packages/event-bridge/src/events/types/collection.spec.ts new file mode 100644 index 000000000..b76539cfb --- /dev/null +++ b/packages/event-bridge/src/events/types/collection.spec.ts @@ -0,0 +1,66 @@ +import { SQSRecord } from 'aws-lambda'; +import { sqsEventBridgeEvent } from '../../utils'; +import { PocketEventType } from '../events'; +import { CollectionCreated } from './collection'; + +describe('collection event', () => { + it('throw an error if collection event payload is missing collection', async () => { + const recordWithoutEmail = { + body: JSON.stringify({ + Message: JSON.stringify({ + account: '12345', + id: '12345', + region: 'us-west-2', + time: '2021-08-12T20:05:00Z', + version: '1.0', + source: 'collection-event', + 'detail-type': PocketEventType.COLLECTION_CREATED, + detail: {}, + }), + }), + }; + expect.assertions(1); // since it's in a try/catch, make sure we assert + try { + const event = sqsEventBridgeEvent(recordWithoutEmail as SQSRecord); + console.log(event); + } catch (e) { + expect(e.message).toContain( + "data/detail must have required property 'collection'", + ); + } + }); + + it('removes empty objects', async () => { + const recordWithNumberBool = { + 
body: JSON.stringify({ + Message: JSON.stringify({ + account: '12345', + id: '12345', + region: 'us-west-2', + time: '2021-08-12T20:05:00Z', + version: '1.0', + source: 'collection-event', + 'detail-type': PocketEventType.COLLECTION_CREATED, + detail: { + collection: { + externalId: '12', + slug: 'a-cool-slug', + title: 'A cool title', + status: 'published', + language: 'en', + authors: [], + stories: [], + createdAt: 123456789, + updatedAt: 123456789, + IABParentCategory: {}, + }, + }, + }), + }), + }; + const event: CollectionCreated = sqsEventBridgeEvent( + recordWithNumberBool as SQSRecord, + ) as CollectionCreated; + expect(event.detail.collection.IABChildCategory).toBe(undefined); + }); +}); diff --git a/packages/event-bridge/src/events/types/collection.ts b/packages/event-bridge/src/events/types/collection.ts new file mode 100644 index 000000000..e1c1f113c --- /dev/null +++ b/packages/event-bridge/src/events/types/collection.ts @@ -0,0 +1,106 @@ +import { PocketEventType } from '../events'; +import { BaseEvent } from './base'; + +export type CollectionPocketEventType = + | PocketEventType.COLLECTION_CREATED + | PocketEventType.COLLECTION_UPDATED; + +export type CollectionEvent = CollectionCreated | CollectionUpdated; + +interface CollectionBaseEvent extends BaseEvent { + 'detail-type': CollectionPocketEventType; + detail: CollectionPayload; +} + +export interface CollectionCreated extends CollectionBaseEvent { + 'detail-type': PocketEventType.COLLECTION_CREATED; +} + +export interface CollectionUpdated extends CollectionBaseEvent { + 'detail-type': PocketEventType.COLLECTION_UPDATED; +} + +/** + * NOTE: The following is from the Content monorepo + */ + +export interface CollectionPayload { + collection: { + externalId: string; + slug: string; + title: string; + status: string; + language: string; + authors: CollectionAuthor[]; + stories: CollectionStory[]; + createdAt: number; // in seconds + updatedAt: number; // in seconds + imageUrl?: string; + labels?: 
Label[]; + intro?: string; + curationCategory?: CurationCategory; + excerpt?: string; + partnership?: CollectionPartnership; + publishedAt?: number; // in seconds + IABParentCategory?: IABParentCategory; + IABChildCategory?: IABChildCategory; + }; +} + +export interface CollectionStoryAuthor { + name: string; + sort_order: number; +} + +export interface CurationCategory { + collection_curation_category_id: string; + name: string; + slug: string; +} + +export interface CollectionPartnership { + collection_partnership_id: string; + name: string; + blurb: string; + image_url: string; + type: string; + url: string; +} + +export interface CollectionAuthor { + collection_author_id: string; + name: string; + active: boolean; + slug?: string; + bio?: string; + image_url?: string; +} + +export interface CollectionStory { + collection_story_id: string; + url: string; + title: string; + excerpt: string; + image_url?: string; + publisher?: string; + authors: CollectionStoryAuthor[]; + is_from_partner: boolean; + sort_order?: number; +} + +export interface IABParentCategory { + collection_iab_parent_category_id: string; + name: string; + slug: string; +} + +export interface IABChildCategory { + collection_iab_child_category_id: string; + name: string; + slug: string; +} + +export interface Label { + collection_label_id: string; + name: string; +} diff --git a/packages/event-bridge/src/events/types/corpus.ts b/packages/event-bridge/src/events/types/corpus.ts new file mode 100644 index 000000000..c9593f3d5 --- /dev/null +++ b/packages/event-bridge/src/events/types/corpus.ts @@ -0,0 +1,62 @@ +// Types below are all copied from the content-monorepo curated-corpus-api event types (servers/curated-corpus-api/src/events/types.ts): + +import { PocketEventType } from '../events'; +import { BaseEvent } from './base'; + +export type CorpusPocketEventType = + | PocketEventType.CORPUS_ITEM_ADDED + | PocketEventType.CORPUS_ITEM_UPDATED + | PocketEventType.CORPUS_ITEM_REMOVED; + +export type CorpusEvent = + | CorpusItemAdded + | CorpusItemUpdated + | CorpusItemRemoved; + +interface
BaseCorpusEvent extends BaseEvent { + 'detail-type': CorpusPocketEventType; + detail: CorpusItemPayload; +} + +export interface CorpusItemAdded extends BaseCorpusEvent { + 'detail-type': PocketEventType.CORPUS_ITEM_ADDED; +} + +export interface CorpusItemUpdated extends BaseCorpusEvent { + 'detail-type': PocketEventType.CORPUS_ITEM_UPDATED; +} + +export interface CorpusItemRemoved extends BaseCorpusEvent { + 'detail-type': PocketEventType.CORPUS_ITEM_REMOVED; +} + +// https://github.com/Pocket/content-monorepo/blob/7342cb5468f11fc0b3ffdddf8693b6aeeb64f26e/servers/curated-corpus-api/src/events/types.ts#L95 +interface Author { + name: string; + sortOrder: number; +} + +// TODO: Validate with the content team that these fields really can be null. +// Looking at the database schema, none of these fields are nullable. +export interface CorpusItemPayload { + eventType: string; + approvedItemExternalId: string; + url: string; + authors?: Author[]; + title?: string | null; + excerpt?: string | null; + language?: string | null; + publisher?: string | null; + imageUrl?: string | null; + topic?: string | null; + createdAt?: string | null; // UTC timestamp string + createdBy?: string | null; // presumably the creator's identifier, not a timestamp — TODO confirm + updatedAt?: string | null; // UTC timestamp string + datePublished?: string; // UTC timestamp string + isSyndicated?: boolean; + isCollection?: boolean; + domainName?: string; + isTimeSensitive?: boolean; + source?: string | null; + grade?: string | null; +} diff --git a/packages/event-bridge/src/events/types/index.ts b/packages/event-bridge/src/events/types/index.ts index fcc9c1688..746d5f3a7 100644 --- a/packages/event-bridge/src/events/types/index.ts +++ b/packages/event-bridge/src/events/types/index.ts @@ -6,3 +6,5 @@ export * from './list'; export * from './shareableList'; export * from './share'; export * from './search'; +export * from './corpus'; +export * from './collection'; diff --git a/packages/event-bridge/src/jsonUtils.spec.ts
b/packages/event-bridge/src/jsonUtils.spec.ts new file mode 100644 index 000000000..f726ccd6b --- /dev/null +++ b/packages/event-bridge/src/jsonUtils.spec.ts @@ -0,0 +1,82 @@ +import { removeEmptyObjects } from './jsonUtils'; + +describe('jsonUtils - removeEmptyObjects', () => { + it.each([ + { + subject: { + collection: { + externalId: '12', + slug: 'a-cool-slug', + title: 'A cool title', + status: 'published', + }, + }, + expected: { + collection: { + externalId: '12', + slug: 'a-cool-slug', + title: 'A cool title', + status: 'published', + }, + }, + }, + { + subject: { + collection: { + externalId: '12', + IABParentCategory: {}, + IABChildCategory: {}, + }, + }, + expected: { + collection: { + externalId: '12', + }, + }, + }, + { + subject: { + collection: { + externalId: '12', + authors: [ + { testing: {}, name: 'remove me' }, + { tesing: { test: 'test' }, name: 'keep me' }, + ], + }, + }, + expected: { + collection: { + externalId: '12', + authors: [ + { name: 'remove me' }, + { tesing: { test: 'test' }, name: 'keep me' }, + ], + }, + }, + }, + { + subject: { + collection: { + nestedObject: { + inANestedObject: { + shouldbeRemoved: {}, + testing: 'but i should stay', + }, + }, + }, + }, + expected: { + collection: { + nestedObject: { + inANestedObject: { + testing: 'but i should stay', + }, + }, + }, + }, + }, + ])('should remove empty objects', ({ subject, expected }) => { + const result = removeEmptyObjects(subject); + expect(result).toEqual(expected); + }); +}); diff --git a/packages/event-bridge/src/jsonUtils.ts b/packages/event-bridge/src/jsonUtils.ts new file mode 100644 index 000000000..d083cffe9 --- /dev/null +++ b/packages/event-bridge/src/jsonUtils.ts @@ -0,0 +1,36 @@ +import { isArray, isEmpty, isObject, transform } from 'lodash'; + +/** + * For a given object, remove all empty objects from it + * @param obj Object to remove empty objects from + * @returns Object that has all empty objects removed {} + */ +export const removeEmptyObjects = (obj: 
Record<string, any>): any => { + if (isArray(obj)) { + // Recursively clean each element in the array + return obj + .map(removeEmptyObjects) // Apply cleaning to each element + .filter( + (value) => !(isObject(value) && !isArray(value) && isEmpty(value)), + ); // Remove empty objects + } else if (isObject(obj) && !isArray(obj)) { + // Recursively clean each property in the object + return transform( + obj, + (result, value, key) => { + const cleanedValue = removeEmptyObjects(value); + if ( + !( + isObject(cleanedValue) && + !isArray(cleanedValue) && + isEmpty(cleanedValue) + ) + ) { + result[key] = cleanedValue; + } + }, + {}, + ); + } + return obj; // Return the value if it's neither an array nor an object +}; diff --git a/packages/event-bridge/src/utils.ts b/packages/event-bridge/src/utils.ts index c5575451d..378b644fe 100644 --- a/packages/event-bridge/src/utils.ts +++ b/packages/event-bridge/src/utils.ts @@ -8,6 +8,7 @@ import { Ajv } from 'ajv'; import addFormats from 'ajv-formats'; import schema from './events/generated/schema.json'; import { MissingFieldsError } from './errors'; +import { removeEmptyObjects } from './jsonUtils'; /** * For a given detail type, return the validation schema from our schema.json file @@ -47,6 +48,10 @@ const parsePocketEvent = ( throw new Error(`Unsupported type: ${json['detail-type']}`); } + // Note we are removing empty objects because the schema does not allow them, but some services (cough.. collections) send empty objects. + // We could do this data modification within AJV but AJV ends up validating sub schemas before we are able to remove the empty objects, hence the removal will never occur. + json.detail = removeEmptyObjects(json.detail); + // https://ajv.js.org/coercion.html // Some data comes from Web repo which..
treats everything as a string or bools as 0/1 const ajv = new Ajv({ coerceTypes: true }); @@ -85,6 +90,7 @@ const parsePocketEvent = ( export const sqsEventBridgeEvent = ( record: SQSRecord, ): (PocketEventTypeMap[T] & IncomingBaseEvent) | null => { + // Note: We have to double parse the record body because it is a stringified JSON object when it comes via SNS from Event Bridge to SQS. const message = JSON.parse(JSON.parse(record.body).Message); return parsePocketEvent(message) as PocketEventTypeMap[T] & IncomingBaseEvent; }; diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 6a32e6608..dca48a68e 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -1542,6 +1542,9 @@ importers: '@opensearch-project/opensearch': specifier: ^2.10.0 version: 2.10.0 + '@pocket-tools/event-bridge': + specifier: workspace:* + version: link:../../packages/event-bridge '@pocket-tools/ts-logger': specifier: workspace:* version: link:../../packages/ts-logger @@ -1597,6 +1600,9 @@ importers: '@aws-sdk/client-sagemaker-runtime': specifier: 3.679.0 version: 3.679.0 + '@pocket-tools/event-bridge': + specifier: workspace:* + version: link:../../packages/event-bridge '@pocket-tools/ts-logger': specifier: workspace:* version: link:../../packages/ts-logger @@ -1978,6 +1984,9 @@ importers: ajv-formats: specifier: 3.0.1 version: 3.0.1(ajv@8.17.1) + lodash: + specifier: 4.17.21 + version: 4.17.21 devDependencies: '@jest/globals': specifier: 29.7.0 @@ -1994,6 +2003,9 @@ importers: '@types/jest': specifier: 29.5.14 version: 29.5.14 + '@types/lodash': + specifier: 4.17.13 + version: 4.17.13 '@types/node': specifier: ^22.8.2 version: 22.9.0 diff --git a/servers/account-data-deleter/package.json b/servers/account-data-deleter/package.json index 69231fe10..dddbcbd82 100644 --- a/servers/account-data-deleter/package.json +++ b/servers/account-data-deleter/package.json @@ -66,4 +66,4 @@ "typescript": "5.6.3", "unleash-client": "6.1.2" } -} \ No newline at end of file +} diff --git 
a/servers/shares-api/package.json b/servers/shares-api/package.json index ac931f02d..c844f2e82 100644 --- a/servers/shares-api/package.json +++ b/servers/shares-api/package.json @@ -66,4 +66,4 @@ "tsconfig": "workspace:*", "typescript": "5.6.3" } -} \ No newline at end of file +} diff --git a/servers/user-list-search/package.json b/servers/user-list-search/package.json index f156af1b5..69808765b 100644 --- a/servers/user-list-search/package.json +++ b/servers/user-list-search/package.json @@ -76,4 +76,4 @@ "tsconfig": "workspace:*", "typescript": "5.6.3" } -} \ No newline at end of file +}