-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Move "crawl installation" to a single Lambda call
Adding public repos brings more business logic in, which is always a pain with Step Functions. Also, since GitHub "best practices" recommend against parallel API calls, we probably don't want to parallelize the various tasks anyway. There's some benefit to being able to do retries at a lower level, but errors should be rare anyway. I may roll this back in the future, but for now I think this is easier.
- Loading branch information
1 parent
c6f8ad2
commit 5fdebe6
Showing
10 changed files
with
62 additions
and
187 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,25 @@ | ||
import { AppState } from '../../../environment/AppState' | ||
import { GithubInstallation } from '../../types/GithubInstallation' | ||
import { crawlUsers } from './crawlUsers' | ||
import { crawlRepositories } from './crawlRepositories' | ||
import { crawlPushes } from './crawlPushes' | ||
import { crawlWorkflowRunEvents } from './crawlRunEvents' | ||
import { logger } from '../../../util/logging' | ||
|
||
// Crawls a single GitHub installation end to end: logs the account login, crawls users, | ||
// crawls repositories, then crawls pushes and workflow run events for every returned repo. | ||
// appState and installation are passed unchanged to each crawl* helper; lookbackDays is | ||
// only forwarded to crawlWorkflowRunEvents. Every step is awaited in sequence. | ||
export async function crawlInstallation( | ||
appState: AppState, | ||
installation: GithubInstallation, | ||
lookbackDays: number | ||
) { | ||
logger.info(`Crawling Installation for ${installation.accountLogin}`) | ||
await crawlUsers(appState, installation) | ||
// The repositories returned here drive the per-repo loop below | ||
const repos = await crawlRepositories(appState, installation) | ||
// Eventually consider doing some parallelization here (or move back to step function) but | ||
// need to be careful since GitHub gets twitchy about concurrent requests to the API | ||
// Their "best practice" doc says don't do it, but their rate limit doc says it's supported | ||
// Only really need to care if things start getting slow | ||
// One repo at a time: pushes first, then workflow run events, each awaited before the next | ||
for (const repo of repos) { | ||
await crawlPushes(appState, installation, repo) | ||
await crawlWorkflowRunEvents(appState, installation, repo, lookbackDays) | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
69 changes: 15 additions & 54 deletions
69
src/app/lambdaFunctions/githubCrawlTask/githubCrawlTaskEvents.ts
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,76 +1,37 @@ | ||
import { GithubInstallation, isGithubInstallation } from '../../domain/types/GithubInstallation' | ||
import { GithubRepositorySummary, isGithubRepositorySummary } from '../../domain/types/GithubRepository' | ||
import { throwError } from '@symphoniacloud/dynamodb-entity-store' | ||
import { | ||
CRAWLABLE_RESOURCES, | ||
CrawlableResource, | ||
isCrawlableResource | ||
} from '../../../multipleContexts/githubCrawler' // TOEventually - safer type checking here | ||
|
||
// TOEventually - safer type checking here | ||
} from '../../../multipleContexts/githubCrawler' | ||
import { isNotNullObject } from '../../util/types' // TOEventually - safer type checking here | ||
|
||
// Base shape of every crawl event: carries only the resourceType discriminator | ||
export type CrawlEvent = { resourceType: CrawlableResource } | ||
// A crawl event that additionally carries the GithubInstallation it applies to | ||
type CrawlEventWithInstallation = CrawlEvent & { installation: GithubInstallation } | ||
// A crawl event that additionally carries a GithubRepositorySummary | ||
type CrawlEventWithRepositorySummary = CrawlEvent & { repository: GithubRepositorySummary } | ||
|
||
// Type guard: true when x is not undefined and its resourceType passes isCrawlableResource. | ||
// NOTE(review): only undefined is excluded before the property read, so a null x would | ||
// reach the .resourceType access - confirm callers never pass null. | ||
export function isCrawlEvent(x: unknown): x is CrawlEvent { | ||
return x !== undefined && isCrawlableResource((x as CrawlEvent).resourceType) | ||
} | ||
|
||
// Type guard: the event must have a truthy installation property that also passes | ||
// isGithubInstallation. | ||
export function isCrawlEventWithInstallation(x: CrawlEvent): x is CrawlEventWithInstallation { | ||
// Cast only so the optional property can be read; the real validation is on the next line | ||
const candidate = x as CrawlEventWithInstallation | ||
return candidate.installation && isGithubInstallation(candidate.installation) | ||
} | ||
|
||
export function isCrawlEventWithRepositorySummary(x: CrawlEvent): x is CrawlEventWithRepositorySummary { | ||
const candidate = x as CrawlEventWithRepositorySummary | ||
return candidate.repository && isGithubRepositorySummary(candidate.repository) | ||
return isNotNullObject(x) && 'resourceType' in x && isCrawlableResource(x.resourceType) | ||
} | ||
|
||
export type CrawlInstallationsEvent = { resourceType: 'installations' } | ||
export type CrawlUsersEvent = { resourceType: 'users' } & CrawlEventWithInstallation | ||
export type CrawlRepositoriesEvent = { resourceType: 'repositories' } & CrawlEventWithInstallation | ||
export type CrawlPushesEvent = { resourceType: 'pushes' } & CrawlEventWithInstallation & | ||
CrawlEventWithRepositorySummary | ||
export type CrawlWorkflowRunEventsEvent = { | ||
resourceType: 'pushes' | ||
|
||
export type CrawlInstallationEvent = { | ||
resourceType: 'installation' | ||
installation: GithubInstallation | ||
lookbackDays: number | ||
} & CrawlEventWithInstallation & | ||
CrawlEventWithRepositorySummary | ||
} | ||
|
||
// Type guard: matches purely on resourceType being CRAWLABLE_RESOURCES.INSTALLATIONS; | ||
// no other properties are inspected. | ||
export function isCrawlInstallationsEvent(x: CrawlEvent): x is CrawlInstallationsEvent { | ||
return x.resourceType === CRAWLABLE_RESOURCES.INSTALLATIONS | ||
} | ||
|
||
// Type guard for "users" crawl events. Returns false when resourceType is not | ||
// CRAWLABLE_RESOURCES.USERS. When it is, the event must also pass | ||
// isCrawlEventWithInstallation; otherwise the function returned by throwError(...) is | ||
// invoked with a message containing the JSON-serialized event (presumably this throws | ||
// rather than returning - confirm against @symphoniacloud/dynamodb-entity-store). | ||
export function isCrawlUsersEvent(x: CrawlEvent): x is CrawlUsersEvent { | ||
if (x.resourceType !== CRAWLABLE_RESOURCES.USERS) return false | ||
return ( | ||
isCrawlEventWithInstallation(x) || | ||
throwError(`Invalid object for ${CRAWLABLE_RESOURCES.USERS} : ${JSON.stringify(x)}`)() | ||
) | ||
} | ||
|
||
// Type guard for "repositories" crawl events. Returns false when resourceType is not | ||
// CRAWLABLE_RESOURCES.REPOSITORIES. When it is, the event must also pass | ||
// isCrawlEventWithInstallation; otherwise the function returned by throwError(...) is | ||
// invoked with a message containing the JSON-serialized event (presumably this throws | ||
// rather than returning - confirm against @symphoniacloud/dynamodb-entity-store). | ||
export function isCrawlRepositoriesEvent(x: CrawlEvent): x is CrawlRepositoriesEvent { | ||
if (x.resourceType !== CRAWLABLE_RESOURCES.REPOSITORIES) return false | ||
return ( | ||
isCrawlEventWithInstallation(x) || | ||
throwError(`Invalid object for ${CRAWLABLE_RESOURCES.REPOSITORIES} : ${JSON.stringify(x)}`)() | ||
) | ||
} | ||
|
||
// Type guard for "pushes" crawl events. Returns false when resourceType is not | ||
// CRAWLABLE_RESOURCES.PUSHES. When it is, the event must pass both | ||
// isCrawlEventWithInstallation and isCrawlEventWithRepositorySummary; otherwise the | ||
// function returned by throwError(...) is invoked with a message containing the | ||
// JSON-serialized event (presumably this throws - confirm against the library). | ||
export function isCrawlPushesEvent(x: CrawlEvent): x is CrawlPushesEvent { | ||
if (x.resourceType !== CRAWLABLE_RESOURCES.PUSHES) return false | ||
return ( | ||
(isCrawlEventWithInstallation(x) && isCrawlEventWithRepositorySummary(x)) || | ||
throwError(`Invalid object for ${CRAWLABLE_RESOURCES.PUSHES} : ${JSON.stringify(x)}`)() | ||
) | ||
} | ||
|
||
export function isCrawlWorkflowRunEventsEvent(x: CrawlEvent): x is CrawlWorkflowRunEventsEvent { | ||
if (x.resourceType !== CRAWLABLE_RESOURCES.WORKFLOW_RUN_EVENTS) return false | ||
const hasLookBackDays = typeof (x as CrawlWorkflowRunEventsEvent).lookbackDays !== undefined | ||
export function isCrawlInstallationEvent(x: CrawlEvent): x is CrawlInstallationEvent { | ||
if (x.resourceType !== CRAWLABLE_RESOURCES.INSTALLATION) return false | ||
return ( | ||
(hasLookBackDays && isCrawlEventWithInstallation(x) && isCrawlEventWithRepositorySummary(x)) || | ||
throwError(`Invalid object for ${CRAWLABLE_RESOURCES.WORKFLOW_RUN_EVENTS} : ${JSON.stringify(x)}`)() | ||
('installation' in x && | ||
isGithubInstallation(x.installation) && | ||
'lookbackDays' in x && | ||
typeof x.lookbackDays === 'number') || | ||
throwError(`Invalid object for ${CRAWLABLE_RESOURCES.INSTALLATION} : ${JSON.stringify(x)}`)() | ||
) | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters