Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

File scanning connector to handle multiple AV tools #1514

Merged
merged 34 commits into from
Nov 1, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
34 commits
Select commit Hold shift + click to select a range
ac9e617
wip for file scanning connector
ARADDCC002 Sep 11, 2024
0199b64
wip for config changes for filescanning
ARADDCC002 Sep 18, 2024
cde5410
updated unit tests
ARADDCC002 Sep 20, 2024
d947e29
merged main and adjust av popover to handle multiple results
ARADDCC002 Sep 24, 2024
93560e1
updated av scanning config
ARADDCC002 Sep 25, 2024
801b5a7
updated av scanning config
ARADDCC002 Sep 25, 2024
a00b6c7
updated config to disable file scanning by default
ARADDCC002 Sep 27, 2024
7244593
added check for required config
ARADDCC002 Sep 27, 2024
82a492e
changes to file scanning connector
ARADDCC002 Sep 27, 2024
48bc81c
added error checking for clamav config
ARADDCC002 Sep 27, 2024
dbe8651
updated docker compose prod file
ARADDCC002 Sep 27, 2024
57d1fb0
added awaits for getfilescanconnectors call
ARADDCC002 Sep 27, 2024
42fc570
added awaits for getfilescanconnectors call
ARADDCC002 Sep 27, 2024
607bd6b
reverted logic for checking if av is initialised
ARADDCC002 Sep 27, 2024
5c5bcde
added the file av results popover for successful av scans
ARADDCC002 Sep 27, 2024
358d87e
fixed an issue that was causing only one file scan to store results
ARADDCC002 Oct 1, 2024
c577734
fixed some pr comments
ARADDCC002 Oct 2, 2024
46b6bba
changed structure of av file scan
ARADDCC002 Oct 2, 2024
19abdc4
various improvements to how we handle the AV connector. Added info en…
ARADDCC002 Oct 10, 2024
fdfa056
added frontend actions file for filescanning info call. updated ui to…
ARADDCC002 Oct 10, 2024
6dabbae
updated config and unit tests
ARADDCC002 Oct 14, 2024
c1c7548
fixed a typo
ARADDCC002 Oct 14, 2024
f1ccde5
various improvements including change to how we manage the initialisa…
ARADDCC002 Oct 15, 2024
bbdfe2b
updated ui to better reflect av results
ARADDCC002 Oct 16, 2024
1bbaabb
Merge branch 'main' into feature/BAI-1458-add-file-scanning-connector
ARADDCC002 Oct 16, 2024
e9b6c42
added backed log
ARADDCC002 Oct 16, 2024
08b85d9
awaited clam av init call
ARADDCC002 Oct 16, 2024
5a18054
Update and simplify scanning process
JR40159 Oct 22, 2024
ab64bd1
Fix config that was removed by mistake
JR40159 Oct 22, 2024
32e743d
removed uneeded config mocking
ARADDCC002 Oct 29, 2024
7e6b5f6
updated bailo config map for filescanners
ARADDCC002 Oct 30, 2024
62a66c2
improved checking for scanners info before show av chip on file displays
ARADDCC002 Oct 30, 2024
610791b
readded clamav to config
ARADDCC002 Oct 30, 2024
c3362fa
updated test mocking
ARADDCC002 Oct 30, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions backend/config/default.cjs
Original file line number Diff line number Diff line change
Expand Up @@ -190,10 +190,6 @@ module.exports = {
text: '',
startTimestamp: '',
},

avScanning: {
enabled: false,
},
},

connectors: {
Expand All @@ -208,6 +204,10 @@ module.exports = {
audit: {
kind: 'silly',
},

fileScanners: {
kinds: [],
},
},

s3: {
Expand Down
6 changes: 3 additions & 3 deletions backend/config/docker_compose.cjs
Original file line number Diff line number Diff line change
Expand Up @@ -67,9 +67,9 @@ module.exports = {
},
},

ui: {
avScanning: {
enabled: false,
connectors: {
fileScanners: {
kinds: ['clamAV'],
},
},
}
12 changes: 12 additions & 0 deletions backend/src/connectors/fileScanning/Base.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
import { FileInterface, ScanStateKeys } from '../../models/File.js'
export interface FileScanResult {
toolName: string
state: ScanStateKeys
isInfected?: boolean
viruses?: string[]
}

export abstract class BaseFileScanningConnector {
abstract info(): string[]
abstract scan(file: FileInterface): Promise<FileScanResult[]>
}
67 changes: 67 additions & 0 deletions backend/src/connectors/fileScanning/clamAv.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
import NodeClam from 'clamscan'
import { Readable } from 'stream'

import { getObjectStream } from '../../clients/s3.js'
import { FileInterfaceDoc, ScanState } from '../../models/File.js'
import log from '../../services/log.js'
import config from '../../utils/config.js'
import { ConfigurationError } from '../../utils/error.js'
import { BaseFileScanningConnector, FileScanResult } from './Base.js'

let av: NodeClam
export const clamAvToolName = 'Clam AV'

export class ClamAvFileScanningConnector extends BaseFileScanningConnector {
constructor() {
super()
}

info() {
return [clamAvToolName]
}

async init() {
try {
av = await new NodeClam().init({ clamdscan: config.avScanning.clamdscan })
} catch (error) {
throw ConfigurationError('Could not scan file as Clam AV is not running.', {
clamAvConfig: config.avScanning,
})
}
}

async scan(file: FileInterfaceDoc): Promise<FileScanResult[]> {
if (!av) {
throw ConfigurationError(
'Clam AV does not look like it is running. Check that it has been correctly initialised by calling the init function.',
{
clamAvConfig: config.avScanning,
},
)
}
const s3Stream = (await getObjectStream(file.bucket, file.path)).Body as Readable
try {
const { isInfected, viruses } = await av.scanStream(s3Stream)
log.info(
{ modelId: file.modelId, fileId: file._id, name: file.name, result: { isInfected, viruses } },
'Scan complete.',
)
return [
{
toolName: clamAvToolName,
state: ScanState.Complete,
isInfected,
viruses,
},
]
} catch (error) {
log.error({ error, modelId: file.modelId, fileId: file._id, name: file.name }, 'Scan errored.')
return [
{
toolName: clamAvToolName,
state: ScanState.Error,
},
]
}
}
}
39 changes: 39 additions & 0 deletions backend/src/connectors/fileScanning/index.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
import config from '../../utils/config.js'
import { ConfigurationError } from '../../utils/error.js'
import { BaseFileScanningConnector } from './Base.js'
import { ClamAvFileScanningConnector } from './clamAv.js'
import { FileScanningWrapper } from './wrapper.js'

export const FileScanKind = {
ClamAv: 'clamAV',
} as const
export type FileScanKindKeys = (typeof FileScanKind)[keyof typeof FileScanKind]

const fileScanConnectors: BaseFileScanningConnector[] = []
let scannerWrapper: undefined | BaseFileScanningConnector = undefined
export function runFileScanners(cache = true) {
if (scannerWrapper && cache) {
return scannerWrapper
}
config.connectors.fileScanners.kinds.forEach(async (fileScanner) => {
switch (fileScanner) {
case FileScanKind.ClamAv:
try {
const scanner = new ClamAvFileScanningConnector()
await scanner.init()
fileScanConnectors.push(scanner)
} catch (error) {
throw ConfigurationError('Could not configure or initialise Clam AV')
}
break
default:
throw ConfigurationError(`'${fileScanner}' is not a valid file scanning kind.`, {
validKinds: Object.values(FileScanKind),
})
}
})
scannerWrapper = new FileScanningWrapper(fileScanConnectors)
return scannerWrapper
}

export default runFileScanners()
34 changes: 34 additions & 0 deletions backend/src/connectors/fileScanning/wrapper.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
import { FileInterface } from '../../models/File.js'
import log from '../../services/log.js'
import { BaseFileScanningConnector, FileScanResult } from './Base.js'

export class FileScanningWrapper extends BaseFileScanningConnector {
scanners: BaseFileScanningConnector[] = []

constructor(scanners: BaseFileScanningConnector[]) {
super()
this.scanners = scanners
}

info() {
const scannerNames: string[] = []
for (const scanner of this.scanners) {
scannerNames.push(...scanner.info())
}
return scannerNames
}

async scan(file: FileInterface) {
const results: FileScanResult[] = []
for (const scanner of this.scanners) {
log.info(
{ modelId: file.modelId, fileId: file._id, name: file.name, toolName: scanner.info().pop() },
'Scan started.',
)
const scannerResults = await scanner.scan(file)
results.push(...scannerResults)
}

return results
}
}
21 changes: 11 additions & 10 deletions backend/src/models/File.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
import { Document, model, ObjectId, Schema } from 'mongoose'
import MongooseDelete from 'mongoose-delete'

import { FileScanResult } from '../connectors/fileScanning/Base.js'

// This interface stores information about the properties on the base object.
// It should be used for plain object representations, e.g. for sending to the
// client.
Expand All @@ -17,11 +19,7 @@ export interface FileInterface {

complete: boolean

avScan: {
state: ScanStateKeys
isInfected?: boolean
viruses?: Array<unknown>
}
avScan: Array<FileScanResult>

createdAt: Date
updatedAt: Date
Expand Down Expand Up @@ -51,11 +49,14 @@ const FileSchema = new Schema<FileInterface>(
bucket: { type: String, required: true },
path: { type: String, required: true },

avScan: {
state: { type: String, enum: Object.values(ScanState), default: 'notScanned' },
isInfected: { type: Boolean },
viruses: [{ type: String }],
},
avScan: [
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Don't forget, this will require a migration. I don't think a migration was done when this field was originally added so this migration will have to account for when this field doesn't exist

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think it does but feel free to double check. From testing it looks like it will return single results as arrays anyway

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

From looking at this diff it looks like the previous type of avScan was an object and now avScan is an array? That's why I thought this would require a migration

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It becomes an array of the same type of object as it was before and it seems to handle it automatically thankfully it seems

{
toolName: { type: String },
state: { type: String, enum: Object.values(ScanState) },
isInfected: { type: Boolean },
viruses: [{ type: String }],
},
],

complete: { type: Boolean, default: false },
},
Expand Down
3 changes: 3 additions & 0 deletions backend/src/routes.ts
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ import { getDockerRegistryAuth } from './routes/v1/registryAuth.js'
import { getCurrentUser } from './routes/v2/entities/getCurrentUser.js'
import { getEntities } from './routes/v2/entities/getEntities.js'
import { getEntityLookup } from './routes/v2/entities/getEntityLookup.js'
import { getFilescanningInfo } from './routes/v2/filescanning/getFilescanningInfo.js'
import { deleteAccessRequest } from './routes/v2/model/accessRequest/deleteAccessRequest.js'
import { getAccessRequest } from './routes/v2/model/accessRequest/getAccessRequest.js'
import { getAccessRequestCurrentUserPermissions } from './routes/v2/model/accessRequest/getAccessRequestCurrentUserPermissions.js'
Expand Down Expand Up @@ -217,6 +218,8 @@ server.delete('/api/v2/user/token/:accessKey', ...deleteUserToken)

server.get('/api/v2/specification', ...getSpecification)

server.get('/api/v2/filescanning/info', ...getFilescanningInfo)

// Python docs
const __filename = fileURLToPath(import.meta.url)
const __dirname = path.dirname(__filename)
Expand Down
15 changes: 15 additions & 0 deletions backend/src/routes/v2/filescanning/getFilescanningInfo.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
import bodyParser from 'body-parser'
import { Request, Response } from 'express'

import scanners from '../../../connectors/fileScanning/index.js'

interface GetFileScanningInfoResponse {
scanners: string[]
}

export const getFilescanningInfo = [
bodyParser.json(),
async (req: Request, res: Response<GetFileScanningInfoResponse>) => {
return res.json({ scanners: scanners.info() })
},
]
65 changes: 29 additions & 36 deletions backend/src/services/file.ts
Original file line number Diff line number Diff line change
@@ -1,20 +1,19 @@
import NodeClam from 'clamscan'
import { Schema } from 'mongoose'
import { Readable } from 'stream'

import { getObjectStream, putObjectStream } from '../clients/s3.js'
import { FileAction } from '../connectors/authorisation/actions.js'
import authorisation from '../connectors/authorisation/index.js'
import { FileScanResult } from '../connectors/fileScanning/Base.js'
import scanners from '../connectors/fileScanning/index.js'
import FileModel, { ScanState } from '../models/File.js'
import { UserInterface } from '../models/User.js'
import config from '../utils/config.js'
import { BadReq, Forbidden, NotFound } from '../utils/error.js'
import { longId } from '../utils/id.js'
import log from './log.js'
import { getModelById } from './model.js'
import { removeFileFromReleases } from './release.js'

let av: NodeClam

export async function uploadFile(user: UserInterface, modelId: string, name: string, mime: string, stream: Readable) {
const model = await getModelById(user, modelId)
if (model.settings.mirror.sourceModelId) {
Expand All @@ -38,43 +37,37 @@ export async function uploadFile(user: UserInterface, modelId: string, name: str

await file.save()

if (config.ui.avScanning.enabled) {
if (!av) {
try {
av = await new NodeClam().init({ clamdscan: config.avScanning.clamdscan })
} catch (error) {
log.error(error, 'Unable to connect to ClamAV.')
return file
}
}
const avStream = av.passthrough()
const s3Stream = (await getObjectStream(file.bucket, file.path)).Body as Readable
s3Stream.pipe(avStream)
log.info({ modelId, fileId: file._id, name }, 'Scan started.')

avStream.on('scan-complete', async (result) => {
log.info({ result, modelId, fileId: file._id, name }, 'Scan complete.')
await file.update({
avScan: { state: ScanState.Complete, isInfected: result.isInfected, viruses: result.viruses },
})
})
avStream.on('error', async (error) => {
log.error({ error, modelId, fileId: file._id, name }, 'Scan errored.')
await file.update({
avScan: { state: ScanState.Error },
})
})
avStream.on('timeout', async (error) => {
log.error({ error, modelId, fileId: file._id, name }, 'Scan timed out.')
await file.update({
avScan: { state: ScanState.Error },
})
})
if (scanners.info()) {
const resultsInprogress = scanners.info().map((scannerName) => ({
toolName: scannerName,
state: ScanState.InProgress,
}))
await updateFileWithResults(file._id, resultsInprogress)
scanners.scan(file).then((resultsArray) => updateFileWithResults(file._id, resultsArray))
}

return file
}

async function updateFileWithResults(_id: Schema.Types.ObjectId, results: FileScanResult[]) {
for (const result of results) {
const updateExistingResult = await FileModel.updateOne(
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'd be interested to look to see if Mongo can handle this logic for you and add new elements to the array if a an element with a tool doesn't exist and update the existing element if a element with that tool does exist

{ _id, 'avScan.toolName': result.toolName },
{
$set: { 'avScan.$': result },
},
)
if (updateExistingResult.modifiedCount === 0) {
await FileModel.updateOne(
{ _id },
{
$set: { avScan: { toolName: result.toolName, state: result.state } },
},
)
}
}
}

export async function downloadFile(user: UserInterface, fileId: string, range?: { start: number; end: number }) {
const file = await getFileById(user, fileId)
const model = await getModelById(user, file.modelId)
Expand Down
7 changes: 4 additions & 3 deletions backend/src/services/mirroredModel.ts
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ import { sign } from '../clients/kms.js'
import { getObjectStream, putObjectStream } from '../clients/s3.js'
import { ModelAction } from '../connectors/authorisation/actions.js'
import authorisation from '../connectors/authorisation/index.js'
import scanners from '../connectors/fileScanning/index.js'
import { FileInterfaceDoc, ScanState } from '../models/File.js'
import { ModelDoc } from '../models/Model.js'
import { ModelCardRevisionInterface } from '../models/ModelCardRevision.js'
Expand Down Expand Up @@ -354,7 +355,7 @@ async function checkReleaseFiles(user: UserInterface, modelId: string, semvers:
}
}

if (config.ui.avScanning.enabled) {
if (scanners.info()) {
const files: FileInterfaceDoc[] = await getFilesByIds(user, modelId, fileIds)
const scanErrors: {
missingScan: Array<{ name: string; id: string }>
Expand All @@ -364,9 +365,9 @@ async function checkReleaseFiles(user: UserInterface, modelId: string, semvers:
for (const file of files) {
if (!file.avScan) {
scanErrors.missingScan.push({ name: file.name, id: file.id })
} else if (file.avScan.state !== ScanState.Complete) {
} else if (file.avScan.some((scanResult) => scanResult.state !== ScanState.Complete)) {
scanErrors.incompleteScan.push({ name: file.name, id: file.id })
} else if (file.avScan.isInfected) {
} else if (file.avScan.some((scanResult) => scanResult.isInfected)) {
scanErrors.failedScan.push({ name: file.name, id: file.id })
}
}
Expand Down
4 changes: 0 additions & 4 deletions backend/src/types/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -70,8 +70,4 @@ export interface UiConfig {
text: string
startTimestamp: string
}

avScanning: {
enabled: boolean
}
}
Loading
Loading