Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

LocalFileExtractor #519

Merged
merged 4 commits into from
Feb 7, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions libs/extensions/std/exec/src/extension.ts
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ import { ArchiveInterpreterExecutor } from './archive-interpreter-executor';
import { FilePickerExecutor } from './file-picker-executor';
import { GtfsRTInterpreterExecutor } from './gtfs-rt-interpreter-executor';
import { HttpExtractorExecutor } from './http-extractor-executor';
import { LocalFileExtractorExecutor } from './local-file-extractor-executor';
import { TextFileInterpreterExecutor } from './text-file-interpreter-executor';
import { TextLineDeleterExecutor } from './text-line-deleter-executor';
import { TextRangeSelectorExecutor } from './text-range-selector-executor';
Expand All @@ -33,6 +34,7 @@ export class StdExecExtension implements JayveeExecExtension {
ArchiveInterpreterExecutor,
FilePickerExecutor,
GtfsRTInterpreterExecutor,
LocalFileExtractorExecutor,
];
}
}
131 changes: 131 additions & 0 deletions libs/extensions/std/exec/src/local-file-extractor-executor.spec.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,131 @@
// SPDX-FileCopyrightText: 2023 Friedrich-Alexander-Universitat Erlangen-Nurnberg
//
// SPDX-License-Identifier: AGPL-3.0-only

import * as path from 'path';

import * as R from '@jvalue/jayvee-execution';
import { getTestExecutionContext } from '@jvalue/jayvee-execution/test';
import {
BlockDefinition,
IOType,
createJayveeServices,
} from '@jvalue/jayvee-language-server';
import {
expectNoParserAndLexerErrors,
loadTestExtensions,
parseHelper,
readJvTestAssetHelper,
} from '@jvalue/jayvee-language-server/test';
import { AstNode, AstNodeLocator, LangiumDocument } from 'langium';
import { NodeFileSystem } from 'langium/node';
import * as nock from 'nock';

import { LocalFileExtractorExecutor } from './local-file-extractor-executor';

// Runs LocalFileExtractorExecutor against the Jayvee fixtures in
// test/assets/local-file-extractor-executor and checks the produced file or
// the reported error message.
describe('Validation of LocalFileExtractorExecutor', () => {
  const PATH_TRAVERSAL_MESSAGE =
    'File path cannot include "..". Path traversal is restricted.';

  const readJvTestAsset = readJvTestAssetHelper(
    __dirname,
    '../test/assets/local-file-extractor-executor/',
  );

  let parse: (input: string) => Promise<LangiumDocument<AstNode>>;
  let locator: AstNodeLocator;

  // Parses the given Jayvee source, picks the second block of the first
  // pipeline and executes the extractor on it.
  async function parseAndExecuteExecutor(
    input: string,
  ): Promise<R.Result<R.BinaryFile>> {
    const document = await parse(input);
    expectNoParserAndLexerErrors(document);

    const blockUnderTest = locator.getAstNode<BlockDefinition>(
      document.parseResult.value,
      'pipelines@0/blocks@1',
    ) as BlockDefinition;

    const executor = new LocalFileExtractorExecutor();
    const executionContext = getTestExecutionContext(locator, document, [
      blockUnderTest,
    ]);
    return executor.doExecute(R.NONE, executionContext);
  }

  // Executes the named fixture and expects an error result with the given message.
  async function expectErrorMessage(
    assetName: string,
    expectedMessage: string,
  ): Promise<void> {
    const outcome = await parseAndExecuteExecutor(readJvTestAsset(assetName));

    expect(R.isErr(outcome)).toEqual(true);
    if (R.isErr(outcome)) {
      expect(outcome.left.message).toEqual(expectedMessage);
    }
  }

  beforeAll(async () => {
    // Language services with the test block types loaded
    const services = createJayveeServices(NodeFileSystem).Jayvee;
    const testExtensionPath = path.resolve(
      __dirname,
      '../test/test-extension/TestBlockTypes.jv',
    );
    await loadTestExtensions(services, [testExtensionPath]);

    locator = services.workspace.AstNodeLocator;
    // Parse function for Jayvee (without validation)
    parse = parseHelper(services);
  });

  beforeEach(() => {
    if (!nock.isActive()) {
      nock.activate();
    }
    nock.cleanAll();
  });

  afterEach(() => {
    nock.restore();
  });

  it('should diagnose no error on valid local file path', async () => {
    const outcome = await parseAndExecuteExecutor(
      readJvTestAsset('valid-local-file.jv'),
    );

    expect(R.isErr(outcome)).toEqual(false);
    if (R.isOk(outcome)) {
      const expectedFile = expect.objectContaining({
        name: 'local-file-test.csv',
        extension: 'csv',
        ioType: IOType.FILE,
        mimeType: R.MimeType.TEXT_CSV,
      });
      expect(outcome.right).toEqual(expectedFile);
    }
  });

  it('should diagnose error on file not found', async () => {
    await expectErrorMessage(
      'invalid-file-not-found.jv',
      `File './does-not-exist.csv' not found.`,
    );
  });

  it('should diagnose error on path traversal at the start of the path', async () => {
    await expectErrorMessage(
      'invalid-path-traversal-at-start.jv',
      PATH_TRAVERSAL_MESSAGE,
    );
  });

  it('should diagnose error on path traversal in the path', async () => {
    await expectErrorMessage(
      'invalid-path-traversal-in-path.jv',
      PATH_TRAVERSAL_MESSAGE,
    );
  });
});
82 changes: 82 additions & 0 deletions libs/extensions/std/exec/src/local-file-extractor-executor.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
// SPDX-FileCopyrightText: 2023 Friedrich-Alexander-Universitat Erlangen-Nurnberg
//
// SPDX-License-Identifier: AGPL-3.0-only

import * as fs from 'fs/promises';
import * as path from 'path';

import * as R from '@jvalue/jayvee-execution';
import {
AbstractBlockExecutor,
BinaryFile,
BlockExecutorClass,
ExecutionContext,
FileExtension,
MimeType,
None,
implementsStatic,
inferFileExtensionFromFileExtensionString,
inferMimeTypeFromFileExtensionString,
} from '@jvalue/jayvee-execution';
import { IOType, PrimitiveValuetypes } from '@jvalue/jayvee-language-server';

/**
 * Executor for the `LocalFileExtractor` block type.
 *
 * Takes no input (`IOType.NONE`), reads the file referenced by the block's
 * `filePath` property from the local file system and emits it as a
 * `BinaryFile` (`IOType.FILE`).
 */
@implementsStatic<BlockExecutorClass>()
export class LocalFileExtractorExecutor extends AbstractBlockExecutor<
  IOType.NONE,
  IOType.FILE
> {
  public static readonly type = 'LocalFileExtractor';

  constructor() {
    super(IOType.NONE, IOType.FILE);
  }

  /**
   * Reads the configured file and wraps its content in a `BinaryFile`.
   *
   * @param input unused; this block has no input.
   * @param context execution context providing the `filePath` property, the
   * current AST node (for diagnostics) and the logger.
   * @returns an ok-result holding the file, or an err-result attached to the
   * `filePath` property when the path contains `..` or the file cannot be read.
   */
  async doExecute(
    input: None,
    context: ExecutionContext,
  ): Promise<R.Result<BinaryFile>> {
    const filePath = context.getPropertyValue(
      'filePath',
      PrimitiveValuetypes.Text,
    );

    // Plain substring check: any path containing ".." is rejected, which also
    // covers file names such as "a..b.csv".
    if (filePath.includes('..')) {
      return R.err({
        message: 'File path cannot include "..". Path traversal is restricted.',
        diagnostic: { node: context.getCurrentNode(), property: 'filePath' },
      });
    }

    try {
      const rawData = await fs.readFile(filePath);

      // Infer FileName and FileExtension from filePath
      const fileName = path.basename(filePath);
      const extName = path.extname(fileName);
      const fileExtension =
        inferFileExtensionFromFileExtensionString(extName) ??
        FileExtension.NONE;

      // Infer Mimetype from FileExtension, if not inferrable, then default to application/octet-stream
      const mimeType =
        inferMimeTypeFromFileExtensionString(fileExtension) ??
        MimeType.APPLICATION_OCTET_STREAM;

      // A Node.js Buffer may be a view onto a larger (pooled) ArrayBuffer, so
      // `rawData.buffer` on its own can contain unrelated bytes and have the
      // wrong length. Copy out exactly the bytes that belong to this file.
      const fileContent = rawData.buffer.slice(
        rawData.byteOffset,
        rawData.byteOffset + rawData.byteLength,
      ) as ArrayBuffer;

      // Create file and return file
      const file = new BinaryFile(
        fileName,
        fileExtension,
        mimeType,
        fileContent,
      );

      context.logger.logDebug(`Successfully extracted file ${filePath}`);
      return R.ok(file);
    } catch (error: unknown) {
      // Only a missing file is reported as "not found"; any other failure
      // (permissions, path is a directory, ...) keeps its underlying reason.
      const isNotFound =
        error instanceof Error &&
        (error as Error & { code?: unknown }).code === 'ENOENT';
      const reason = error instanceof Error ? error.message : String(error);

      return R.err({
        message: isNotFound
          ? `File '${filePath}' not found.`
          : `File '${filePath}' could not be read: ${reason}`,
        diagnostic: { node: context.getCurrentNode(), property: 'filePath' },
      });
    }
  }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
// SPDX-FileCopyrightText: 2023 Friedrich-Alexander-Universitat Erlangen-Nurnberg
//
// SPDX-License-Identifier: AGPL-3.0-only

// Test fixture: LocalFileExtractor configured with a path to a file that does
// not exist. The executor spec expects the error
// "File './does-not-exist.csv' not found." for this pipeline.
pipeline TestPipeline {

block TestExtractor oftype TestFileExtractor {
}

// Block under test. The executor spec looks it up as 'pipelines@0/blocks@1',
// so it must stay the second block of the pipeline.
block TestBlock oftype LocalFileExtractor {
filePath: './does-not-exist.csv';
}

block TestLoader oftype TestSheetLoader {
}

TestExtractor -> TestBlock -> TestLoader;
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
// SPDX-FileCopyrightText: 2023 Friedrich-Alexander-Universitat Erlangen-Nurnberg
//
// SPDX-License-Identifier: AGPL-3.0-only

// Test fixture: LocalFileExtractor whose filePath starts with "..". The
// executor rejects any path containing ".." with a path traversal error.
pipeline TestPipeline {

block TestExtractor oftype TestFileExtractor {
}

// Block under test. The executor spec looks it up as 'pipelines@0/blocks@1',
// so it must stay the second block of the pipeline.
block TestBlock oftype LocalFileExtractor {
filePath: '../non-existent-file.csv';
}

block TestLoader oftype TestSheetLoader {
}

TestExtractor -> TestBlock -> TestLoader;
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
// SPDX-FileCopyrightText: 2023 Friedrich-Alexander-Universitat Erlangen-Nurnberg
//
// SPDX-License-Identifier: AGPL-3.0-only

// Test fixture: LocalFileExtractor whose filePath contains ".." after the
// leading "./". The executor rejects any path containing ".." with a path
// traversal error.
pipeline TestPipeline {

block TestExtractor oftype TestFileExtractor {
}

// Block under test. The executor spec looks it up as 'pipelines@0/blocks@1',
// so it must stay the second block of the pipeline.
block TestBlock oftype LocalFileExtractor {
filePath: './../non-existent-file.csv';
}

block TestLoader oftype TestSheetLoader {
}

TestExtractor -> TestBlock -> TestLoader;
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
HeaderExample1,HeaderExample2,HeaderExample3,HeaderExample4
Example1,Example2,Example3,Example4
Example1,Example2,Example3,Example4
Example1,Example2,Example3,Example4
Example1,Example2,Example3,Example4
Example1,Example2,Example3,Example4
Example1,Example2,Example3,Example4
Example1,Example2,Example3,Example4
Example1,Example2,Example3,Example4
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
SPDX-FileCopyrightText: 2023 Friedrich-Alexander-Universitat Erlangen-Nurnberg

SPDX-License-Identifier: AGPL-3.0-only
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
// SPDX-FileCopyrightText: 2023 Friedrich-Alexander-Universitat Erlangen-Nurnberg
//
// SPDX-License-Identifier: AGPL-3.0-only

// Test fixture: LocalFileExtractor pointing at the existing CSV asset
// 'local-file-test.csv'. The executor spec expects a file result with
// extension 'csv' for this pipeline.
pipeline TestPipeline {

block TestExtractor oftype TestFileExtractor {
}

// Block under test. The executor spec looks it up as 'pipelines@0/blocks@1',
// so it must stay the second block of the pipeline.
// NOTE(review): the path is relative, so it is resolved against the working
// directory of the test process — presumably the repository root; confirm.
block TestBlock oftype LocalFileExtractor {
filePath: './libs/extensions/std/exec/test/assets/local-file-extractor-executor/local-file-test.csv';
}

block TestLoader oftype TestSheetLoader {
}

TestExtractor -> TestBlock -> TestLoader;
}
Original file line number Diff line number Diff line change
Expand Up @@ -265,6 +265,33 @@ describe('Validation of blocktype specific properties', () => {
});
});

// Validation of the `filePath` property of LocalFileExtractor blocks.
describe('LocalFileExtractor blocktype', () => {
  it('should diagnose no error on valid filePath parameter value', async () => {
    const validSource = readJvTestAsset(
      'property-assignment/blocktype-specific/local-file-extractor/valid-valid-filepath-param.jv',
    );
    await parseAndValidatePropertyAssignment(validSource);

    // A valid path must not produce any diagnostic.
    expect(validationAcceptorMock).toHaveBeenCalledTimes(0);
  });

  it('should diagnose error on invalid filePath parameter value', async () => {
    const invalidSource = readJvTestAsset(
      'property-assignment/blocktype-specific/local-file-extractor/invalid-invalid-filepath-param.jv',
    );
    await parseAndValidatePropertyAssignment(invalidSource);

    // Exactly one diagnostic: the path traversal error.
    expect(validationAcceptorMock).toHaveBeenCalledTimes(1);
    expect(validationAcceptorMock).toHaveBeenCalledWith(
      'error',
      'File path cannot include "..". Path traversal is restricted.',
      expect.any(Object),
    );
  });
});

describe('RowDeleter blocktype', () => {
it('should diagnose error on deleting partial row', async () => {
const text = readJvTestAsset(
Expand Down
Loading
Loading