Skip to content

Commit

Permalink
Merge pull request #642 from jvalue/608-csv-newlines
Browse files Browse the repository at this point in the history
[FIX] Cannot parse CSV with newlines
  • Loading branch information
TungstnBallon authored Jan 28, 2025
2 parents 186dcb7 + 2663484 commit 902ba93
Show file tree
Hide file tree
Showing 21 changed files with 292 additions and 167 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ import {
} from './io-type-implementation';

export class TextFile
extends FileSystemFile<string[]>
extends FileSystemFile<string>
implements IOTypeImplementation<IOType.TEXT_FILE>
{
public readonly ioType = IOType.TEXT_FILE;
Expand Down
65 changes: 64 additions & 1 deletion libs/execution/src/lib/util/file-util.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,25 @@
//
// SPDX-License-Identifier: AGPL-3.0-only

import { FileExtension, MimeType } from '../types';
import * as R from '../blocks';
import { FileExtension, MimeType, TextFile } from '../types';

import {
inferFileExtensionFromContentTypeString,
inferFileExtensionFromFileExtensionString,
inferMimeTypeFromFileExtensionString,
transformTextFileLines,
} from './file-util';

/**
 * Creates a plain-text `TextFile` fixture for the tests below.
 *
 * @param content the complete text the file should hold
 * @returns a `.txt` file named `exampleTextFile` with mime type `text/plain`
 *   semantics (`MimeType.TEXT_PLAIN`) and the given content
 */
function exampleTextFile(content: string): TextFile {
  const name = 'exampleTextFile';
  const extension = FileExtension.TXT;
  const mimeType = MimeType.TEXT_PLAIN;

  return new TextFile(name, extension, mimeType, content);
}

describe('Validation of file-util', () => {
describe('Function inferMimeTypeFromContentTypeString', () => {
it('should diagnose no error on known mimeType', () => {
Expand Down Expand Up @@ -68,4 +79,56 @@ describe('Validation of file-util', () => {
expect(result).toEqual(undefined);
});
});
// Tests for transformTextFileLines. Every case passes an identity transform
// (a spy that returns its input lines unchanged) and therefore expects the
// resulting file to equal the input file.
describe('Function transformTextFileLines', () => {
// Content without any line break is handed to the transform as one line.
it('should diagnose no error without newline', async () => {
const file = exampleTextFile('some text content without a newline');
// eslint-disable-next-line @typescript-eslint/require-await
const spy = vi.fn(async (lines: string[]) => R.ok(lines));
const result = await transformTextFileLines(file, /\r?\n/, spy);

expect(spy).toHaveBeenCalledOnce();
expect(spy).toHaveBeenCalledWith(['some text content without a newline']);

expect(R.isOk(result)).toBe(true);
assert(R.isOk(result));

expect(result.right).toStrictEqual(file);
});
// Empty content yields no lines at all (not a single empty line).
it('should diagnose no error on empty file', async () => {
const file = exampleTextFile('');

// eslint-disable-next-line @typescript-eslint/require-await
const spy = vi.fn(async (lines: string[]) => R.ok(lines));
const result = await transformTextFileLines(file, /\r?\n/, spy);

expect(spy).toHaveBeenCalledOnce();
expect(spy).toHaveBeenCalledWith([]);

expect(R.isOk(result)).toBe(true);
assert(R.isOk(result));

expect(result.right).toStrictEqual(file);
});
// A trailing line break must not show up as an extra empty line in the
// transform's input, yet the result must still equal the original file.
it('should diagnose no error on file with trailing newline', async () => {
const file = exampleTextFile(`some text content
with a
trailing newline
`);
// eslint-disable-next-line @typescript-eslint/require-await
const spy = vi.fn(async (lines: string[]) => R.ok(lines));
const result = await transformTextFileLines(file, /\r?\n/, spy);

expect(spy).toHaveBeenCalledOnce();
// NOTE(review): the expected second line carries a trailing space
// ('with a '); confirm the template literal above contains it as well.
expect(spy).toHaveBeenCalledWith([
'some text content',
'with a ',
'trailing newline',
]);

expect(R.isOk(result)).toBe(true);
assert(R.isOk(result));

expect(result.right).toStrictEqual(file);
});
});
});
33 changes: 32 additions & 1 deletion libs/execution/src/lib/util/file-util.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,8 @@

import * as mime from 'mime-types';

import { FileExtension, MimeType } from '../types';
import * as R from '../blocks';
import { FileExtension, MimeType, TextFile } from '../types';

export function inferMimeTypeFromFileExtensionString(
fileExtension: string | undefined,
Expand Down Expand Up @@ -50,3 +51,33 @@ export function inferFileExtensionFromContentTypeString(
}
return undefined;
}

/**
 * Splits the content of a text file into lines, lets `transformFn` rewrite
 * those lines and assembles the outcome into a new `TextFile`.
 *
 * The first line break found in the original content is reused as the
 * separator when the transformed lines are joined again. If the original
 * content ended with a line break, so does the new content.
 *
 * @param file the text file to transform; it is not modified
 * @param lineBreakPattern pattern that separates two lines; it may contain
 *   capturing groups
 * @param transformFn receives the lines (without their line breaks) and
 *   returns the lines of the new file, or an error
 * @returns a new `TextFile` with the same name, extension and mime type as
 *   `file`, or the error returned by `transformFn`
 */
export async function transformTextFileLines(
  file: TextFile,
  lineBreakPattern: RegExp,
  transformFn: (lines: string[]) => Promise<R.Result<string[]>>,
): Promise<R.Result<TextFile>> {
  // `String.prototype.split` splices the value of every capturing group of
  // the separator into its result, which would otherwise show up as bogus
  // lines. Appending an empty alternative makes the pattern match '' for
  // sure, so the length of that match reveals the number of groups.
  const groupProbe = new RegExp(
    `${lineBreakPattern.source}|`,
    lineBreakPattern.flags,
  );
  const groupCount = (groupProbe.exec('')?.length ?? 1) - 1;

  // Each actual line is followed by `groupCount` captured values, hence only
  // every `groupCount + 1`-th entry is a line.
  const lines = file.content
    .split(lineBreakPattern)
    .filter((_, idx) => idx % (groupCount + 1) === 0);
  const lineBreak = file.content.match(lineBreakPattern)?.at(0) ?? '';

  // Content ending with a line break (and empty content) leaves an empty
  // last entry behind, which is not a line of its own.
  const endsWithLineBreak = lines.at(-1) === '';
  if (endsWithLineBreak) {
    lines.pop();
  }

  const newLines = await transformFn(lines);
  if (R.isErr(newLines)) {
    return newLines;
  }

  const joinedLines = newLines.right.join(lineBreak);
  const newContent = endsWithLineBreak ? joinedLines + lineBreak : joinedLines;

  return R.ok(
    new TextFile(file.name, file.extension, file.mimeType, newContent),
  );
}
1 change: 0 additions & 1 deletion libs/execution/src/lib/util/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,3 @@

export * from './implements-static-decorator';
export * from './file-util';
export * from './string-util';
14 changes: 0 additions & 14 deletions libs/execution/src/lib/util/string-util.ts

This file was deleted.

3 changes: 1 addition & 2 deletions libs/execution/test/utils/file-util.ts
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@ import {
TextFile,
inferFileExtensionFromFileExtensionString,
inferMimeTypeFromFileExtensionString,
splitLines,
} from '../../src';

export function createBinaryFileFromLocalFile(fileName: string): BinaryFile {
Expand All @@ -39,6 +38,6 @@ export function createTextFileFromLocalFile(fileName: string): TextFile {
path.basename(fileName),
fileExtension,
mimeType,
splitLines(fileContent, /\r?\n/),
fileContent,
);
}
36 changes: 15 additions & 21 deletions libs/extensions/std/exec/src/text-file-interpreter-executor.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -92,9 +92,9 @@ describe('Validation of TextFileInterpreterExecutor', () => {
expect(R.isErr(result)).toEqual(false);
if (R.isOk(result)) {
expect(result.right.ioType).toEqual(IOType.TEXT_FILE);
expect(result.right.content).toEqual(
expect.arrayContaining(['Multiline ', 'Test File']),
);
expect(result.right.content).toBe(`Multiline
Test File
`);
}
});

Expand All @@ -107,24 +107,18 @@ describe('Validation of TextFileInterpreterExecutor', () => {
expect(R.isErr(result)).toEqual(false);
if (R.isOk(result)) {
expect(result.right.ioType).toEqual(IOType.TEXT_FILE);
expect(result.right.content).toEqual(
expect.arrayContaining(['vehicle:268435857"0']),
);
}
});

it('should diagnose no error on custom lineBreak', async () => {
const text = readJvTestAsset('valid-custom-line-break.jv');

const testFile = readTestFile('test.txt');
const result = await parseAndExecuteExecutor(text, testFile);

expect(R.isErr(result)).toEqual(false);
if (R.isOk(result)) {
expect(result.right.ioType).toEqual(IOType.TEXT_FILE);
expect(result.right.content).toEqual(
expect.arrayContaining(['Multiline \nTest', 'File\n']),
);
const expectedBytes = Buffer.from([
0xa, 0xd, 0xa, 0x3, 0x32, 0x2e, 0x30, 0x10, 0x0, 0x18, 0xe9, 0xa9, 0xba,
0xef, 0xbf, 0xbd, 0x6, 0x12, 0x45, 0xa, 0x11, 0x76, 0x65, 0x68, 0x69,
0x63, 0x6c, 0x65, 0x3a, 0x32, 0x36, 0x38, 0x34, 0x33, 0x35, 0x38, 0x35,
0x37, 0x22, 0x30, 0xa, 0xe, 0xa, 0x8, 0x31, 0x35, 0x39, 0x32, 0x33,
0x34, 0x37, 0x34, 0x2a, 0x2, 0x31, 0x30, 0x12, 0xf, 0xd, 0x27, 0xef,
0xbf, 0xbd, 0x39, 0x42, 0x15, 0xef, 0xbf, 0xbd, 0xf, 0x1f, 0xef, 0xbf,
0xbd, 0x1d, 0x0, 0x0, 0x2c, 0x43, 0x28, 0x0, 0x42, 0xb, 0xa, 0x9, 0x32,
0x36, 0x38, 0x34, 0x33, 0x35, 0x38, 0x35, 0x37,
]);
const actualBytes = Buffer.from(result.right.content);
expect(actualBytes).toStrictEqual(expectedBytes);
}
});
});
15 changes: 2 additions & 13 deletions libs/extensions/std/exec/src/text-file-interpreter-executor.ts
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@ import {
type ExecutionContext,
TextFile,
implementsStatic,
splitLines,
} from '@jvalue/jayvee-execution';
import { IOType } from '@jvalue/jayvee-language-server';

Expand All @@ -36,25 +35,15 @@ export class TextFileInterpreterExecutor extends AbstractBlockExecutor<
'encoding',
context.valueTypeProvider.Primitives.Text,
);
const lineBreak = context.getPropertyValue(
'lineBreak',
context.valueTypeProvider.Primitives.Regex,
);

const decoder = new TextDecoder(encoding);
context.logger.logDebug(
`Decoding file content using encoding "${encoding}"`,
);
const textContent = decoder.decode(file.content);

context.logger.logDebug(
`Splitting lines using line break /${lineBreak.source}/`,
return R.ok(
new TextFile(file.name, file.extension, file.mimeType, textContent),
);
const lines = splitLines(textContent, lineBreak);
context.logger.logDebug(
`Lines were split successfully, the resulting text file has ${lines.length} lines`,
);

return R.ok(new TextFile(file.name, file.extension, file.mimeType, lines));
}
}
16 changes: 10 additions & 6 deletions libs/extensions/std/exec/src/text-line-deleter-executor.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -92,8 +92,9 @@ describe('Validation of TextLineDeleterExecutor', () => {
expect(R.isErr(result)).toEqual(false);
if (R.isOk(result)) {
expect(result.right.ioType).toEqual(IOType.TEXT_FILE);
expect(result.right.content).toEqual(
expect.arrayContaining(['Test File']),
expect(result.right.content).toBe(
`Test File
`,
);
}
});
Expand All @@ -107,8 +108,10 @@ describe('Validation of TextLineDeleterExecutor', () => {
expect(R.isErr(result)).toEqual(false);
if (R.isOk(result)) {
expect(result.right.ioType).toEqual(IOType.TEXT_FILE);
expect(result.right.content).toEqual(
expect.arrayContaining(['Multiline', 'Test File']),
expect(result.right.content).toBe(
`Multiline
Test File
`,
);
}
});
Expand Down Expand Up @@ -136,8 +139,9 @@ describe('Validation of TextLineDeleterExecutor', () => {
expect(R.isErr(result)).toEqual(false);
if (R.isOk(result)) {
expect(result.right.ioType).toEqual(IOType.TEXT_FILE);
expect(result.right.content).toEqual(
expect.arrayContaining(['Test File']),
expect(result.right.content).toBe(
`Test File
`,
);
}
});
Expand Down
73 changes: 41 additions & 32 deletions libs/extensions/std/exec/src/text-line-deleter-executor.ts
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,40 @@ import {
} from '@jvalue/jayvee-execution';
import { IOType } from '@jvalue/jayvee-language-server';

/**
 * Removes the lines with the given 1-based line numbers.
 *
 * @param lines the lines of the text file; the array is left unmodified
 * @param deleteIdxs 1-based numbers of the lines to delete; duplicates are
 *   allowed and have no additional effect
 * @param context used for debug logging and to attach the diagnostic of an
 *   invalid line number to the `lines` property
 * @returns the remaining lines in their original order, or an error if a
 *   line number does not refer to an existing line
 */
// eslint-disable-next-line @typescript-eslint/require-await
async function deleteLines(
  lines: string[],
  deleteIdxs: number[],
  context: ExecutionContext,
): Promise<R.Result<string[]>> {
  for (const [position, deleteIdx] of deleteIdxs.entries()) {
    // Numbers below 1 are rejected as well: used as an array position they
    // would count from the end and silently remove the wrong line.
    if (deleteIdx < 1 || deleteIdx > lines.length) {
      return R.err({
        message: `Line ${deleteIdx} does not exist in the text file, only ${lines.length} line(s) are present`,
        diagnostic: {
          node: context.getOrFailProperty('lines').value,
          property: 'values',
          index: position,
        },
      });
    }
  }

  const linesToDelete = new Set(deleteIdxs);
  const sortedLines = [...linesToDelete].sort((a, b) => a - b);

  context.logger.logDebug(`Deleting line(s) ${sortedLines.join(', ')}`);

  // Line numbers are 1-based, array positions 0-based.
  const remainingLines = lines.filter((_, idx) => !linesToDelete.has(idx + 1));

  return R.ok(remainingLines);
}

@implementsStatic<BlockExecutorClass>()
export class TextLineDeleterExecutor extends AbstractBlockExecutor<
IOType.TEXT_FILE,
Expand All @@ -23,48 +57,23 @@ export class TextLineDeleterExecutor extends AbstractBlockExecutor<
super(IOType.TEXT_FILE, IOType.TEXT_FILE);
}

// eslint-disable-next-line @typescript-eslint/require-await
async doExecute(
file: TextFile,
context: ExecutionContext,
): Promise<R.Result<TextFile>> {
const lines = context.getPropertyValue(
const deleteIdxs = context.getPropertyValue(
'lines',
context.valueTypeProvider.createCollectionValueTypeOf(
context.valueTypeProvider.Primitives.Integer,
),
);
const numberOfLines = file.content.length;

let lineIndex = 0;
for (const lineNumber of lines) {
if (lineNumber > numberOfLines) {
return R.err({
message: `Line ${lineNumber} does not exist in the text file, only ${file.content.length} line(s) are present`,
diagnostic: {
node: context.getOrFailProperty('lines').value,
property: 'values',
index: lineIndex,
},
});
}

++lineIndex;
}

const distinctLines = new Set(lines);
const sortedLines = [...distinctLines].sort((a, b) => a - b);

context.logger.logDebug(`Deleting line(s) ${sortedLines.join(', ')}`);

const reversedLines = sortedLines.reverse();
const newContent = [...file.content];
for (const lineToDelete of reversedLines) {
newContent.splice(lineToDelete - 1, 1);
}
const lineBreakPattern = context.getPropertyValue(
'lineBreak',
context.valueTypeProvider.Primitives.Regex,
);

return R.ok(
new TextFile(file.name, file.extension, file.mimeType, newContent),
return R.transformTextFileLines(file, lineBreakPattern, (lines) =>
deleteLines(lines, deleteIdxs, context),
);
}
}
Loading

0 comments on commit 902ba93

Please sign in to comment.