fix/non-ascii character support for derivatives & stream logging (#564)
(new) remove all non-ascii characters for filename sanitization (#562)
(new) remove all apostrophes and single quotes for filename sanitization
(new) streams always log any errors now
(new) failed stream operations now handle errors and/or return the error
EMaslowskiQ authored Jan 3, 2024
1 parent eb9c381 · commit 627d8d2
Showing 10 changed files with 38 additions and 44 deletions.
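The headline change is the new filename sanitization in server/utils/nameHelpers.ts (diff below). A minimal sketch of the resulting behavior, assuming the regex chain introduced in that file; the import path and sample filename are illustrative, not part of the commit:

    import { NameHelpers } from './utils/nameHelpers';   // path is illustrative

    // spaces and commas become '_', non-ascii characters and apostrophes/backticks
    // are dropped, and any remaining special character becomes '-'
    const cleaned: string = NameHelpers.sanitizeFileName("Théo's Model v2 (final).glb");
    // cleaned === 'Thos_Model_v2_-final-.glb'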
server/graphql/schema/asset/resolvers/mutations/uploadAsset.ts (11 changes: 6 additions & 5 deletions)
@@ -104,18 +104,19 @@ class UploadAssetWorker extends ResolverBase {
         const stream = fileStream.pipe(writeStream);
 
         return new Promise(resolve => {
-            fileStream.on('error', () => {
-                stream.emit('error');
+            fileStream.on('error', (error) => {
+                LOG.error('uploadAsset', LOG.LS.eGQL, error);
+                stream.emit('error', error);
             });
 
             stream.on('finish', async () => {
                 resolve(this.uploadWorkerOnFinish(storageKey, filename, vocabulary.idVocabulary));
             });
 
-            stream.on('error', async () => {
-                await this.appendToWFReport('uploadAsset Upload failed', true, true);
+            stream.on('error', async (error) => {
+                await this.appendToWFReport(`uploadAsset Upload failed (${error.message})`, true, true);
                 await storage.discardWriteStream({ storageKey });
-                resolve({ status: UploadStatus.Failed, error: 'Upload failed' });
+                resolve({ status: UploadStatus.Failed, error: `Upload failed (${error.message})` });
             });
 
             // stream.on('close', async () => { });
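The uploadAsset change above follows a general Node stream pattern: forward the source stream's error to the piped destination so one handler can log it and surface error.message. A standalone sketch of that pattern; the function name and paths are hypothetical, not from the commit:

    import * as fs from 'fs';

    // copy a file, forwarding read errors into the destination stream so that the
    // destination's single 'error' handler both rejects and reports the message
    function copyWithErrorForwarding(srcPath: string, dstPath: string): Promise<void> {
        return new Promise((resolve, reject) => {
            const src = fs.createReadStream(srcPath);
            const dst = src.pipe(fs.createWriteStream(dstPath));
            src.on('error', (error) => dst.emit('error', error));   // forward instead of swallowing
            dst.on('finish', () => resolve());
            dst.on('error', (error) => reject(new Error(`copy failed (${error.message})`)));
        });
    }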
server/job/impl/Cook/JobCookSIPackratInspect.ts (2 changes: 1 addition & 1 deletion)
@@ -826,7 +826,7 @@ export class JobCookSIPackratInspect extends JobCook<JobCookSIPackratInspectPara
                 LOG.error(`JobCookSIPackratInspect.fetchZip unable to copy asset version ${assetVersion.idAssetVersion} locally to ${tempFile.path}: ${res.error}`, LOG.LS.eJOB);
                 return null;
             }
-            return new ZipFile(tempFile.path, true);
+            return new ZipFile(tempFile.path);
         } catch (err) {
             LOG.error(`JobCookSIPackratInspect.fetchZip unable to copy asset version ${assetVersion.idAssetVersion} locally to ${tempFile.path}`, LOG.LS.eJOB, err);
             return null;
server/storage/interface/AssetStorageAdapter.ts (4 changes: 2 additions & 2 deletions)
@@ -1232,11 +1232,11 @@ export class AssetStorageAdapter {
         if (AFOSR.stream) { // ingested content
             reader = (isBulkIngest) /* istanbul ignore next */ // We don't ingest bulk ingest files as is -- they end up getting cracked apart, so we're unlikely to hit this branch of code
                 ? new BagitReader({ zipFileName: null, zipStream: AFOSR.stream, directory: null, validate: true, validateContent: false })
-                : new ZipStream(AFOSR.stream, isZipFilename); // use isZipFilename to determine if errors should be logged
+                : new ZipStream(AFOSR.stream);
         } else if (AFOSR.fileName) { // non-ingested content is staged locally
             reader = (isBulkIngest)
                 ? new BagitReader({ zipFileName: AFOSR.fileName, zipStream: null, directory: null, validate: true, validateContent: false })
-                : new ZipFile(AFOSR.fileName, isZipFilename); // use isZipFilename to determine if errors should be logged
+                : new ZipFile(AFOSR.fileName);
         } else
             return { success: false, error: 'AssetStorageAdapter.crackAsset unable to determine filename or stream', zip: null, asset: null, isBagit: false };
 
server/tests/storage/impl/LocalStorage/OCFL.test.ts (2 changes: 1 addition & 1 deletion)
@@ -339,7 +339,7 @@ async function createRandomFile(directoryName: string, fileName: string, fileSiz
 
         stream.end();
         stream.on('finish', () => { resolve(fullPath); });
-        stream.on('error', reject);
+        stream.on('error', (error) => { reject(error); });
     } catch (error) {
         LOG.error('OCFL.test.ts createRandomFile() error', LOG.LS.eTEST, error);
         reject(error);
server/tests/utils/helpers.test.ts (2 changes: 1 addition & 1 deletion)
@@ -550,7 +550,7 @@ const streamToFile = (inputStream: Stream, filePath: string) => {
         inputStream
             .pipe(fileWriteStream)
             .on('finish', resolve)
-            .on('error', reject);
+            .on('error', (error) => reject(error));
     });
 };
 */
server/utils/nameHelpers.ts (11 changes: 9 additions & 2 deletions)
@@ -4,7 +4,7 @@ import * as CACHE from '../cache';
 import * as H from './helpers';
 import * as LOG from './logger';
 import * as COMMON from '@dpo-packrat/common';
-import sanitize from 'sanitize-filename';
+// import sanitize from 'sanitize-filename';
 
 export const UNKNOWN_NAME: string = '<UNKNOWN>';

@@ -47,10 +47,17 @@ export class NameHelpers {
         return scene.Name;
     }
 
+    /* eslint-disable no-control-regex */
     static sanitizeFileName(fileName: string): string {
-        return sanitize(fileName.replace(/[\s,]/g, '_').replace(/[^a-zA-Z0-9\-_.]/g, '-'));
+        //basic_clean: return sanitize(fileName.replace(/[\s,]/g, '_').replace(/[^a-zA-Z0-9\-_.]/g, '-'));
+        //legacy: return sanitize(fileName.replace(/:/g, '-').replace(/ /g, '_'), { replacement: '_' });
 
+        return fileName.replace(/[\s,]/g, '_')      // replace spaces and commas
+            .replace(/[^\x00-\x7F]/g, '')           // remove non-ascii characters
+            .replace(/['`]/g, '')                   // remove all apostrophes and single quotes
+            .replace(/[^a-zA-Z0-9\-_.]/g, '-');     // replace special characters except for certain ones
     }
+    /* eslint-enable no-control-regex */
 
     static computeBaseTitle(name: string, subtitle: string | undefined | null): string {
         return (subtitle) ? name.replace(`: ${subtitle}`, '') : name; // base title is the display name, with its subtitle removed, if any
server/utils/parser/csvParser.ts (6 changes: 3 additions & 3 deletions)
@@ -15,17 +15,17 @@ export class CSVParser {
                     return header;
                 };
 
-                fileStream.on('error', () => reject());
+                fileStream.on('error', (error) => reject(error));
 
                 const stream = fileStream.pipe(csv({ mapHeaders }));
                 const rows: T[] = [];
 
                 stream.on('data', (data: T) => rows.push(data));
-                stream.on('error', () => reject());
+                stream.on('error', (error) => reject(error));
                 stream.on('end', () => resolve(rows));
             } catch (error) /* istanbul ignore next */ {
                 LOG.error('CSVParser.parse', LOG.LS.eSYS, error);
-                reject();
+                reject(error);
             }
         });
     }
server/utils/zipFile.ts (21 changes: 7 additions & 14 deletions)
@@ -14,15 +14,13 @@ import { IZip, zipFilterResults } from './IZip';
  */
 export class ZipFile implements IZip {
     private _fileName: string;
-    private _logErrors: boolean = true;
     private _zip: StreamZip | null = null;
     private _entries: string[] = [];
     private _files: string[] = [];
     private _dirs: string[] = [];
 
-    constructor(fileName: string, logErrors: boolean = true) {
+    constructor(fileName: string) {
         this._fileName = fileName;
-        this._logErrors = logErrors;
     }
 
     async load(): Promise<H.IOResults> {
@@ -31,7 +29,7 @@ export class ZipFile implements IZip {
             return new Promise<H.IOResults>((resolve) => {
                 /* istanbul ignore else */
                 if (this._zip) {
-                    this._zip.on('error', () => resolve({ success: false, error: `Error unzipping ${this._fileName}` }));
+                    this._zip.on('error', (error) => resolve({ success: false, error: `Error unzipping ${this._fileName} (${error.message})` }));
                     this._zip.on('ready', () => {
                         /* istanbul ignore else */
                         if (this._zip) {
@@ -55,8 +53,7 @@ export class ZipFile implements IZip {
                     resolve({ success: false, error: 'Zip not initialized' });
             });
         } catch (error) /* istanbul ignore next */ {
-            if (this._logErrors)
-                LOG.error('ZipFile.load', LOG.LS.eSYS, error);
+            LOG.error('ZipFile.load', LOG.LS.eSYS, error);
             return { success: false, error: JSON.stringify(error) };
         }
     }
@@ -78,8 +75,7 @@ export class ZipFile implements IZip {
                     resolve({ success: true });
                 else {
                     const error: string = `ZipFile.close ${err}`;
-                    if (this._logErrors)
-                        LOG.error(error, LOG.LS.eSYS);
+                    LOG.error(error, LOG.LS.eSYS);
                     resolve({ success: false, error });
                 }
             });
@@ -91,8 +87,7 @@
     async getJustFiles(filter: string | null): Promise<string[]> { return zipFilterResults(this._files, filter); }
     async getJustDirectories(filter: string | null): Promise<string[]> { return zipFilterResults(this._dirs, filter); }
 
-    async streamContent(entry: string | null, doNotLogErrors?: boolean | undefined): Promise<NodeJS.ReadableStream | null> {
-        const logErrors = this._logErrors && (doNotLogErrors !== true);
+    async streamContent(entry: string | null): Promise<NodeJS.ReadableStream | null> {
         return new Promise<NodeJS.ReadableStream | null>((resolve) => {
             if (!this._zip)
                 resolve(null);
@@ -105,15 +100,13 @@
                     if (!error && stream)
                         resolve(stream);
                     else {
-                        if (logErrors)
-                            LOG.error(`ZipFile.streamContent ${entry}`, LOG.LS.eSYS, error);
+                        LOG.error(`ZipFile.streamContent ${entry}`, LOG.LS.eSYS, error);
                         resolve(null);
                     }
                 });
             }
         } catch (error) /* istanbul ignore next */ {
-            if (logErrors)
-                LOG.error(`ZipFile.streamContent ${entry}`, LOG.LS.eSYS, error);
+            LOG.error(`ZipFile.streamContent ${entry}`, LOG.LS.eSYS, error);
             resolve(null);
         }
     }
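With the logErrors flag gone, a typical ZipFile call site looks roughly like this (a sketch assuming the load() and getJustFiles() signatures shown above; the import path and helper name are illustrative):

    import { ZipFile } from './utils/zipFile';

    // errors are now always logged inside ZipFile, and load() reports the underlying
    // message, so callers only need to inspect the returned IOResults
    async function listZipFiles(path: string): Promise<string[]> {
        const zip: ZipFile = new ZipFile(path);    // no logErrors argument anymore
        const res = await zip.load();
        if (!res.success)
            throw new Error(`unable to read ${path}: ${res.error}`);
        return zip.getJustFiles(null);             // all files, no filter
    }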
server/utils/zipStream.ts (21 changes: 7 additions & 14 deletions)
@@ -10,15 +10,13 @@ import { IZip, zipFilterResults } from './IZip';
  */
 export class ZipStream implements IZip {
     private _inputStream: NodeJS.ReadableStream | null;
-    private _logErrors: boolean = true;
     private _zip: JSZip | null = null;
     private _entries: Set<string> = new Set<string>();
     private _files: Set<string> = new Set<string>();
     private _dirs: Set<string> = new Set<string>();
 
-    constructor(inputStream: NodeJS.ReadableStream | null = null, logErrors: boolean = true) {
+    constructor(inputStream: NodeJS.ReadableStream | null = null) {
         this._inputStream = inputStream;
-        this._logErrors = logErrors;
     }
 
     async load(): Promise<H.IOResults> {
@@ -32,15 +30,14 @@
 
             const P = new Promise<Buffer>((resolve, reject) => {
                 this._inputStream!.on('data', (chunk: Buffer) => chunks.push(chunk)); /* istanbul ignore next */ // eslint-disable-line @typescript-eslint/no-non-null-assertion
-                this._inputStream!.on('error', () => reject()); // eslint-disable-line @typescript-eslint/no-non-null-assertion
+                this._inputStream!.on('error', (error) => reject(error)); // eslint-disable-line @typescript-eslint/no-non-null-assertion
                 this._inputStream!.on('end', () => resolve(Buffer.concat(chunks))); // eslint-disable-line @typescript-eslint/no-non-null-assertion
             });
 
             this._zip = await JSZ.loadAsync(await P);
             return this.extractEntries();
         } catch (err) /* istanbul ignore next */ {
-            if (this._logErrors)
-                LOG.error('ZipStream.load', LOG.LS.eSYS, err);
+            LOG.error('ZipStream.load', LOG.LS.eSYS, err);
             return { success: false, error: 'ZipStream.load' };
         }
     }
@@ -54,8 +51,7 @@
             this._zip.file(fileNameAndPath, H.Helpers.readFileFromStreamThrowErrors(inputStream), { binary: true });
             return this.extractEntries(); // Order n^2 if we're add()'ing lots of entries.
         } catch (err) /* istanbul ignore next */ {
-            if (this._logErrors)
-                LOG.error('ZipStream.add', LOG.LS.eSYS, err);
+            LOG.error('ZipStream.add', LOG.LS.eSYS, err);
             return { success: false, error: 'ZipStream.add' };
         }
     }
@@ -70,8 +66,7 @@
                 this.extractEntry(entry);
         } catch (err) /* istanbul ignore next */ {
             const error: string = `ZipStream.extractEntries: ${JSON.stringify(err)}`;
-            if (this._logErrors)
-                LOG.error(error, LOG.LS.eSYS,);
+            LOG.error(error, LOG.LS.eSYS,);
             return { success: false, error };
         }
         return { success: true };
@@ -101,8 +96,7 @@
     async getJustFiles(filter: string | null): Promise<string[]> { return zipFilterResults(Array.from(this._files.values()), filter); }
     async getJustDirectories(filter: string | null): Promise<string[]> { return zipFilterResults(Array.from(this._dirs.values()), filter); }
 
-    async streamContent(entry: string | null, doNotLogErrors?: boolean | undefined): Promise<NodeJS.ReadableStream | null> {
-        const logErrors = this._logErrors && (doNotLogErrors !== true);
+    async streamContent(entry: string | null): Promise<NodeJS.ReadableStream | null> {
         try {
             if (!this._zip)
                 return null;
@@ -114,8 +108,7 @@
             }
         } catch (err) /* istanbul ignore next */ {
             const error: string = `ZipStream.streamContent: ${JSON.stringify(err)}`;
-            if (logErrors)
-                LOG.error(error, LOG.LS.eSYS);
+            LOG.error(error, LOG.LS.eSYS);
             return null;
         }
     }
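ZipStream mirrors the ZipFile changes, so call sites drop both the constructor's logErrors flag and the old doNotLogErrors argument to streamContent. A rough migration sketch (import path and function name are illustrative):

    import { ZipStream } from './utils/zipStream';

    // construct without the old logErrors flag; logging now always happens inside the helper
    async function readEntryFromStream(input: NodeJS.ReadableStream, entry: string): Promise<NodeJS.ReadableStream | null> {
        const zip: ZipStream = new ZipStream(input);
        const res = await zip.load();
        if (!res.success)
            return null;
        return zip.streamContent(entry);   // the old doNotLogErrors argument no longer exists
    }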
server/workflow/impl/Packrat/WorkflowUpload.ts (2 changes: 1 addition & 1 deletion)
@@ -116,7 +116,7 @@ export class WorkflowUpload implements WF.IWorkflow {
             // it's not a model (e.g. Capture Data)
             // use ZipFile so we don't need to load it all into memory
             const filePath: string = Config.storage.rootStaging+'/'+assetVersion.StorageKeyStaging;
-            const ZS: ZipFile = new ZipFile(filePath,true);
+            const ZS: ZipFile = new ZipFile(filePath);
             const zipRes: H.IOResults = await ZS.load();
             if (!zipRes.success)
                 return this.handleError(`WorkflowUpload.validateFiles unable to unzip asset version ${RSR.fileName}: ${zipRes.error}`);
