diff --git a/esm/btree.js b/esm/btree.js
index 26ae794..a6cf644 100644
--- a/esm/btree.js
+++ b/esm/btree.js
@@ -1,12 +1,5 @@
 import {_unpack_struct_from, _structure_size, struct, dtype_getter, bitSize, DataView64} from './core.js';
-import pako from '../node_modules/pako/dist/pako.esm.mjs';
-
-const zlib = {
-  decompress: function(buf) {
-    let input_array = new Uint8Array(buf);
-    return pako.inflate(input_array).buffer;
-  }
-};
+import { Filters } from './filters.js';
 
 class AbstractBTree {
   //B_LINK_NODE = null;
@@ -302,7 +295,7 @@ export class BTreeV1RawDataChunks extends BTreeV1 {
   _filter_chunk(chunk_buffer, filter_mask, filter_pipeline, itemsize) {
     //""" Apply decompression filters to a chunk of data. """
     let num_filters = filter_pipeline.length;
-    var chunk_buffer_out = chunk_buffer.slice();
+    let buf = chunk_buffer.slice();
 
     for (var filter_index=num_filters-1; filter_index >=0; filter_index--) {
       //for i, pipeline_entry in enumerate(filter_pipeline[::-1]):
@@ -313,32 +306,15 @@ export class BTreeV1RawDataChunks extends BTreeV1 {
       }
       let pipeline_entry = filter_pipeline[filter_index];
       let filter_id = pipeline_entry.get('filter_id');
-      if (filter_id == GZIP_DEFLATE_FILTER) {
-        chunk_buffer_out = zlib.decompress(chunk_buffer_out);
-      }
-
-      else if (filter_id == SHUFFLE_FILTER) {
-        let buffer_size = chunk_buffer_out.byteLength;
-        var unshuffled_view = new Uint8Array(buffer_size);
-        let step = Math.floor(buffer_size / itemsize);
-        let shuffled_view = new DataView(chunk_buffer_out);
-        for (var j=0; j<itemsize; j++) {
-          for (var i=0; i<step; i++) {
-            unshuffled_view[j + i*itemsize] = shuffled_view.getUint8(j*step + i);
-          }
-        }
-        chunk_buffer_out = unshuffled_view.buffer;
-      }
-      else if (filter_id == FLETCH32_FILTER) {
-        _verify_fletcher32(chunk_buffer_out);
-        //# strip off 4-byte checksum from end of buffer
-        chunk_buffer_out = chunk_buffer_out.slice(0, -4);
+      if (Filters.has(filter_id)) {
+        let filter_function = Filters.get(filter_id);
+        buf = filter_function(buf, itemsize);
       }
       else {
         throw 'NotImplementedError("Filter with id:' + filter_id.toFixed() + ' not supported")';
       }
     }
-    return chunk_buffer_out;
+    return buf;
   }
 }
 
@@ -663,46 +639,3 @@
   }
 }
 
-function _verify_fletcher32(chunk_buffer) {
-  //""" Verify a chunk with a fletcher32 checksum. """
-  //# calculate checksums
-  var odd_chunk_buffer = ((chunk_buffer.byteLength % 2) != 0);
-  var data_length = chunk_buffer.byteLength - 4;
-  var view = new DataView(chunk_buffer);
-
-  var sum1 = 0;
-  var sum2 = 0;
-  for (var offset=0; offset<(data_length-1); offset+=2) {
-    let datum = view.getUint16(offset, true); // little-endian
-    sum1 = (sum1 + datum) % 65535
-    sum2 = (sum2 + sum1) % 65535
-  }
-  if (odd_chunk_buffer) {
-    // process the last item:
-    let datum = view.getUint8(data_length-1);
-    sum1 = (sum1 + datum) % 65535
-    sum2 = (sum2 + sum1) % 65535
-  }
-
-  //# extract stored checksums
-  var [ref_sum1, ref_sum2] = struct.unpack_from('>HH', chunk_buffer, data_length); // .fromstring(chunk_buffer[-4:], '>u2')
-  ref_sum1 = ref_sum1 % 65535
-  ref_sum2 = ref_sum2 % 65535
-
-  //# compare
-  if (sum1 != ref_sum1 || sum2 != ref_sum2) {
-    throw 'ValueError("fletcher32 checksum invalid")';
-  }
-  return true
-}
-
-
-
-//# IV.A.2.l The Data Storage - Filter Pipeline message
-var RESERVED_FILTER = 0;
-export const GZIP_DEFLATE_FILTER = 1;
-export const SHUFFLE_FILTER = 2;
-export const FLETCH32_FILTER = 3;
-var SZIP_FILTER = 4;
-var NBIT_FILTER = 5;
-var SCALEOFFSET_FILTER = 6;
diff --git a/esm/dataobjects.js b/esm/dataobjects.js
index 85cc134..3924078 100644
--- a/esm/dataobjects.js
+++ b/esm/dataobjects.js
@@ -1,6 +1,6 @@
 import { DatatypeMessage } from './datatype-msg.js';
 import { _structure_size, _padded_size, _unpack_struct_from, struct, dtype_getter, DataView64, assert } from './core.js';
-import { BTreeV1Groups, BTreeV1RawDataChunks, BTreeV2GroupNames, BTreeV2GroupOrders, GZIP_DEFLATE_FILTER, SHUFFLE_FILTER, FLETCH32_FILTER } from './btree.js';
+import { BTreeV1Groups, BTreeV1RawDataChunks, BTreeV2GroupNames, BTreeV2GroupOrders } from './btree.js';
 import { Heap, SymbolTable, GlobalHeap, FractalHeap } from './misc-low-level.js';
 
 export class DataObjects {
diff --git a/esm/filters.js b/esm/filters.js
new file mode 100644
index 0000000..65ea36f
--- /dev/null
+++ b/esm/filters.js
@@ -0,0 +1,76 @@
+import * as pako from '../node_modules/pako/dist/pako.esm.mjs';
+import { struct } from './core.js';
+
+const zlib_decompress = function (buf, itemsize) {
+  let input_array = new Uint8Array(buf);
+  return pako.inflate(input_array).buffer;
+}
+
+const unshuffle = function (buf, itemsize) {
+  let buffer_size = buf.byteLength;
+  let unshuffled_view = new Uint8Array(buffer_size);
+  let step = Math.floor(buffer_size / itemsize);
+  let shuffled_view = new DataView(buf);
+  for (var j = 0; j < itemsize; j++) {
+    for (var i = 0; i < step; i++) {
+      unshuffled_view[j + i * itemsize] = shuffled_view.getUint8(j * step + i);
+    }
+  }
+  return unshuffled_view.buffer;
+}
+
+const fletch32 = function (buf, itemsize) {
+  _verify_fletcher32(buf);
+  //# strip off 4-byte checksum from end of buffer
+  return buf.slice(0, -4);
+}
+
+function _verify_fletcher32(chunk_buffer) {
+  //""" Verify a chunk with a fletcher32 checksum. """
+  //# calculate checksums
+  var odd_chunk_buffer = ((chunk_buffer.byteLength % 2) != 0);
+  var data_length = chunk_buffer.byteLength - 4;
+  var view = new DataView(chunk_buffer);
+
+  var sum1 = 0;
+  var sum2 = 0;
+  for (var offset=0; offset<(data_length-1); offset+=2) {
+    let datum = view.getUint16(offset, true); // little-endian
+    sum1 = (sum1 + datum) % 65535
+    sum2 = (sum2 + sum1) % 65535
+  }
+  if (odd_chunk_buffer) {
+    // process the last item:
+    let datum = view.getUint8(data_length-1);
+    sum1 = (sum1 + datum) % 65535
+    sum2 = (sum2 + sum1) % 65535
+  }
+
+  //# extract stored checksums
+  var [ref_sum1, ref_sum2] = struct.unpack_from('>HH', chunk_buffer, data_length); // .fromstring(chunk_buffer[-4:], '>u2')
+  ref_sum1 = ref_sum1 % 65535
+  ref_sum2 = ref_sum2 % 65535
+
+  //# compare
+  if (sum1 != ref_sum1 || sum2 != ref_sum2) {
+    throw 'ValueError("fletcher32 checksum invalid")';
+  }
+  return true
+}
+
+//# IV.A.2.l The Data Storage - Filter Pipeline message
+var RESERVED_FILTER = 0;
+const GZIP_DEFLATE_FILTER = 1;
+const SHUFFLE_FILTER = 2;
+const FLETCH32_FILTER = 3;
+var SZIP_FILTER = 4;
+var NBIT_FILTER = 5;
+var SCALEOFFSET_FILTER = 6;
+
+// To register a new filter, add a function (ArrayBuffer) => ArrayBuffer
+// to the following map, using a key that corresponds to filter_id (int)
+export const Filters = new Map([
+  [GZIP_DEFLATE_FILTER, zlib_decompress],
+  [SHUFFLE_FILTER, unshuffle],
+  [FLETCH32_FILTER, fletch32]
+]);
\ No newline at end of file
diff --git a/esm/high-level.js b/esm/high-level.js
index 9afab45..a559768 100644
--- a/esm/high-level.js
+++ b/esm/high-level.js
@@ -1,5 +1,6 @@
 import {DataObjects} from './dataobjects.js';
 import {SuperBlock} from './misc-low-level.js';
+export { Filters } from './filters.js';
 
 export class Group {
   /*
@@ -371,4 +372,4 @@ function posix_dirname(p) {
 
 function normpath(path) {
   return path.replace(/\/(\/)+/g, '/'); // path = posixpath.normpath(y)
-}
\ No newline at end of file
+}
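
Note on the new Filters registry (not part of the patch above): Filters is a plain Map, re-exported from esm/high-level.js, so an application can register additional decompression filters at runtime without patching the library. A minimal sketch, assuming the registered HDF5 filter id 32000 (LZF) and a hypothetical lzfDecompress helper supplied by the application:

    import { Filters } from './esm/high-level.js';

    const LZF_FILTER = 32000; // registered HDF5 filter id for LZF

    // Each entry maps a filter_id (int) to a (ArrayBuffer, itemsize) => ArrayBuffer
    // function, following the convention documented in esm/filters.js.
    Filters.set(LZF_FILTER, (buf, itemsize) => {
      return lzfDecompress(buf); // hypothetical decompressor, not part of this library
    });

Registration has to happen before the dataset is read: a chunk whose filter pipeline references an id missing from the map falls through to the NotImplementedError throw in _filter_chunk.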