Skip to content

Commit

Permalink
fix: Fix fromArrow nested null value extraction.
Browse files Browse the repository at this point in the history
  • Loading branch information
jheer committed Dec 9, 2020
1 parent 7de8b53 commit bdb22f3
Show file tree
Hide file tree
Showing 2 changed files with 27 additions and 28 deletions.
29 changes: 13 additions & 16 deletions src/format/from-arrow.js
Original file line number Diff line number Diff line change
Expand Up @@ -44,35 +44,32 @@ export default function(arrowTable, options = {}) {
}

// Convert a nested Arrow vector (typeId LIST or STRUCT) into a JavaScript
// array with one entry per row; any other typeId is passed to error().
// NOTE(review): this is a rendered diff whose +/- markers were lost. The
// `create` lines look like the pre-commit version and the `extract` lines
// like the post-commit version; confirm against the repository.
function arrayFromNested(vector) {
const create = vector.typeId === LIST ? listExtractor(vector)
// for lists, fetch the i-th child vector and convert it to an array
const extract = vector.typeId === LIST ? i => arrayExtractor(vector.get(i))
: vector.typeId === STRUCT ? structExtractor(vector)
: error(`Unsupported Arrow type: ${toString(vector.VectorName)}`);

// generate and return objects for each nested value
return Array.from({ length: vector.length }, create);
// rows where vector.isValid(i) is falsy yield null and are never extracted
return Array.from(
{ length: vector.length },
(_, i) => vector.isValid(i) ? extract(i) : null
);
}

// NOTE(review): this is a rendered diff whose +/- markers were lost. The
// `listExtractor` lines look like the pre-commit helper and `arrayExtractor`
// like its replacement, both ending at the single closing brace below;
// confirm against the repository.
function listExtractor(vector) {
// extract a list value. recurse if nested, otherwise convert to array
return (_, i) => {
const v = vector.get(i);
return v.numChildren ? arrayFromNested(v) : arrayFromVector(v);
};
// Convert a single Arrow vector to an array: a vector with a truthy
// numChildren goes through arrayFromNested, any other through arrayFromVector.
function arrayExtractor(vector) {
// extract an array, recurse if nested.
return vector.numChildren
? arrayFromNested(vector)
: arrayFromVector(vector);
}

// Build, via unroll(), the per-row object generator for a STRUCT vector,
// with one property per struct field name.
// NOTE(review): this is a rendered diff whose +/- markers were lost. The
// `data` / '_,i' lines look like the pre-commit version and the
// `values` / 'i' lines like the post-commit version; confirm against the
// repository.
function structExtractor(vector) {
// extract struct field names
// extract struct field names and values
const names = vector.type.children.map(field => field.name);

// extract struct field values into parallel arrays
const data = names.map((_, i) => {
const v = vector.getChildAt(i);
return v.numChildren ? arrayFromNested(v) : arrayFromVector(v);
});
// one converted array per child field, in the same order as `names`
const values = names.map((_, i) => arrayExtractor(vector.getChildAt(i)));

// function to generate objects with field name properties
// NOTE(review): `unroll` is defined outside this view. Presumably it exposes
// the arrays as _0, _1, ... and compiles the expression string into a
// function of the listed argument names; confirm.
return unroll(
data, '_,i',
values, 'i',
// object-literal source text with one `<name>:_<d>[i]` entry per field
'({' + names.map((_, d) => `${toString(_)}:_${d}[i]`) + '})'
);
}
Expand Down
26 changes: 14 additions & 12 deletions test/format/arrow-test.js
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ function arrowColumn(data, nullCount = 0) {
length: data.length,
get: row => data[row],
toArray: () => data,
isValid: row => data[row] != null,
[Symbol.iterator]: () => data[Symbol.iterator](),
nullCount,
_data: data
Expand Down Expand Up @@ -51,19 +52,20 @@ function arrowDictionary(data) {
}

// Test helper: build a mock list column (typeId LIST, numChildren 1) whose
// rows are themselves mock columns created with arrowColumn().
// NOTE(review): this is a rendered diff whose +/- markers were lost. The
// first `const c` line looks like the pre-commit version and the second
// like the post-commit version; confirm against the repository.
function arrowListColumn(data) {
const c = arrowColumn(data.map(d => arrowColumn(d)));
// falsy rows are kept as null instead of being wrapped in a mock column
const c = arrowColumn(data.map(d => d ? arrowColumn(d) : null));
c.typeId = LIST;
c.numChildren = 1;
return c;
}

// Test helper: build a mock struct column (typeId STRUCT) exposing field
// names through `type.children` and child columns through getChildAt().
// NOTE(review): this is a rendered diff whose +/- markers were lost. The
// two-argument signature and `children[0].length` look like the pre-commit
// version; the `valid` parameter, `valid.length` and `isValid` look like
// the post-commit version. Confirm against the repository.
function arrowStructColumn(names, children) {
function arrowStructColumn(valid, names, children) {
return {
type: { children: names.map(name => ({ name })) },
typeId: STRUCT,
length: children[0].length,
// row count comes from the validity array rather than the first child
length: valid.length,
numChildren: names.length,
getChildAt: i => children[i]
getChildAt: i => children[i],
// a row is valid when its entry in `valid` is truthy
isValid: row => !!valid[row]
};
}

Expand Down Expand Up @@ -104,7 +106,7 @@ tape('fromArrow can unpack Apache Arrow tables', t => {
});

tape('fromArrow can read Apache Arrow lists', t => {
const d = [[1, 2, 3], [4, 5]];
const d = [[1, 2, 3], null, [4, 5]];
const l = arrowListColumn(d);
const at = arrowTable({ l });
const dt = fromArrow(at);
Expand All @@ -114,10 +116,10 @@ tape('fromArrow can read Apache Arrow lists', t => {
});

tape('fromArrow can read Apache Arrow structs', t => {
const d = [{ foo: 1, bar: [2, 3] }, { foo: 2, bar: [4] }];
const s = arrowStructColumn(Object.keys(d[0]), [
arrowColumn(d.map(v => v.foo)),
arrowListColumn(d.map(v => v.bar))
const d = [{ foo: 1, bar: [2, 3] }, null, { foo: 2, bar: [4] }];
const s = arrowStructColumn(d, Object.keys(d[0]), [
arrowColumn(d.map(v => v ? v.foo : null)),
arrowListColumn(d.map(v => v ? v.bar : null))
]);
const at = arrowTable({ s });
const dt = fromArrow(at);
Expand All @@ -128,9 +130,9 @@ tape('fromArrow can read Apache Arrow structs', t => {

tape('fromArrow can read nested Apache Arrow structs', t => {
const d = [{ foo: 1, bar: { bop: 2 } }, { foo: 2, bar: { bop: 3 } }];
const s = arrowStructColumn(Object.keys(d[0]), [
arrowColumn(d.map(v => v.foo)),
arrowStructColumn(['bop'], [ arrowColumn([2, 3]) ])
const s = arrowStructColumn(d, Object.keys(d[0]), [
arrowColumn(d.map(v => v ? v.foo : null)),
arrowStructColumn([1, 1], ['bop'], [ arrowColumn([2, 3]) ])
]);
const at = arrowTable({ s });
const dt = fromArrow(at);
Expand Down

0 comments on commit bdb22f3

Please sign in to comment.