Skip to content

Commit

Permalink
Allow completely unknown binary sequences
Browse files Browse the repository at this point in the history
Fixes #179
  • Loading branch information
danieledler committed Mar 29, 2021
1 parent 44f228c commit 0b92a81
Show file tree
Hide file tree
Showing 2 changed files with 131 additions and 76 deletions.
63 changes: 44 additions & 19 deletions src/common/typecheckAlignment.js
Original file line number Diff line number Diff line change
@@ -1,24 +1,26 @@
import UserFixError from "./errors";
import UserFixError from './errors';

/**
 * Reduce a list of data types to a single overall data type.
 *
 * `undefined` entries are ignored. If every remaining entry is the same,
 * that type is returned. The only tolerated combination of different types
 * is 'binary' together with 'multistate', which resolves to 'multistate'.
 * Any other combination yields 'mixed'.
 *
 * @param {Array<string|undefined>} dataTypes - Data types to combine.
 * @returns {string|undefined} The combined data type, 'mixed' when the types
 *   are incompatible, or `undefined` when no defined type was supplied.
 */
export const getFinalDataType = (dataTypes) => {
  const notUndefinedTypes = dataTypes.filter((d) => d !== undefined);
  if (notUndefinedTypes.length === 0) {
    return undefined;
  }
  // Running result; may be promoted from 'binary' to 'multistate' below.
  let firstType = notUndefinedTypes[0];
  for (let i = 1; i < notUndefinedTypes.length; ++i) {
    const type = notUndefinedTypes[i];
    if (type !== firstType) {
      if (
        (type === 'binary' && firstType === 'multistate') ||
        (type === 'multistate' && firstType === 'binary')
      ) {
        firstType = 'multistate';
      } else {
        return 'mixed';
      }
    }
  }
  return firstType;
};

// Valid characters taken from Standard-RAxML (axml.c)
// Matches every character that is not one of '0', '1', '-' or '?'.
// The hyphen is escaped so it is a literal gap character rather than the
// start of a '1'-to-'?' character range.
const reInvalidBinary = /[^01\-?]/g;
Expand Down Expand Up @@ -87,14 +89,18 @@ export default function typecheckAlignment(alignment) {
// } else {
// dataType = 'rna';
// }
dataType = 'nucleotide';
if (/[ACG]/i.test(code)) {
// Require at least one A, C or G so that a sequence of only N or ? is not typed as nucleotide
dataType = 'nucleotide';
} else {
dataType = 'unknown';
}
}

if (!dataType) {
if (!dataType || dataType === 'unknown') {
if (multistateMatch.test(code)) {
dataType = 'multistate';
}
else if (binaryMatch.test(code)) {
} else if (binaryMatch.test(code)) {
dataType = 'binary';
} else if (unknownMatch.test(code)) {
dataType = 'unknown';
Expand All @@ -106,34 +112,53 @@ export default function typecheckAlignment(alignment) {
sequence.dataType = dataType;
++numSequencesTypechecked;
sequenceDataTypes.push(sequence.dataType);
})
});

if (dataTypes.delete('unknown')) {
console.log('At least one sequence have only unknown characters');
if (dataTypes.size === 0) {
throw new Error(`Invalid alignment: cannot determine data type because all ${numSequencesTypechecked} sequences are of type unknown`);
throw new Error(
`Invalid alignment: cannot determine data type because all ${numSequencesTypechecked} sequences are of type unknown`
);
}
}
let dataType = dataTypes.values().next().value;
if (dataTypes.size > 1) {
// Only valid case with different types is binary and multistate as [01] is a subset of [012].
const isMultistate = !sequenceDataTypes.find(type => type !== 'binary' && type !== 'multistate');
const isMultistate = !sequenceDataTypes.find(
(type) => type !== 'binary' && type !== 'multistate'
);
if (isMultistate) {
dataType = 'multistate';
}
else {
} else {
dataType = 'invalid';
console.log('Illegal mix of data types among sequences:', sequenceDataTypes);
throw new Error(`Invalid alignment: sequences must be of same data type, but found [${Array.from(dataTypes.keys())}].`);
console.log(
'Illegal mix of data types among sequences:',
sequenceDataTypes
);
throw new Error(
`Invalid alignment: sequences must be of same data type, but found [${Array.from(
dataTypes.keys()
)}].`
);
}
}
alignment.sequences.forEach((seq, index) => {
const invalidSiteIndex = findInvalidCharacter(seq.code, dataType);
if (invalidSiteIndex !== -1) {
const sample = seq.code.length <= 8 ? `'${seq.code}'` : `'${seq.code.substring(0, 8)}'...`;
throw new UserFixError(`Invalid character '${seq.code[invalidSiteIndex]}' at site ${invalidSiteIndex + 1} in sequence ${index + 1} (${sample}) for inferred data type '${dataType}'`);
const sample =
seq.code.length <= 8
? `'${seq.code}'`
: `'${seq.code.substring(0, 8)}'...`;
throw new UserFixError(
`Invalid character '${seq.code[invalidSiteIndex]}' at site ${
invalidSiteIndex + 1
} in sequence ${
index + 1
} (${sample}) for inferred data type '${dataType}'`
);
}
})
});
alignment.hasInvariantSites = hasInvariantSites(alignment.sequences);
alignment.dataType = dataType;
alignment.typecheckingComplete = true;
Expand Down
Loading

0 comments on commit 0b92a81

Please sign in to comment.