Skip to content

Commit

Permalink
solver: add IntersectionSolver
Browse files Browse the repository at this point in the history
  • Loading branch information
missinglink committed Apr 1, 2019
1 parent 68cd415 commit 8043856
Show file tree
Hide file tree
Showing 13 changed files with 230 additions and 19 deletions.
3 changes: 2 additions & 1 deletion .jshintrc
Original file line number Diff line number Diff line change
Expand Up @@ -7,5 +7,6 @@
"undef": true,
"unused": true,
"trailing": true,
"asi": true
"asi": true,
"loopfunc": true
}
10 changes: 8 additions & 2 deletions bin/cli.js
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,11 @@ const PostcodeClassifier = require('../classifier/PostcodeClassifier')
const StreetClassifier = require('../classifier/StreetClassifier')
const DirectionalClassifier = require('../classifier/DirectionalClassifier')
const OrdinalClassifier = require('../classifier/OrdinalClassifier')
const IntersectionClassifier = require('../classifier/IntersectionClassifier')
const MultiStreetClassifier = require('../classifier/MultiStreetClassifier')
const MultiWordStreetClassifier = require('../classifier/MultiWordStreetClassifier')
const ExclusiveCarseianSolver = require('../solver/ExclusiveCarseianSolver')
const MultiStreetSolver = require('../solver/MultiStreetSolver')
const input = process.argv.slice(2).join(' ')

// tokenizer
Expand All @@ -28,9 +31,11 @@ const classifiers = [
new StreetClassifier(),
new DirectionalClassifier(),
new OrdinalClassifier(),
new IntersectionClassifier(),

// multi-word classifiers
new MultiWordStreetClassifier()
new MultiWordStreetClassifier(),
new MultiStreetClassifier()
]

// run all classifiers
Expand All @@ -39,7 +44,8 @@ classifiers.forEach(c => c.classify(t))
pretty.classifications(t, util.format('(%sms)', new Date() - start))

const solvers = [
new ExclusiveCarseianSolver()
new ExclusiveCarseianSolver(),
new MultiStreetSolver()
]

// run all solvers
Expand Down
2 changes: 1 addition & 1 deletion bin/pretty.js
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ function permutationClassifications (tokenizer) {
process.stdout.write(perm.body.padEnd(32) + '➜ ')
for (let k in perm.classifications) {
let classification = perm.classifications[k]
let block = chalk.bgGreen.bold(classification.label + `=${classification.confidence.toFixed(1)}`)
let block = chalk.bgRed.bold(classification.label + `=${classification.confidence.toFixed(1)}`)
process.stdout.write(block)
if (k !== keys.slice(-1)) {
process.stdout.write(' ')
Expand Down
8 changes: 8 additions & 0 deletions classification/Classification.js
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,14 @@ class Classification {
this.confidence = confidence || 1.0
this.meta = meta || {}
}

equals (classification) {
// @todo: compare meta?
return (
this.constructor.name === classification.constructor.name &&
this.confidence === classification.confidence
)
}
}

module.exports = Classification
10 changes: 10 additions & 0 deletions classification/IntersectionClassification.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
const Classification = require('../classification/Classification')

/**
 * Classification carrying the label 'intersection'.
 *
 * The confidence and meta arguments are forwarded untouched to the
 * Classification base constructor.
 */
class IntersectionClassification extends Classification {
  /**
   * @param {number} [confidence] - passed through to the base class
   * @param {object} [meta] - passed through to the base class
   */
  constructor (confidence, meta) {
    super(confidence, meta)

    // label assigned after the base constructor has run
    this.label = 'intersection'
  }
}

module.exports = IntersectionClassification
11 changes: 11 additions & 0 deletions classification/MultiStreetClassification.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
const Classification = require('./Classification')

/**
 * Classification carrying the label 'multistreet'.
 *
 * Instances are flagged as non-public (`public = false`); the confidence and
 * meta arguments are forwarded untouched to the Classification base constructor.
 */
class MultiStreetClassification extends Classification {
  /**
   * @param {number} [confidence] - passed through to the base class
   * @param {object} [meta] - passed through to the base class
   */
  constructor (confidence, meta) {
    super(confidence, meta)

    // both fields are assigned after the base constructor has run
    this.public = false
    this.label = 'multistreet'
  }
}

module.exports = MultiStreetClassification
27 changes: 27 additions & 0 deletions classifier/IntersectionClassifier.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
const PermutationClassifier = require('./super/PermutationClassifier')
const IntersectionClassification = require('../classification/IntersectionClassification')
const libpostal = require('../resources/libpostal/libpostal')

// dictionaries sourced from the libpostal project
// see: https://github.com/openvenues/libpostal

const languages = libpostal.languages

/**
 * Permutation classifier which marks spans whose normalised text appears in
 * the libpostal 'cross_streets.txt' dictionaries as an intersection.
 */
class IntersectionClassifier extends PermutationClassifier {
  /**
   * Build the lookup index from the libpostal cross street dictionaries.
   */
  setup () {
    this.index = {}
    libpostal.load(this.index, languages, 'cross_streets.txt')
  }

  /**
   * Classify a single span.
   *
   * @param {object} span - reads `span.contains.numerals` and `span.norm`,
   *   and calls `span.classify()` on a dictionary hit
   */
  each (span) {
    // spans containing numbers are never intersections;
    // the index is keyed by full token so the lookup is a single property check
    const isCrossStreet = !span.contains.numerals && this.index.hasOwnProperty(span.norm)

    if (isCrossStreet) {
      span.classify(new IntersectionClassification(1))
    }
  }
}

module.exports = IntersectionClassifier
79 changes: 79 additions & 0 deletions classifier/MultiStreetClassifier.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
const SectionClassifier = require('./super/SectionClassifier')
const MultiStreetClassification = require('../classification/MultiStreetClassification')

/**
 * Section classifier which looks for a run of children describing several
 * streets joined by an intersection token, and classifies the longest
 * permutation made up solely of those children as a MultiStreetClassification.
 */
class MultiStreetClassifier extends SectionClassifier {
  /**
   * Inspect one section and, when it contains at least two street children
   * and one intersection child, classify the longest matching permutation.
   *
   * @param {object} section - reads `section.child` and `section.permutation`;
   *   each permutation is expected to expose `child`, `start`, `end`,
   *   `classifications` and `classify()`
   */
  each (section) {
    const children = {
      all: [],
      street: [],
      intersection: []
    }
    let lastOffset = 0

    // a child counts as classified when it carries the classification itself
    // or when it is part of a permutation which carries it
    const isClassifiedAs = (child, name) => (
      child.classifications.hasOwnProperty(name) ||
      section.permutation.some(p => (
        p.classifications.hasOwnProperty(name) &&
        p.child.some(pc => pc === child)
      ))
    )

    // record the child in its bucket when it is the first of its kind
    // or directly follows the previously recorded child
    const collect = (bucket, child, offset) => {
      if (bucket.length === 0 || offset === lastOffset + 1) {
        children.all.push(child)
        bucket.push(child)
        lastOffset = offset
      }
    }

    // find a span of multiple children in this section which are either
    // classified as street or intersection, or are part of a permutation
    // classified as such (street takes precedence over intersection)
    section.child.forEach((child, offset) => {
      if (isClassifiedAs(child, 'StreetClassification')) {
        collect(children.street, child, offset)
      } else if (isClassifiedAs(child, 'IntersectionClassification')) {
        collect(children.intersection, child, offset)
      }
    })

    // validate the child arrays
    if (
      children.all.length < 3 ||
      children.intersection.length < 1 ||
      children.street.length < 2
    ) {
      return
    }

    // @todo: ensure that at least one IntersectionClassification exists

    // keep only permutations whose every child is part of the set above
    // (filter, not map, so non-matching permutations are dropped rather
    // than leaving undefined entries in the list)
    const matches = section.permutation.filter(p => {
      return p.child.every(pc => children.all.includes(pc))
    })

    if (matches.length === 0) { return }

    // only classify the longest match; on a tie the earliest permutation wins
    const length = p => p.end - p.start
    const longest = matches.reduce((best, p) => (length(p) > length(best) ? p : best))

    longest.classify(new MultiStreetClassification(1.0))
  }
}

module.exports = MultiStreetClassifier
46 changes: 35 additions & 11 deletions solver/ExclusiveCarseianSolver.js
Original file line number Diff line number Diff line change
Expand Up @@ -3,34 +3,58 @@ const HashMapSolver = require('./super/HashMapSolver')
class ExclusiveCarseianSolver extends HashMapSolver {
solve (tokenizer) {
let map = this.generateHashMap(tokenizer)
tokenizer.solution = tokenizer.solution.concat(
this.exclusiveCartesian.apply(null, Object.keys(map).map(k => map[k]))
)
let solutions = this.exclusiveCartesian.apply(null, Object.keys(map).map(k => map[k]))
tokenizer.solution = tokenizer.solution.concat(solutions)
}

// compute the unique cartesian product
// (all permutations of non-overlapping tokens from different classifications)
exclusiveCartesian () {
let r = []; let arg = arguments; let max = arg.length - 1
if (!arg.length) { return r }
const helper = (arr, i) => {
for (let j = 0, l = arg[i].length; j < l; j++) {
let a = arr.slice(0) // clone arr

// exclusive - same span cannot appear twice
let exists = false
for (let k = 0; k < a.length; k++) {
if (a[k].span.intersects(arg[i][j].span)) {
exists = true
break
// exclusive - same span range cannot appear twice
if (!isRangeConflict(a, arg[i][j].span)) {
a.push(arg[i][j])
}

if (i === max) {
// duplicates - prevent duplicate solutions
if (!isDuplicateSolutionArray(r, a)) {
r.push(a)
}
} else {
helper(a, i + 1)
}
if (!exists) { a.push(arg[i][j]) }
if (i === max) { r.push(a) } else { helper(a, i + 1) }
}
}
helper([], 0)
return r
}
}

/**
 * Report whether a span overlaps any span already present in a solution array.
 *
 * @param {Array<{span: object}>} arr - entries exposing a `span` property
 * @param {object} span - candidate span exposing `intersects(otherSpan)`
 * @returns {boolean} true when the candidate intersects at least one entry
 */
function isRangeConflict (arr, span) {
  return arr.some(entry => span.intersects(entry.span))
}

/**
 * Report whether an array of solutions is already present in a list of rows.
 *
 * A row is a duplicate when it has the same length as `arr` and every
 * element `equals()` the element of `arr` at the same index.
 *
 * @todo: deduplicate out-of-order yet the same arrays
 *
 * @param {Array<Array<object>>} rows - previously accepted solution arrays
 * @param {Array<object>} arr - candidate solution array
 * @returns {boolean} true when an identical, same-order row exists
 */
function isDuplicateSolutionArray (rows, arr) {
  for (const row of rows) {
    // rows of a different size can never be duplicates
    if (row.length !== arr.length) { continue }

    let identical = true
    for (let i = 0; i < row.length; i++) {
      if (!row[i].equals(arr[i])) {
        identical = false
        break
      }
    }

    if (identical) { return true }
  }
  return false
}

module.exports = ExclusiveCarseianSolver
38 changes: 38 additions & 0 deletions solver/MultiStreetSolver.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
const HashMapSolver = require('./super/HashMapSolver')

/**
 * Solver which extends existing solutions with an additional street when the
 * tokenizer contains a 'multistreet' classification and at least two streets.
 */
class MultiStreetSolver extends HashMapSolver {
  /**
   * For each solution, append a copy which additionally contains the first
   * street candidate that intersects the multistreet span and does not
   * intersect any span already in that solution.
   *
   * @param {object} tokenizer - its `solution` array is read and appended to
   */
  solve (tokenizer) {
    // private classifications are included in the hash map
    const map = this.generateHashMap(tokenizer, true)

    // sanity checking
    if (!map.hasOwnProperty('multistreet')) { return }
    const streets = map.hasOwnProperty('street') ? map.street : []
    if (streets.length < 2) { return }

    const multi = map.multistreet[0]
    let remaining = map.street.slice(0)

    // add the second street to existing solutions
    // note: the bound is re-read on every pass, so solutions appended below
    // are visited as well
    for (let idx = 0; idx < tokenizer.solution.length; idx++) {
      const extended = tokenizer.solution[idx].slice(0) // make a copy

      const extra = remaining.find(candidate => (
        candidate.span.intersects(multi.span) &&
        !extended.some(existing => existing.span.intersects(candidate.span))
      ))

      if (extra === undefined) { continue }

      extended.push(extra)
      tokenizer.solution.push(extended)

      // NOTE(review): this keeps ONLY the candidate which was just used;
      // confirm whether dropping it (!==) was intended instead
      remaining = remaining.filter(c => c === extra)
    }
  }
}

module.exports = MultiStreetSolver
7 changes: 7 additions & 0 deletions solver/Solution.js
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,13 @@ class Solution {
this.span = span
this.classification = classification
}

equals (solution) {
return (
this.span === solution.span &&
this.classification.equals(solution.classification)
)
}
}

module.exports = Solution
6 changes: 3 additions & 3 deletions solver/super/HashMapSolver.js
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ class HashMapSolver extends BaseSolver {
// you should provide this function in your subclass
// solve() {}

generateHashMap (tokenizer) {
generateHashMap (tokenizer, includePrivate) {
let map = {}
for (let i = 0; i < tokenizer.section.length; i++) {
let section = tokenizer.section[i]
Expand All @@ -17,7 +17,7 @@ class HashMapSolver extends BaseSolver {
if (!keys.length) { continue }
for (let k in perm.classifications) {
let classification = perm.classifications[k]
if (!classification.public) { continue }
if (!includePrivate && !classification.public) { continue }
if (!map.hasOwnProperty(classification.label)) {
map[classification.label] = []
}
Expand All @@ -32,7 +32,7 @@ class HashMapSolver extends BaseSolver {
if (!keys.length) { continue }
for (let k in word.classifications) {
let classification = word.classifications[k]
if (!classification.public) { continue }
if (!includePrivate && !classification.public) { continue }
if (!map.hasOwnProperty(classification.label)) {
map[classification.label] = []
}
Expand Down
2 changes: 1 addition & 1 deletion tokenization/tokenizer.js
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ class Tokenizer {
this.span = new Span(s)
this.segment()
this.split()
this.permute(0, 6)
this.permute(0, 10)
this.solution = []
}

Expand Down

0 comments on commit 8043856

Please sign in to comment.