Skip to content

Commit

Permalink
includeUnknown
Browse files Browse the repository at this point in the history
  • Loading branch information
klappy committed Apr 24, 2020
1 parent 8ee85fc commit e76615a
Show file tree
Hide file tree
Showing 3 changed files with 7 additions and 70 deletions.
1 change: 1 addition & 0 deletions src/docs/Tokenize.md
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ const options = {
includeNumbers: true,
includePunctuation: true,
includeWhitespace: true,
includeUnknown: true,
greedy: true,
verbose: true,
occurrences: true,
Expand Down
4 changes: 3 additions & 1 deletion src/tokenizers.js
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ export const tokenize = ({
includeNumbers = true,
includePunctuation = false,
includeWhitespace = false,
includeUnknown = false,
greedy = false,
verbose = false,
occurrences = false,
Expand All @@ -42,12 +43,13 @@ export const tokenize = ({

const greedyParsers = {...parsers, word: greedyWord, number: greedyNumber};
const _parsers = greedy ? greedyParsers : parsers;
let tokens = classifyTokens(string, _parsers, null, normalize, normalizations);
let tokens = classifyTokens(string, _parsers, 'unknown');
const types = [];
if (includeWords) types.push('word');
if (includeNumbers) types.push('number');
if (includeWhitespace) types.push('whitespace');
if (includePunctuation) types.push('punctuation');
if (includeUnknown) types.push('unknown');
tokens = tokens.filter((token) => types.includes(token.type));
if (occurrences) {
tokens = tokens.map((token, index) => {
Expand Down
72 changes: 3 additions & 69 deletions yarn.lock
Original file line number Diff line number Diff line change
Expand Up @@ -1978,7 +1978,7 @@ async-limiter@~1.0.0:
resolved "https://registry.yarnpkg.com/async-limiter/-/async-limiter-1.0.1.tgz#dd379e94f0db8310b08291f9d64c3209766617fd"
integrity sha512-csOlWGAcRFJaI6m+F2WKdnMKr4HhdhFVBk0H/QbJFMCr+uO2kwohwXQPxw/9OCxp05r5ghVBFSyioixx3gfkNQ==

async@^2.1.4, async@^2.6.1, async@^2.6.2:
async@^2.1.4, async@^2.6.2:
version "2.6.3"
resolved "https://registry.yarnpkg.com/async/-/async-2.6.3.tgz#d72625e2344a3656e3a3ad4fa749fa83299d82ff"
integrity sha512-zflvls11DCy+dQWzTW2dzuilv8Z5X/pjfmZOWba6TNIVDm+2UDaJmXSOXlasHKfNBs8oo3M0aT50fDEWfKZjXg==
Expand Down Expand Up @@ -2915,7 +2915,7 @@ [email protected]:
resolved "https://registry.yarnpkg.com/commander/-/commander-2.17.1.tgz#bd77ab7de6de94205ceacc72f1716d29f20a77bf"
integrity sha512-wPMUt6FnH2yzG95SA6mzjQOEKUU3aLaDEmzs1ti+1E9h+CsrZghRlqEM/EJ4KscsQVG8uNN4uVreUeT8+drlgg==

commander@^2.11.0, commander@^2.18.0, commander@^2.19.0, commander@^2.20.0, commander@~2.20.3:
commander@^2.11.0, commander@^2.19.0, commander@^2.20.0, commander@~2.20.3:
version "2.20.3"
resolved "https://registry.yarnpkg.com/commander/-/commander-2.20.3.tgz#fd485e84c03eb4881c20722ba48035e8531aeb33"
integrity sha512-GpVkmM8vF2vQUkj2LvZmD35JxeJOLCwJ9cUkugyk2nuhbv3+mJvpLYYt+0+USMxE+oj+ey/lJEnhZw75x/OMcQ==
Expand Down Expand Up @@ -3799,11 +3799,6 @@ elliptic@^6.0.0:
minimalistic-assert "^1.0.0"
minimalistic-crypto-utils "^1.0.0"

email-addresses@^3.0.1:
version "3.1.0"
resolved "https://registry.yarnpkg.com/email-addresses/-/email-addresses-3.1.0.tgz#cabf7e085cbdb63008a70319a74e6136188812fb"
integrity sha512-k0/r7GrWVL32kZlGwfPNgB2Y/mMXVTq/decgLczm/j34whdaspNrZO8CnXPf1laaHxI6ptUlsnAxN+UAPw+fzg==

"emoji-regex@>=6.0.0 <=6.1.1":
version "6.1.1"
resolved "https://registry.yarnpkg.com/emoji-regex/-/emoji-regex-6.1.1.tgz#c6cd0ec1b0642e2a3c67a1137efc5e796da4f88e"
Expand Down Expand Up @@ -4443,28 +4438,6 @@ file-uri-to-path@1.0.0:
resolved "https://registry.yarnpkg.com/file-uri-to-path/-/file-uri-to-path-1.0.0.tgz#553a7b8446ff6f684359c445f1e37a05dacc33dd"
integrity sha512-0Zt+s3L7Vf1biwWZ29aARiVYLx7iMGnEUl9x33fbB/j3jR81u/O2LbqK+Bm1CDSNDKVtJ/YjwY7TUd5SkeLQLw==

filename-reserved-regex@^1.0.0:
version "1.0.0"
resolved "https://registry.yarnpkg.com/filename-reserved-regex/-/filename-reserved-regex-1.0.0.tgz#e61cf805f0de1c984567d0386dc5df50ee5af7e4"
integrity sha1-5hz4BfDeHJhFZ9A4bcXfUO5a9+Q=

filenamify-url@^1.0.0:
version "1.0.0"
resolved "https://registry.yarnpkg.com/filenamify-url/-/filenamify-url-1.0.0.tgz#b32bd81319ef5863b73078bed50f46a4f7975f50"
integrity sha1-syvYExnvWGO3MHi+1Q9GpPeXX1A=
dependencies:
filenamify "^1.0.0"
humanize-url "^1.0.0"

filenamify@^1.0.0:
version "1.2.1"
resolved "https://registry.yarnpkg.com/filenamify/-/filenamify-1.2.1.tgz#a9f2ffd11c503bed300015029272378f1f1365a5"
integrity sha1-qfL/0RxQO+0wABUCknI3jx8TZaU=
dependencies:
filename-reserved-regex "^1.0.0"
strip-outer "^1.0.0"
trim-repeated "^1.0.0"

filesize@3.6.1:
version "3.6.1"
resolved "https://registry.yarnpkg.com/filesize/-/filesize-3.6.1.tgz#090bb3ee01b6f801a8a8be99d31710b3422bb317"
Expand Down Expand Up @@ -4801,18 +4774,6 @@ getpass@^0.1.1:
dependencies:
assert-plus "^1.0.0"

gh-pages@^2.0.1:
version "2.2.0"
resolved "https://registry.yarnpkg.com/gh-pages/-/gh-pages-2.2.0.tgz#74ebeaca8d2b9a11279dcbd4a39ddfff3e6caa24"
integrity sha512-c+yPkNOPMFGNisYg9r4qvsMIjVYikJv7ImFOhPIVPt0+AcRUamZ7zkGRLHz7FKB0xrlZ+ddSOJsZv9XAFVXLmA==
dependencies:
async "^2.6.1"
commander "^2.18.0"
email-addresses "^3.0.1"
filenamify-url "^1.0.0"
fs-extra "^8.1.0"
globby "^6.1.0"

github-slugger@^1.2.1:
version "1.2.1"
resolved "https://registry.yarnpkg.com/github-slugger/-/github-slugger-1.2.1.tgz#47e904e70bf2dccd0014748142d31126cfd49508"
Expand Down Expand Up @@ -5226,14 +5187,6 @@ https-browserify@^1.0.0:
resolved "https://registry.yarnpkg.com/https-browserify/-/https-browserify-1.0.0.tgz#ec06c10e0a34c0f2faf199f7fd7fc78fffd03c73"
integrity sha1-7AbBDgo0wPL68Zn3/X/Hj//QPHM=

humanize-url@^1.0.0:
version "1.0.1"
resolved "https://registry.yarnpkg.com/humanize-url/-/humanize-url-1.0.1.tgz#f4ab99e0d288174ca4e1e50407c55fbae464efff"
integrity sha1-9KuZ4NKIF0yk4eUEB8VfuuRk7/8=
dependencies:
normalize-url "^1.0.0"
strip-url-auth "^1.0.0"

hyphenate-style-name@^1.0.2:
version "1.0.3"
resolved "https://registry.yarnpkg.com/hyphenate-style-name/-/hyphenate-style-name-1.0.3.tgz#097bb7fa0b8f1a9cf0bd5c734cf95899981a9b48"
Expand Down Expand Up @@ -7289,7 +7242,7 @@ normalize-range@^0.1.2:
resolved "https://registry.yarnpkg.com/normalize-range/-/normalize-range-0.1.2.tgz#2d10c06bdfd312ea9777695a4d28439456b75942"
integrity sha1-LRDAa9/TEuqXd2laTShDlFa3WUI=

normalize-url@1.9.1, normalize-url@^1.0.0:
normalize-url@1.9.1:
version "1.9.1"
resolved "https://registry.yarnpkg.com/normalize-url/-/normalize-url-1.9.1.tgz#2cc0d66b31ea23036458436e3620d85954c66c3c"
integrity sha1-LMDWazHqIwNkWENuNiDYWVTGbDw=
Expand Down Expand Up @@ -10350,18 +10303,6 @@ strip-json-comments@^3.0.1:
resolved "https://registry.yarnpkg.com/strip-json-comments/-/strip-json-comments-3.0.1.tgz#85713975a91fb87bf1b305cca77395e40d2a64a7"
integrity sha512-VTyMAUfdm047mwKl+u79WIdrZxtFtn+nBxHeb844XBQ9uMNTuTHdx2hc5RiAJYqwTj3wc/xe5HLSdJSkJ+WfZw==

strip-outer@^1.0.0:
version "1.0.1"
resolved "https://registry.yarnpkg.com/strip-outer/-/strip-outer-1.0.1.tgz#b2fd2abf6604b9d1e6013057195df836b8a9d631"
integrity sha512-k55yxKHwaXnpYGsOzg4Vl8+tDrWylxDEpknGjhTiZB8dFRU5rTo9CAzeycivxV3s+zlTKwrs6WxMxR95n26kwg==
dependencies:
escape-string-regexp "^1.0.2"

strip-url-auth@^1.0.0:
version "1.0.1"
resolved "https://registry.yarnpkg.com/strip-url-auth/-/strip-url-auth-1.0.1.tgz#22b0fa3a41385b33be3f331551bbb837fa0cd7ae"
integrity sha1-IrD6OkE4WzO+PzMVUbu4N/oM164=

style-loader@1.0.0:
version "1.0.0"
resolved "https://registry.yarnpkg.com/style-loader/-/style-loader-1.0.0.tgz#1d5296f9165e8e2c85d24eee0b7caf9ec8ca1f82"
Expand Down Expand Up @@ -10628,13 +10569,6 @@ transform-runtime@0.0.0:
resolved "https://registry.yarnpkg.com/transform-runtime/-/transform-runtime-0.0.0.tgz#e714d9b69211dd9537939d50e3aa5788c442b85c"
integrity sha1-5xTZtpIR3ZU3k51Q46pXiMRCuFw=

trim-repeated@^1.0.0:
version "1.0.0"
resolved "https://registry.yarnpkg.com/trim-repeated/-/trim-repeated-1.0.0.tgz#e3646a2ea4e891312bf7eace6cfb05380bc01c21"
integrity sha1-42RqLqTokTEr9+rObPsFOAvAHCE=
dependencies:
escape-string-regexp "^1.0.2"

trim-trailing-lines@^1.0.0:
version "1.1.2"
resolved "https://registry.yarnpkg.com/trim-trailing-lines/-/trim-trailing-lines-1.1.2.tgz#d2f1e153161152e9f02fabc670fb40bec2ea2e3a"
Expand Down

0 comments on commit e76615a

Please sign in to comment.