diff --git a/package-lock.json b/package-lock.json index 6bdf726..978c3f8 100644 --- a/package-lock.json +++ b/package-lock.json @@ -1,6 +1,6 @@ { "name": "dumpster-dive", - "version": "4.0.1", + "version": "4.0.2", "lockfileVersion": 1, "requires": true, "dependencies": { @@ -45,9 +45,9 @@ "dev": true }, "camelcase": { - "version": "4.1.0", - "resolved": "https://registry.npmjs.org/camelcase/-/camelcase-4.1.0.tgz", - "integrity": "sha1-1UVjW+HjPFQmScaRc+Xeas+uNN0=" + "version": "5.0.0", + "resolved": "https://registry.npmjs.org/camelcase/-/camelcase-5.0.0.tgz", + "integrity": "sha512-faqwZqnWxbxn+F1d399ygeamQNy3lPp/H9H6rNrqYh4FSVCtcY+3cub1MxA8o9mDd55mM8Aghuu/kuyYA6VTsA==" }, "chalk": { "version": "2.4.1", @@ -121,12 +121,9 @@ } }, "decamelize": { - "version": "2.0.0", - "resolved": "https://registry.npmjs.org/decamelize/-/decamelize-2.0.0.tgz", - "integrity": "sha512-Ikpp5scV3MSYxY39ymh45ZLEecsTdv/Xj2CaQfI8RLMuwi7XvjX9H/fhraiSuU+C5w5NTDu4ZU72xNiZnurBPg==", - "requires": { - "xregexp": "4.0.0" - } + "version": "1.2.0", + "resolved": "https://registry.npmjs.org/decamelize/-/decamelize-1.2.0.tgz", + "integrity": "sha1-9lNNFRSCabIDUue+4m9QH5oZEpA=" }, "deep-equal": { "version": "1.0.1", @@ -426,9 +423,9 @@ "dev": true }, "map-age-cleaner": { - "version": "0.1.2", - "resolved": "https://registry.npmjs.org/map-age-cleaner/-/map-age-cleaner-0.1.2.tgz", - "integrity": "sha512-UN1dNocxQq44IhJyMI4TU8phc2m9BddacHRPRjKGLYaF0jqd3xLz0jS0skpAU9WgYyoR4gHtUpzytNBS385FWQ==", + "version": "0.1.3", + "resolved": "https://registry.npmjs.org/map-age-cleaner/-/map-age-cleaner-0.1.3.tgz", + "integrity": "sha512-bJzx6nMoP6PDLPBFmg7+xRKeFZvFboMrGlxmNj9ClvX53KrmvM5bXFXEWjbz4cz1AFn+jWJ9z/DJSz7hrs0w3w==", "requires": { "p-defer": "^1.0.0" } @@ -470,11 +467,11 @@ "dev": true }, "mongodb": { - "version": "3.1.8", - "resolved": "https://registry.npmjs.org/mongodb/-/mongodb-3.1.8.tgz", - "integrity": "sha512-yNKwYxQ6m00NV6+pMoWoheFTHSQVv1KkSrfOhRDYMILGWDYtUtQRqHrFqU75rmPIY8hMozVft8zdC4KYMWaM3Q==", + "version": "3.1.10", + "resolved": "https://registry.npmjs.org/mongodb/-/mongodb-3.1.10.tgz", + "integrity": "sha512-Uml42GeFxhTGQVml1XQ4cD0o/rp7J2ROy0fdYUcVitoE7vFqEhKH4TYVqRDpQr/bXtCJVxJdNQC1ntRxNREkPQ==", "requires": { - "mongodb-core": "3.1.7", + "mongodb-core": "3.1.9", "safe-buffer": "^5.1.2" }, "dependencies": { @@ -486,9 +483,9 @@ } }, "mongodb-core": { - "version": "3.1.7", - "resolved": "https://registry.npmjs.org/mongodb-core/-/mongodb-core-3.1.7.tgz", - "integrity": "sha512-YffpSrLmgFNmrvkGx+yX00KyBNk64C0BalfEn6vHHkXtcMUGXw8nxrMmhq5eXPLLlYeBpD/CsgNxE2Chf0o4zQ==", + "version": "3.1.9", + "resolved": "https://registry.npmjs.org/mongodb-core/-/mongodb-core-3.1.9.tgz", + "integrity": "sha512-MJpciDABXMchrZphh3vMcqu8hkNf/Mi+Gk6btOimVg1XMxLXh87j6FAvRm+KmwD1A9fpu3qRQYcbQe4egj23og==", "requires": { "bson": "^1.1.0", "require_optional": "^1.0.1", @@ -521,7 +518,7 @@ }, "node-fetch": { "version": "2.1.2", - "resolved": "http://registry.npmjs.org/node-fetch/-/node-fetch-2.1.2.tgz", + "resolved": "https://registry.npmjs.org/node-fetch/-/node-fetch-2.1.2.tgz", "integrity": "sha1-q4hOjn5X44qUR1POxwb3iNF2i7U=" }, "npm-run-path": { @@ -783,9 +780,9 @@ "integrity": "sha1-2kL0l0DAtC2yypcoVxyxkMmO/qM=" }, "shelljs": { - "version": "0.8.2", - "resolved": "https://registry.npmjs.org/shelljs/-/shelljs-0.8.2.tgz", - "integrity": "sha512-pRXeNrCA2Wd9itwhvLp5LZQvPJ0wU6bcjaTMywHHGX5XWhVN2nzSu7WV0q+oUY7mGK3mgSkDDzP3MgjqdyIgbQ==", + "version": "0.8.3", + "resolved": "https://registry.npmjs.org/shelljs/-/shelljs-0.8.3.tgz", + "integrity": "sha512-fc0BKlAWiLpwZljmOvAOTE/gXawtCoNrP5oaY7KIaQbbyHeQVg01pSEuEGvGh3HEdBU4baCD7wQBwADmM/7f7A==", "dev": true, "requires": { "glob": "^7.0.0", @@ -850,9 +847,9 @@ "integrity": "sha1-u0P/VZim6wXYm1n80SnJgzE2Br8=" }, "sunday-driver": { - "version": "1.0.1", - "resolved": "https://registry.npmjs.org/sunday-driver/-/sunday-driver-1.0.1.tgz", - "integrity": "sha512-9nDPMZ0IrwTEV1Njm3MT7SCDYW1GzkN1ce7jPER29A8nCLO9ZmN3SYqYj5a7ixidmhKMBN0/yU6W49VfqBrFJA==" + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/sunday-driver/-/sunday-driver-1.0.2.tgz", + "integrity": "sha512-18e+ylymbc7RGr6uEyDSPwSUHKwT0OaZrm7O9zl74dUfOvQdNdYvMHitw4MCa1Re/3iwlVEzvU39SRy9fkvjZg==" }, "supports-color": { "version": "5.4.0", @@ -992,7 +989,7 @@ }, "whatwg-fetch": { "version": "2.0.4", - "resolved": "http://registry.npmjs.org/whatwg-fetch/-/whatwg-fetch-2.0.4.tgz", + "resolved": "https://registry.npmjs.org/whatwg-fetch/-/whatwg-fetch-2.0.4.tgz", "integrity": "sha512-dcQ1GWpOD/eEQ97k66aiEVpNnapVj90/+R+SXTPYGHpYBBypfKJEQjLrvMZ7YXbKm21gXd4NcuxUTjiv1YtLng==" }, "which": { @@ -1072,11 +1069,6 @@ "cross-fetch": "2.2.3" } }, - "xregexp": { - "version": "4.0.0", - "resolved": "https://registry.npmjs.org/xregexp/-/xregexp-4.0.0.tgz", - "integrity": "sha512-PHyM+sQouu7xspQQwELlGwwd05mXUFqwFYfqPO0cC7x4fxyHnnuetmQr6CjJiafIDoH4MogHb9dOoJzR/Y4rFg==" - }, "xtend": { "version": "4.0.1", "resolved": "https://registry.npmjs.org/xtend/-/xtend-4.0.1.tgz", @@ -1089,12 +1081,12 @@ "integrity": "sha512-r9S/ZyXu/Xu9q1tYlpsLIsa3EeLXXk0VwlxqTcFRfg9EhMW+17kbt9G0NrgCmhGb5vT2hyhJZLfDGx+7+5Uj/w==" }, "yargs": { - "version": "12.0.2", - "resolved": "https://registry.npmjs.org/yargs/-/yargs-12.0.2.tgz", - "integrity": "sha512-e7SkEx6N6SIZ5c5H22RTZae61qtn3PYUE8JYbBFlK9sYmh3DMQ6E5ygtaG/2BW0JZi4WGgTR2IV5ChqlqrDGVQ==", + "version": "12.0.5", + "resolved": "https://registry.npmjs.org/yargs/-/yargs-12.0.5.tgz", + "integrity": "sha512-Lhz8TLaYnxq/2ObqHDql8dX8CJi97oHxrjUcYtzKbbykPtVW9WB+poxI+NM2UIzsMgNCZTIf0AQwsjK5yMAqZw==", "requires": { "cliui": "^4.0.0", - "decamelize": "^2.0.0", + "decamelize": "^1.2.0", "find-up": "^3.0.0", "get-caller-file": "^1.0.1", "os-locale": "^3.0.0", @@ -1104,15 +1096,16 @@ "string-width": "^2.0.0", "which-module": "^2.0.0", "y18n": "^3.2.1 || ^4.0.0", - "yargs-parser": "^10.1.0" + "yargs-parser": "^11.1.1" } }, "yargs-parser": { - "version": "10.1.0", - "resolved": "https://registry.npmjs.org/yargs-parser/-/yargs-parser-10.1.0.tgz", - "integrity": "sha512-VCIyR1wJoEBZUqk5PA+oOBF6ypbwh5aNB3I50guxAL/quggdfs4TtNHQrSazFA3fYZ+tEqfs0zIGlv0c/rgjbQ==", + "version": "11.1.1", + "resolved": "https://registry.npmjs.org/yargs-parser/-/yargs-parser-11.1.1.tgz", + "integrity": "sha512-C6kB/WJDiaxONLJQnF8ccx9SEeoTTLek8RVbaOIsrAUS8VrBEXfmeSnCZxygc+XC2sNMBIwOOnfcxiynjHsVSQ==", "requires": { - "camelcase": "^4.1.0" + "camelcase": "^5.0.0", + "decamelize": "^1.2.0" } } } diff --git a/package.json b/package.json index 82b896a..51031e1 100644 --- a/package.json +++ b/package.json @@ -21,17 +21,17 @@ }, "dependencies": { "chalk": "2.4.1", - "jsonfn": "^0.31.0", - "mongodb": "3.1.8", + "jsonfn": "0.31.0", + "mongodb": "3.1.10", "prettysize": "1.1.0", - "sunday-driver": "1.0.1", + "sunday-driver": "1.0.2", "worker-nodes": "1.6.1", "wtf_wikipedia": "6.2.1", - "yargs": "12.0.2" + "yargs": "12.0.5" }, "devDependencies": { - "shelljs": "0.8.2", - "tap-spec": "^5.0.0", + "shelljs": "0.8.3", + "tap-spec": "5.0.0", "tape": "4.9.1" }, "license": "MIT" diff --git a/scratch.js b/scratch.js index c3527d5..921e4fb 100644 --- a/scratch.js +++ b/scratch.js @@ -2,13 +2,10 @@ const dumpster = require('./src'); const drop = require('./src/lib/drop-db'); //144mb → 2.5 minutes = 57mb per worker per minute -// const path = '/home/spencer/mountain/dumpster-dive/tests/tinywiki-latest-pages-articles.xml'; -// const path = '/media/spencer/07d11766-2ce6-4f8a-8ec0-a3d144a3d4cd/big_data/wikipedia/enwiki-latest-pages-articles.xml'; -// const path = '/Users/spencer/data/wikipedia/enwiki-latest-pages-articles.xml' +const path = '/Users/spencer/data/wikipedia/enwiki-latest-pages-articles.xml' // const path = './tests/smallwiki-latest-pages-articles.xml'; //3s -const path = '/Users/spencer/data/wikipedia/twinpeaks_pages_current.xml'; //3s // const path = './tests/tinywiki-latest-pages-articles.xml'; //2s -const dbName = 'twinpeaks' //path.match(/\/([a-z-]+)-latest-pages/)[1]; +const dbName = path.match(/\/([a-z-]+)-latest-pages/)[1]; //db.pages.find({title:'Doppelgängers'}) diff --git a/src/worker/index.js b/src/worker/index.js index c0e8d8f..c7e280a 100644 --- a/src/worker/index.js +++ b/src/worker/index.js @@ -33,6 +33,7 @@ const doSection = async (optionStr, workerCount, workerNum) => { end: `${end}%`, splitter: '', each: (xml, resume) => { + // console.log(workerNum, xml.substr(0, 200)) //pull-out sections from this xml let page = parsePage(xml, this); if (page !== null) {