From e99757b9fd688749bf566bd90c99aeae74ece0ef Mon Sep 17 00:00:00 2001 From: spencer kelly Date: Mon, 3 Dec 2018 20:23:27 -0500 Subject: [PATCH] 5.0.0 --- changelog.md | 5 +++++ package-lock.json | 10 +++++----- package.json | 4 ++-- scratch.js | 3 ++- 4 files changed, 14 insertions(+), 8 deletions(-) diff --git a/changelog.md b/changelog.md index 4a015b3..ec567ea 100644 --- a/changelog.md +++ b/changelog.md @@ -39,3 +39,8 @@ * get skip_redirects actually working * reduce default batch_size even lower * add `verbose_skip` option, to log disambig/redirect skipping + +## v5 +* more consistent template json, via [wtf_wikipedia@7](https://github.com/spencermountain/wtf_wikipedia/blob/master/changelog.md#700) +* removal of empty `[]` results in `Section`. +* fs fixes for node > 9 diff --git a/package-lock.json b/package-lock.json index 978c3f8..48a5cce 100644 --- a/package-lock.json +++ b/package-lock.json @@ -518,7 +518,7 @@ }, "node-fetch": { "version": "2.1.2", - "resolved": "https://registry.npmjs.org/node-fetch/-/node-fetch-2.1.2.tgz", + "resolved": "http://registry.npmjs.org/node-fetch/-/node-fetch-2.1.2.tgz", "integrity": "sha1-q4hOjn5X44qUR1POxwb3iNF2i7U=" }, "npm-run-path": { @@ -989,7 +989,7 @@ }, "whatwg-fetch": { "version": "2.0.4", - "resolved": "https://registry.npmjs.org/whatwg-fetch/-/whatwg-fetch-2.0.4.tgz", + "resolved": "http://registry.npmjs.org/whatwg-fetch/-/whatwg-fetch-2.0.4.tgz", "integrity": "sha512-dcQ1GWpOD/eEQ97k66aiEVpNnapVj90/+R+SXTPYGHpYBBypfKJEQjLrvMZ7YXbKm21gXd4NcuxUTjiv1YtLng==" }, "which": { @@ -1062,9 +1062,9 @@ "dev": true }, "wtf_wikipedia": { - "version": "6.2.1", - "resolved": "https://registry.npmjs.org/wtf_wikipedia/-/wtf_wikipedia-6.2.1.tgz", - "integrity": "sha512-ABlngbgO/SAKaIcd5CvSbiUY+ICW5XqFxgNQpfDHChbtmpCOgIpOb9K6q3jT1gXU1BoXxNWFMB+8vN2p+uu12g==", + "version": "7.0.0", + "resolved": "https://registry.npmjs.org/wtf_wikipedia/-/wtf_wikipedia-7.0.0.tgz", + "integrity": "sha512-F9T0zG5vtf+imdu4jLAo4O3MsR/Lpx9w77LhHTicD2oJSmF+4bDGGdiA7rEPzKQDhJyRZ3Qoc09/+RCLi8w3tg==", "requires": { "cross-fetch": "2.2.3" } diff --git a/package.json b/package.json index 51031e1..b336b88 100644 --- a/package.json +++ b/package.json @@ -2,7 +2,7 @@ "author": "Spencer Kelly (http://spencermounta.in)", "name": "dumpster-dive", "description": "get a wikipedia dump parsed into mongodb", - "version": "4.0.2", + "version": "5.0.0", "repository": { "type": "git", "url": "git://github.com/spencermountain/wikipedia-to-mongodb.git" @@ -26,7 +26,7 @@ "prettysize": "1.1.0", "sunday-driver": "1.0.2", "worker-nodes": "1.6.1", - "wtf_wikipedia": "6.2.1", + "wtf_wikipedia": "7.0.0", "yargs": "12.0.5" }, "devDependencies": { diff --git a/scratch.js b/scratch.js index 921e4fb..81657bc 100644 --- a/scratch.js +++ b/scratch.js @@ -3,6 +3,7 @@ const drop = require('./src/lib/drop-db'); //144mb → 2.5 minutes = 57mb per worker per minute const path = '/Users/spencer/data/wikipedia/enwiki-latest-pages-articles.xml' +// const path = '/Users/spencer/data/wikipedia/simplewiki-latest-pages-articles.xml' // const path = './tests/smallwiki-latest-pages-articles.xml'; //3s // const path = './tests/tinywiki-latest-pages-articles.xml'; //2s const dbName = path.match(/\/([a-z-]+)-latest-pages/)[1]; @@ -22,7 +23,7 @@ let options = { // skip_redirects: true, // skip_disambig: true, // missing_templates: true -// workers: 1 +// workers: 2 // custom: function(doc) { // console.log(doc.title()) // return {