From 47ffba0601b5f4c6948a47867e679fda53756b97 Mon Sep 17 00:00:00 2001 From: Diego Leite Date: Mon, 4 Jun 2018 16:13:38 -0300 Subject: [PATCH] This closes #30 --- .env.example | 2 +- package-lock.json | 156 ++++++++++++++++++++++++++++++++++++++++++++++ package.json | 18 ++---- src/lib/job.js | 42 ++++++------- src/lib/rss.js | 52 ++++++++-------- src/server.js | 2 +- src/worker.js | 74 ++++++++-------------- 7 files changed, 232 insertions(+), 114 deletions(-) diff --git a/.env.example b/.env.example index b4ffef8..557c356 100644 --- a/.env.example +++ b/.env.example @@ -1,5 +1,5 @@ MONGO_URI=mongodb://mongo:27017/overflow -REDIS_URI=redis +REDIS_URI=redis://redis:6379 CRAWL_QUEUE=feeds-to-crawl FEEDS_FILE=./assets/engineering_blogs.opml SENTRY_DSN=https://HASH@sentry.io/ID \ No newline at end of file diff --git a/package-lock.json b/package-lock.json index fc40897..fee56fe 100644 --- a/package-lock.json +++ b/package-lock.json @@ -1447,6 +1447,20 @@ "integrity": "sha1-Jw8HbFpywC9bZaR9+Uxf46J4iS8=", "dev": true }, + "bull": { + "version": "3.4.2", + "resolved": "https://registry.npmjs.org/bull/-/bull-3.4.2.tgz", + "integrity": "sha512-TKQDgwO0xCH/uKPVa9j4CMPBtX7M2bLcfvlGuuKmDeLfykx2uGgKEJi58+Ak7kleTcjSbj2OaRNdrRQBCbYaRQ==", + "requires": { + "bluebird": "3.5.0", + "cron-parser": "2.5.0", + "debuglog": "1.0.1", + "ioredis": "3.2.2", + "lodash": "4.17.5", + "semver": "5.5.0", + "uuid": "3.2.1" + } + }, "bytes": { "version": "3.0.0", "resolved": "https://registry.npmjs.org/bytes/-/bytes-3.0.0.tgz", @@ -1708,6 +1722,11 @@ } } }, + "cluster-key-slot": { + "version": "1.0.12", + "resolved": "https://registry.npmjs.org/cluster-key-slot/-/cluster-key-slot-1.0.12.tgz", + "integrity": "sha512-21O0kGmvED5OJ7ZTdqQ5lQQ+sjuez33R+d35jZKLwqUb5mqcPHUsxOSzj61+LHVtxGZd1kShbQM3MjB/gBJkVg==" + }, "co": { "version": "4.6.0", "resolved": "https://registry.npmjs.org/co/-/co-4.6.0.tgz", @@ -1892,6 +1911,15 @@ "moment-timezone": "0.5.14" } }, + "cron-parser": { + "version": "2.5.0", + "resolved": "https://registry.npmjs.org/cron-parser/-/cron-parser-2.5.0.tgz", + "integrity": "sha512-gzmXu16/prizIbKPPKJo+WgBpV7k8Rxxu9FgaANW+vx5DebCXavfRqbROjKkr9ETvVPqs+IO+NXj4GG/eLf8zQ==", + "requires": { + "is-nan": "1.2.1", + "moment-timezone": "0.5.14" + } + }, "cross-spawn": { "version": "5.1.0", "resolved": "https://registry.npmjs.org/cross-spawn/-/cross-spawn-5.1.0.tgz", @@ -1974,6 +2002,11 @@ "ms": "2.0.0" } }, + "debuglog": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/debuglog/-/debuglog-1.0.1.tgz", + "integrity": "sha1-qiT/uaw9+aI1GDfPstJ5NgzXhJI=" + }, "decamelize": { "version": "1.2.0", "resolved": "https://registry.npmjs.org/decamelize/-/decamelize-1.2.0.tgz", @@ -2073,6 +2106,11 @@ "resolved": "https://registry.npmjs.org/delayed-stream/-/delayed-stream-1.0.0.tgz", "integrity": "sha1-3zrhmayt+31ECqrgsp4icrJOxhk=" }, + "denque": { + "version": "1.3.0", + "resolved": "https://registry.npmjs.org/denque/-/denque-1.3.0.tgz", + "integrity": "sha512-4SRaSj+PqmrS1soW5/Avd7eJIM2JJIqLLmwhRqIGleZM/8KwZq80njbSS2Iqas+6oARkSkLDHEk4mm78q3JlIg==" + }, "depd": { "version": "1.1.2", "resolved": "https://registry.npmjs.org/depd/-/depd-1.1.2.tgz", @@ -2743,6 +2781,11 @@ "write": "0.2.1" } }, + "flexbuffer": { + "version": "0.0.6", + "resolved": "https://registry.npmjs.org/flexbuffer/-/flexbuffer-0.0.6.tgz", + "integrity": "sha1-A5/fI/iCPkQMOPMnfm/vEXQhWzA=" + }, "for-in": { "version": "1.0.2", "resolved": "https://registry.npmjs.org/for-in/-/for-in-1.0.2.tgz", @@ -4220,6 +4263,36 @@ "integrity": "sha1-EEqOSqym09jNFXqO+L+rLXo//bY=", "dev": true }, + "ioredis": { + "version": "3.2.2", + "resolved": "https://registry.npmjs.org/ioredis/-/ioredis-3.2.2.tgz", + "integrity": "sha512-g+ShTQYLsCcOUkNOK6CCEZbj3aRDVPw3WOwXk+LxlUKvuS9ujEqP2MppBHyRVYrNNFW/vcPaTBUZ2ctGNSiOCA==", + "requires": { + "bluebird": "3.5.0", + "cluster-key-slot": "1.0.12", + "debug": "2.6.9", + "denque": "1.3.0", + "flexbuffer": "0.0.6", + "lodash.assign": "4.2.0", + "lodash.bind": "4.2.1", + "lodash.clone": "4.5.0", + "lodash.clonedeep": "4.5.0", + "lodash.defaults": "4.2.0", + "lodash.difference": "4.5.0", + "lodash.flatten": "4.4.0", + "lodash.foreach": "4.5.0", + "lodash.isempty": "4.4.0", + "lodash.keys": "4.2.0", + "lodash.noop": "3.0.1", + "lodash.partial": "4.2.1", + "lodash.pick": "4.4.0", + "lodash.sample": "4.2.1", + "lodash.shuffle": "4.2.0", + "lodash.values": "4.3.0", + "redis-commands": "1.3.5", + "redis-parser": "2.6.0" + } + }, "ipaddr.js": { "version": "1.6.0", "resolved": "https://registry.npmjs.org/ipaddr.js/-/ipaddr.js-1.6.0.tgz", @@ -4393,6 +4466,14 @@ "is-path-inside": "1.0.1" } }, + "is-nan": { + "version": "1.2.1", + "resolved": "https://registry.npmjs.org/is-nan/-/is-nan-1.2.1.tgz", + "integrity": "sha1-n69ltvttskt/XAYoR16nH5iEAeI=", + "requires": { + "define-properties": "1.1.2" + } + }, "is-npm": { "version": "1.0.0", "resolved": "https://registry.npmjs.org/is-npm/-/is-npm-1.0.0.tgz", @@ -5643,6 +5724,41 @@ "resolved": "https://registry.npmjs.org/lodash.assign/-/lodash.assign-4.2.0.tgz", "integrity": "sha1-DZnzzNem0mHRm9rrkkUAXShYCOc=" }, + "lodash.bind": { + "version": "4.2.1", + "resolved": "https://registry.npmjs.org/lodash.bind/-/lodash.bind-4.2.1.tgz", + "integrity": "sha1-euMBfpOWIqwxt9fX3LGzTbFpDTU=" + }, + "lodash.clone": { + "version": "4.5.0", + "resolved": "https://registry.npmjs.org/lodash.clone/-/lodash.clone-4.5.0.tgz", + "integrity": "sha1-GVhwRQ9aExkkeN9Lw9I9LeoZB7Y=" + }, + "lodash.clonedeep": { + "version": "4.5.0", + "resolved": "https://registry.npmjs.org/lodash.clonedeep/-/lodash.clonedeep-4.5.0.tgz", + "integrity": "sha1-4j8/nE+Pvd6HJSnBBxhXoIblzO8=" + }, + "lodash.defaults": { + "version": "4.2.0", + "resolved": "https://registry.npmjs.org/lodash.defaults/-/lodash.defaults-4.2.0.tgz", + "integrity": "sha1-0JF4cW/+pN3p5ft7N/bwgCJ0WAw=" + }, + "lodash.difference": { + "version": "4.5.0", + "resolved": "https://registry.npmjs.org/lodash.difference/-/lodash.difference-4.5.0.tgz", + "integrity": "sha1-nMtOUF1Ia5FlE0V3KIWi3yf9AXw=" + }, + "lodash.flatten": { + "version": "4.4.0", + "resolved": "https://registry.npmjs.org/lodash.flatten/-/lodash.flatten-4.4.0.tgz", + "integrity": "sha1-8xwiIlqWMtK7+OSt2+8kCqdlph8=" + }, + "lodash.foreach": { + "version": "4.5.0", + "resolved": "https://registry.npmjs.org/lodash.foreach/-/lodash.foreach-4.5.0.tgz", + "integrity": "sha1-Gmo16s5AEoDH8G3d7DUWWrJ+PlM=" + }, "lodash.get": { "version": "4.4.2", "resolved": "https://registry.npmjs.org/lodash.get/-/lodash.get-4.4.2.tgz", @@ -5653,6 +5769,41 @@ "resolved": "https://registry.npmjs.org/lodash.has/-/lodash.has-4.5.2.tgz", "integrity": "sha1-0Z9NwQlQWMzL4rDN9O4P5Ko3yGI=" }, + "lodash.isempty": { + "version": "4.4.0", + "resolved": "https://registry.npmjs.org/lodash.isempty/-/lodash.isempty-4.4.0.tgz", + "integrity": "sha1-b4bL7di+TsmHvpqvM8loTbGzHn4=" + }, + "lodash.keys": { + "version": "4.2.0", + "resolved": "https://registry.npmjs.org/lodash.keys/-/lodash.keys-4.2.0.tgz", + "integrity": "sha1-oIYCrBLk+4P5H8H7ejYKTZujUgU=" + }, + "lodash.noop": { + "version": "3.0.1", + "resolved": "https://registry.npmjs.org/lodash.noop/-/lodash.noop-3.0.1.tgz", + "integrity": "sha1-OBiPTWUKOkdCWEObluxFsyYXEzw=" + }, + "lodash.partial": { + "version": "4.2.1", + "resolved": "https://registry.npmjs.org/lodash.partial/-/lodash.partial-4.2.1.tgz", + "integrity": "sha1-SfPYz9qjv/izqR0SfpIyRUGJYdQ=" + }, + "lodash.pick": { + "version": "4.4.0", + "resolved": "https://registry.npmjs.org/lodash.pick/-/lodash.pick-4.4.0.tgz", + "integrity": "sha1-UvBWEP/53tQiYRRB7R/BI6AwAbM=" + }, + "lodash.sample": { + "version": "4.2.1", + "resolved": "https://registry.npmjs.org/lodash.sample/-/lodash.sample-4.2.1.tgz", + "integrity": "sha1-XkKRsMdT+hq+sKq4+ynfG2bwf20=" + }, + "lodash.shuffle": { + "version": "4.2.0", + "resolved": "https://registry.npmjs.org/lodash.shuffle/-/lodash.shuffle-4.2.0.tgz", + "integrity": "sha1-FFtQU8+HX29cKjP0i26ZSMbse0s=" + }, "lodash.sortby": { "version": "4.7.0", "resolved": "https://registry.npmjs.org/lodash.sortby/-/lodash.sortby-4.7.0.tgz", @@ -5664,6 +5815,11 @@ "resolved": "https://registry.npmjs.org/lodash.uniq/-/lodash.uniq-4.5.0.tgz", "integrity": "sha1-0CJTc662Uq3BvILklFM5qEJ1R3M=" }, + "lodash.values": { + "version": "4.3.0", + "resolved": "https://registry.npmjs.org/lodash.values/-/lodash.values-4.3.0.tgz", + "integrity": "sha1-o6bCsOvsxcLLocF+bmIP6BtT00c=" + }, "log-driver": { "version": "1.2.7", "resolved": "https://registry.npmjs.org/log-driver/-/log-driver-1.2.7.tgz", diff --git a/package.json b/package.json index 324e4e3..5980357 100644 --- a/package.json +++ b/package.json @@ -9,8 +9,8 @@ "lint": "eslint src/", "start:worker": "node ./src/worker.js", "start:server": "node ./src/server.js", - "start:dev_worker": "nodemon ./src/worker.js --exec \"node -r dotenv/config -r babel-register\"", - "start:dev_server": "nodemon ./src/server.js --exec \"node -r dotenv/config -r babel-register\"", + "start:dev_worker": "nodemon ./src/worker.js", + "start:dev_server": "nodemon ./src/server.js", "test:strict": "jest", "test": "npm run lint && npm run test:strict" }, @@ -44,10 +44,9 @@ } }, "dependencies": { - "bee-queue": "^1.2.2", + "bull": "^3.4.2", "cors": "^2.8.4", "cron": "^1.3.0", - "dotenv": "^5.0.1", "ejs": "^2.5.7", "express": "^4.16.2", "express-graphql": "^0.6.12", @@ -61,18 +60,9 @@ "xml2js": "^0.4.19" }, "devDependencies": { - "babel-cli": "^6.26.0", - "babel-eslint": "^8.2.2", - "babel-jest": "^22.4.3", - "babel-plugin-transform-async-to-generator": "^6.24.1", - "babel-plugin-transform-object-rest-spread": "^6.26.0", - "babel-plugin-transform-runtime": "^6.23.0", - "babel-preset-env": "^1.6.1", - "babel-register": "^6.26.0", "coveralls": "^3.0.0", "eslint": "^4.19.1", "jest": "^22.4.3", - "nodemon": "^1.17.2", - "npm-run-all": "^4.1.2" + "nodemon": "^1.17.2" } } diff --git a/src/lib/job.js b/src/lib/job.js index 9585095..9d14268 100644 --- a/src/lib/job.js +++ b/src/lib/job.js @@ -1,20 +1,21 @@ -function crawlFeed(URL, callback) { - const FeedParser = require('feedparser'); - const request = require('request'); +const FeedParser = require('feedparser'); +const request = require('request'); +const Queue = require('bull'); +const Feed = require('../models/feed'); +function crawlFeed(URL, callback) { const feedparser = new FeedParser(); let posts = []; request(URL) .on('error', function (error) { - console.log('\x1b[31m[ERROR]\x1b[0m', error.message); - callback(error); + return callback(error); }) .on('response', function () { this.pipe(feedparser); feedparser.on('error', function (error) { - console.log('\x1b[31m[ERROR]\x1b[0m', error.message); + return callback(error); }); feedparser.on('readable', function () { @@ -29,30 +30,23 @@ function crawlFeed(URL, callback) { }); } -function fetchLatestPosts() { - const Feed = require('../models/feed'); - +function fetchLatestPosts(callback) { Feed.find({}, function (error, feeds) { if (error) { - console.log('\x1b[31m[ERROR]\x1b[0m', error.message); - } else { - const Queue = require('bee-queue'); - const queue = new Queue(process.env.CRAWL_QUEUE, { - redis: { - host: process.env.REDIS_URI - } - }); - - feeds.forEach(function (feed) { - queue.createJob(feed).save(); - }); - - console.log('\x1b[34m[INFO]\x1b[0m', `${feeds.length} feeds to crawl`); + return callback(error); } + + const queue = new Queue(process.env.CRAWL_QUEUE, process.env.REDIS_URI); + + feeds.forEach(function (feed) { + queue.add(feed); + }); + + callback(null); }); } module.exports = { crawlFeed, fetchLatestPosts -}; +}; \ No newline at end of file diff --git a/src/lib/rss.js b/src/lib/rss.js index cf84bf8..0a46de7 100644 --- a/src/lib/rss.js +++ b/src/lib/rss.js @@ -1,36 +1,34 @@ -function load(RSSFile, callback) { - const fs = require('fs'); +const fs = require('fs'); +const xml2js = require('xml2js'); +const Feed = require('../models/feed'); +function load(RSSFile, callback) { fs.readFile(RSSFile, function (error, data) { if (error) { - console.log('\x1b[31m[ERROR]\x1b[0m', error.message); - callback(error); - } else { - const xml2js = require('xml2js'); - const parser = new xml2js.Parser(); + return callback(error); + } - parser.parseString(data, function (error, result) { - const feedsToParse = result.opml.body[0].outline[0].outline; - const feeds = feedsToParse.map(function (feed) { - return { - title: feed['$']['title'], - url: feed['$']['htmlUrl'], - rss: feed['$']['xmlUrl'] - }; - }); + const parser = new xml2js.Parser(); - const Feed = require('../models/feed'); - Feed.insertMany(feeds, { ordered: false }, function (error) { - if (error) { - console.log('\x1b[34m[INFO]\x1b[0m', `${feeds.length} feeds found, ${feeds.length - error.writeErrors.length} inserted`); - } else { - console.log('\x1b[32m[SUCCESS]\x1b[0m', `${feeds.length} feeds inserted`); - } - callback(null); - }); + parser.parseString(data, function (error, result) { + const feedsToParse = result.opml.body[0].outline[0].outline; + const feeds = feedsToParse.map(function (feed) { + return { + title: feed['$']['title'], + url: feed['$']['htmlUrl'], + rss: feed['$']['xmlUrl'] + }; }); - } + + Feed.insertMany(feeds, { ordered: false }, function (error) { + if (error && error['writeErrors'].length === 0) { + return callback(error); + } + + callback(null); + }); + }); }); } -module.exports = { load }; +module.exports = { load }; \ No newline at end of file diff --git a/src/server.js b/src/server.js index 89d253b..b4077c8 100644 --- a/src/server.js +++ b/src/server.js @@ -56,4 +56,4 @@ app.get('/posts/:page', listPosts); // The error handler must be before any other error middleware app.use(Raven.errorHandler()); -app.listen(PORT); +app.listen(PORT); \ No newline at end of file diff --git a/src/worker.js b/src/worker.js index c282c3a..f566192 100644 --- a/src/worker.js +++ b/src/worker.js @@ -1,4 +1,4 @@ -const Queue = require('bee-queue'); +const Queue = require('bull'); const CronJob = require('cron').CronJob; const Raven = require('raven'); const mongoose = require('mongoose'); @@ -6,6 +6,7 @@ const path = require('path'); const rss = require('./lib/rss'); const job = require('./lib/job'); +const Post = require('./models/post'); // Start raven to catch exceptions Raven.config(process.env.SENTRY_DSN).install(); @@ -15,7 +16,9 @@ rss.load(path.join(__dirname, process.env.FEEDS_FILE), function() { const cron = new CronJob({ cronTime: '0 * * * *', onTick: function() { - job.fetchLatestPosts(); + job.fetchLatestPosts(function(error) { + if (error) Raven.captureException(error); + }); }, start: true, timeZone: 'America/Los_Angeles' @@ -24,57 +27,34 @@ rss.load(path.join(__dirname, process.env.FEEDS_FILE), function() { cron.start(); }); -const queue = new Queue(process.env.CRAWL_QUEUE, { - redis: { - host: process.env.REDIS_URI - } -}); +const queue = new Queue(process.env.CRAWL_QUEUE, process.env.REDIS_URI); queue.process(5, function(task, done) { job.crawlFeed(task['data']['rss'], function(error, posts) { if (error) { - done(error); - } else { - done(null, posts); + return Raven.captureException(error); } - }); -}); -queue.on('succeeded', (task, result) => { - const Post = require('./models/post'); - const posts = result.map(function(post) { - return { - sourceName: post.meta.title, - sourceURL: post.meta.link, - title: post.title, - date: post.date, - author: post.author, - url: post.link, - categories: post.categories - }; - }); - - Post.insertMany(posts, {ordered: false}, function(error) { - if (error) { - console.log('\x1b[34m[INFO]\x1b[0m', `${posts.length} posts found`); - } else { - console.log('\x1b[32m[SUCCESS]\x1b[0m', `${posts.length} posts inserted`); - } + done(null, posts); }); }); -queue.on('retrying', (task, error) => { - console.log(`Job ${task.id} failed with error ${error.message} but is being retried!`); -}); - -queue.on('error', (error) => { - console.log(`A queue error happened: ${error.message}`); -}); - -queue.on('failed', (task, error) => { - console.log(`Job ${task.id} failed with error ${error.message}`); -}); - -queue.on('stalled', (taskId) => { - console.log(`Job ${taskId} stalled and will be reprocessed`); -}); +queue.on('completed', (task, result) => { + if (result) { + const posts = result.map(function(post) { + return { + sourceName: post.meta.title, + sourceURL: post.meta.link, + title: post.title, + date: post.date, + author: post.author, + url: post.link, + categories: post.categories + }; + }); + + Post.insertMany(posts, { ordered: false }, function(error) { + if (error) Raven.captureException(error); + }); + } +}); \ No newline at end of file