Skip to content

Commit

Permalink
get graphiteThreshold to ignore nulls (#93)
Browse files Browse the repository at this point in the history
add comments to encourage the use of graphiteWorking for nulls

refactor graphiteWorking to better deal with wildcarded metrics and to
fail when the last three consecutive datapoints are nulls

 🐿 v2.9.0
  • Loading branch information
bjfletcher authored Jul 9, 2018
1 parent 9f3af09 commit 569d447
Show file tree
Hide file tree
Showing 5 changed files with 41 additions and 36 deletions.
14 changes: 10 additions & 4 deletions src/checks/graphiteThreshold.check.js
Original file line number Diff line number Diff line change
Expand Up @@ -51,9 +51,15 @@ class GraphiteThresholdCheck extends Check {
.then(results => {
const simplifiedResults = results.map(result => {
const isFailing = result.datapoints.some(value => {
return this.direction === 'above' ?
Number(value[0]) > this.threshold :
Number(value[0]) < this.threshold;
if (typeof value[0] === null) {
// metric data is unavailable, we don't fail this threshold check if metric data is unavailable
// if you want a failing check for when metric data is unavailable, use graphiteWorking
return false;
} else {
return this.direction === 'above' ?
Number(value[0]) > this.threshold :
Number(value[0]) < this.threshold;
}
});
return { target: result.target, isFailing };
});
Expand All @@ -65,7 +71,7 @@ class GraphiteThresholdCheck extends Check {

// The metric crossed a threshold
this.checkOutput = failed ?
`In the last ${this.samplePeriod}, the following metric(s) have moved ${this.direction} the threshold value of ${this.threshold}: \t${failingMetrics.join('\t')}` :
`In the last ${this.samplePeriod}, the following metric(s) have moved ${this.direction} the threshold value of ${this.threshold}: ${failingMetrics.join(' ')}` :
`No threshold error detected in graphite data for ${this.metric}.`;
})
.catch(err => {
Expand Down
44 changes: 23 additions & 21 deletions src/checks/graphiteWorking.check.js
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
'use strict';

const status = require('./status');
const Check = require('./check');
const fetch = require('node-fetch');
Expand All @@ -15,30 +13,29 @@ function badJSON(message, json) {

class GraphiteWorkingCheck extends Check {

constructor(options){
constructor (options) {
options.technicalSummary = options.technicalSummary || 'There has been no metric data for a sustained period of time';
options.panicGuide = options.panicGuide || 'Check this is up and running. Check this has been able to send metrics to Graphite (see Heroku and Splunk logs). Check Graphite has not been dropping metric data.';

super(options);

this.ftGraphiteKey = process.env.FT_GRAPHITE_KEY;
if (!this.ftGraphiteKey) {
throw new Error('You must set FT_GRAPHITE_KEY environment variable');
}

if (!options.key) {
throw new Error(`You must pass in a key for the "${options.name}" check - e.g., "next.heroku.article.*.express.start"`);
this.metric = options.metric || options.key;
if (!this.metric) {
throw new Error(`You must pass in a metric for the "${options.name}" check - e.g., "next.heroku.article.*.express.start"`);
}

if (!/next\./.test(options.key)) {
throw new Error(`You must prepend the key (${options.key}) with "next." for the "${options.name}" check - e.g., "heroku.article.*.express.start" needs to be "next.heroku.article.*.express.start"`);
}
const fromTime = '-5minutes';
this.url = encodeURI(`https://graphite-api.ft.com/render/?target=${this.metric}&from=${fromTime}&format=json`);

const key = options.key;
const time = options.time || '-15minutes';

this.checkOutput = "This check has not yet run";
this.key = key;
this.url = encodeURI(`https://graphite-api.ft.com/render/?target=${key}&from=${time}&format=json`);
}

tick(){
tick () {
return fetch(this.url, { headers: { key: this.ftGraphiteKey } })
.then(response => {
if(!response.ok){
Expand All @@ -60,17 +57,22 @@ class GraphiteWorkingCheck extends Check {
badJSON('Expected at least one datapoint', json);
}

let count = json[0].datapoints.reduce((total, current) => total + (current[0] || 0), 0);
const simplifiedResults = json.map(result => {
const nullsForHowLong = result.datapoints.reduce((xs, x) => x[0] === null ? xs + 1 : 0, 0);
const simplifiedResult = { target: result.target, nullsForHowLong };
log.info({ event: `${logEventPrefix}_NULLS_FOR_HOW_LONG` }, simplifiedResult);
return simplifiedResult;
});

const failedResults = simplifiedResults.filter(r => r.nullsForHowLong > 2);

log.info({ event: `${logEventPrefix}_COUNT`, key: this.key, count });
if(count){
if (failedResults.length === 0) {
this.status = status.PASSED;
this.checkOutput =`${this.key} has received ${count} metrics in the last hour`;
}else{
this.checkOutput =`${this.metric} has data`;
} else {
this.status = status.FAILED;
this.checkOutput = `${this.key} has not receieved any metrics on the last hour`;
this.checkOutput = failedResults.map(r => `${r.target} has been null for ${r.nullsForHowLong} minutes.`).join(' ');
}

})
.catch(err => {
log.error({ event: `${logEventPrefix}_ERROR`, url: this.url }, err);
Expand Down
8 changes: 2 additions & 6 deletions test/fixtures/config/graphiteWorkingFixture.js
Original file line number Diff line number Diff line change
@@ -1,17 +1,13 @@
'use strict';
module.exports = {
name: 'graphite check fixture',
description: '',
checks: [
{
type: 'graphiteWorking',
name: 'test1',
key: 'next.fastly.f8585BOxnGQDMbnkJoM1e.all.requests',
metric: 'next.fastly.f8585BOxnGQDMbnkJoM1e.all.requests',
severity: 2,
businessImpact: 'blah',
technicalSummary: 'god knows',
panicGuide: 'Don\'t Panic',
interval: '1s'
businessImpact: 'loss of millions in pounds'
}
]
};
2 changes: 1 addition & 1 deletion test/graphiteThreshold.check.spec.js
Original file line number Diff line number Diff line change
Expand Up @@ -128,7 +128,7 @@ describe('Graphite Threshold Check', function(){
check.start();
setTimeout(() => {
expect(check.getStatus().ok).to.be.false;
expect(check.getStatus().checkOutput).to.equal('In the last 10min, the following metric(s) have moved below the threshold value of 11: \tnext.heroku.cpu.min\tnext.heroku.disk.min');
expect(check.getStatus().checkOutput).to.equal('In the last 10min, the following metric(s) have moved below the threshold value of 11: next.heroku.cpu.min next.heroku.disk.min');
done();
});
});
Expand Down
9 changes: 5 additions & 4 deletions test/graphiteworking.spec.js
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,8 @@ describe('Graphite Working Check', function(){
"target": "summarize(next.fastly.133g5BGAc00Hv4v8t0dMry.anzac.requests, \"1h\", \"sum\", true)",
"datapoints": [
[ null, 1459333140 ],
[ null, 1459333200 ],
[ null, 1459333260 ],
[ 1, 1459333200 ],
[ 1, 1459333260 ],
[ null, 1459333320 ],
[ null, 1459333380 ],
[ null, 1459333420 ],
Expand Down Expand Up @@ -61,7 +61,7 @@ describe('Graphite Working Check', function(){
return waitFor(10).then(() => {
sinon.assert.called(mockFetch);
let url = mockFetch.lastCall.args[0];
expect(url).to.contain(fixture.key);
expect(url).to.contain(fixture.metric);
expect(url).to.contain('format=json');
});
});
Expand All @@ -70,6 +70,7 @@ describe('Graphite Working Check', function(){
setup(goodResponse);
check.start();
return waitFor(10).then(() => {
expect(check.getStatus().checkOutput).to.equal('next.fastly.f8585BOxnGQDMbnkJoM1e.all.requests has data');
expect(check.getStatus().ok).to.be.true;
});
});
Expand All @@ -79,6 +80,7 @@ describe('Graphite Working Check', function(){
check.start();
return waitFor(10).then(() => {
expect(check.getStatus().ok).to.be.false;
expect(check.getStatus().checkOutput).to.equal('summarize(next.fastly.133g5BGAc00Hv4v8t0dMry.anzac.requests, "1h", "sum", true) has been null for 3 minutes.');
});
});

Expand All @@ -94,7 +96,6 @@ describe('Graphite Working Check', function(){
it('Can actually call graphite', () => {
check.start();
return waitFor(1000).then(() => {
console.log(check.getStatus());
expect(check.getStatus().ok).to.be.true;
});
});
Expand Down

0 comments on commit 569d447

Please sign in to comment.