Skip to content

Commit

Permalink
Merge pull request #75 from xmidt-org/metrics
Browse files Browse the repository at this point in the history
Metrics
  • Loading branch information
j-mai authored Aug 18, 2021
2 parents 7014a79 + e602eaa commit 1f5e947
Show file tree
Hide file tree
Showing 10 changed files with 497 additions and 288 deletions.
46 changes: 43 additions & 3 deletions deploy/docker-compose/docFiles/caduceator-1.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -80,14 +80,24 @@ vegetaConfig:
transactionUUID: "5"
bootTimeOffset: "-13m"
birthdateOffset: "-15m"
- wrp:
source: "mac:112233445566"
destination: "event:device-status/mac:112233445566/offline"
metadata:
- "hw-model": "hardware1"
- "fw-name": "firmware2"
- "hw-last-reboot-reason": "reboot-reason-2"
transactionUUID: "6"
bootTimeOffset: "-13m"
birthdateOffset: "-15m"
- wrp:
source: "mac:112233445566"
destination: "event:device-status/mac:112233445566/online"
metadata:
- "/hw-model": "hardware1"
- "/fw-name": "firmware2"
- "/hw-last-reboot-reason": "reboot-reason-2"
transactionUUID: "6"
transactionUUID: "7"
bootTimeOffset: "-13m"
birthdateOffset: "-16m"
- wrp:
Expand All @@ -97,7 +107,7 @@ vegetaConfig:
- "/hw-model": "hardware1"
- "/fw-name": "firmware2"
- "/hw-last-reboot-reason": "reboot-reason-2"
transactionUUID: "7"
transactionUUID: "8"
bootTimeOffset: "-13m"
birthdateOffset: "-17m"
- wrp:
Expand All @@ -107,9 +117,39 @@ vegetaConfig:
- "/hw-model": "hardware1"
- "/fw-name": "firmware2"
- "/hw-last-reboot-reason": "reboot-reason-2"
transactionUUID: "8"
transactionUUID: "9"
bootTimeOffset: "-13m"
birthdateOffset: "-18m"
- wrp:
source: "mac:112233445566"
destination: "event:device-status/mac:112233445566/invalid-event-type"
metadata:
- "/hw-model": "hardware1"
- "/fw-name": "firmware2"
- "/hw-last-reboot-reason": "reboot-reason-2"
transactionUUID: "10"
bootTimeOffset: "-9000h"
birthdateOffset: "-18m"
- wrp:
source: "uuid:112233445566"
destination: "event:device-status/mac:112233445566/online"
metadata:
- "/hw-model": "hardware1"
- "/fw-name": "firmware2"
- "/hw-last-reboot-reason": "reboot-reason-2"
transactionUUID: "11"
bootTimeOffset: "-23m"
birthdateOffset: "-18m"
- wrp:
source: "mac:112233445566"
destination: "event:device-status/mac:112233445566/offline/123"
metadata:
- "/hw-model": "hardware1"
- "/fw-name": "firmware2"
- "/hw-last-reboot-reason": "reboot-reason-2"
transactionUUID: "12"
bootTimeOffset: "-23m"
birthdateOffset: "-20m"
# simulates talaria rehash
vegetaRehash:
routines: 0
Expand Down
7 changes: 7 additions & 0 deletions deploy/docker-compose/docFiles/prometheus.yml
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,13 @@ scrape_configs:
static_configs:
- targets: ["caduceator1:5013"]

- job_name: "gungnir"
# metrics_path defaults to '/metrics'
# scheme defaults to 'http'.

static_configs:
- targets: ["gungnir:7003"]

- job_name: "glaukos"
# metrics_path defaults to '/metrics'
# scheme defaults to 'http'.
Expand Down
93 changes: 89 additions & 4 deletions eventmetrics/parsers/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@ import (
"fmt"

"github.com/prometheus/client_golang/prometheus"
"github.com/xmidt-org/bascule/basculechecks"
"github.com/xmidt-org/interpreter"
"github.com/xmidt-org/touchstone"
"go.uber.org/fx"
)
Expand Down Expand Up @@ -71,28 +73,28 @@ func ProvideEventMetrics() fx.Option {
Name: "reboot_unparsable_count",
Help: "events that are not able to be fully processed, labelled by reason",
},
firmwareLabel, hardwareLabel, reasonLabel,
firmwareLabel, hardwareLabel, partnerIDLabel, reasonLabel,
),
touchstone.CounterVec(
prometheus.CounterOpts{
Name: "event_errors",
Help: "individual event errors",
},
firmwareLabel, hardwareLabel, reasonLabel,
firmwareLabel, hardwareLabel, partnerIDLabel, reasonLabel,
),
touchstone.CounterVec(
prometheus.CounterOpts{
Name: "boot_cycle_errors",
Help: "cycle errors",
},
reasonLabel,
reasonLabel, partnerIDLabel,
),
touchstone.CounterVec(
prometheus.CounterOpts{
Name: "reboot_cycle_errors",
Help: "cycle errors",
},
reasonLabel,
reasonLabel, partnerIDLabel,
),
touchstone.HistogramVec(
prometheus.HistogramOpts{
Expand Down Expand Up @@ -134,3 +136,86 @@ func (m *Measures) addTimeElapsedHistogram(f *touchstone.Factory, o prometheus.H
m.TimeElapsedHistograms[o.Name] = histogram
return nil
}

// AddMetadata increments the MetadataFields counter by one for the series
// labelled with metadataKey. It does nothing when MetadataFields is nil.
func (m *Measures) AddMetadata(metadataKey string) {
	if m.MetadataFields == nil {
		return
	}

	labels := prometheus.Labels{metadataKeyLabel: metadataKey}
	m.MetadataFields.With(labels).Add(1.0)
}

// AddTotalUnparsable increments the TotalUnparsableCount counter by one for
// the series labelled with parserName. It does nothing when
// TotalUnparsableCount is nil.
func (m *Measures) AddTotalUnparsable(parserName string) {
	if m.TotalUnparsableCount == nil {
		return
	}

	labels := prometheus.Labels{parserLabel: parserName}
	m.TotalUnparsableCount.With(labels).Add(1.0)
}

// AddRebootUnparsable increments the RebootUnparsableCount counter by one.
// The series is labelled with the event's firmware and hardware values (as
// returned by getHardwareFirmware), the partner derived from event.PartnerIDs
// and the supplied reason. It does nothing when RebootUnparsableCount is nil.
func (m *Measures) AddRebootUnparsable(reason string, event interpreter.Event) {
	if m.RebootUnparsableCount == nil {
		return
	}

	hw, fw, _ := getHardwareFirmware(event)
	partnerID := basculechecks.DeterminePartnerMetric(event.PartnerIDs)
	labels := prometheus.Labels{
		firmwareLabel:  fw,
		hardwareLabel:  hw,
		partnerIDLabel: partnerID,
		reasonLabel:    reason,
	}
	m.RebootUnparsableCount.With(labels).Add(1.0)
}

// AddEventError increments counter by one for the series labelled with the
// event's firmware and hardware values (as returned by getHardwareFirmware),
// the partner derived from event.PartnerIDs, and errorTag as the reason.
// It does nothing when counter is nil.
func AddEventError(counter *prometheus.CounterVec, event interpreter.Event, errorTag string) {
	if counter == nil {
		return
	}

	hw, fw, _ := getHardwareFirmware(event)
	partnerID := basculechecks.DeterminePartnerMetric(event.PartnerIDs)
	labels := prometheus.Labels{
		firmwareLabel:  fw,
		hardwareLabel:  hw,
		partnerIDLabel: partnerID,
		reasonLabel:    errorTag,
	}
	counter.With(labels).Add(1.0)
}

// AddCycleError increments counter by one for the series labelled with the
// partner derived from event.PartnerIDs and errorTag as the reason.
// It does nothing when counter is nil.
func AddCycleError(counter *prometheus.CounterVec, event interpreter.Event, errorTag string) {
	if counter == nil {
		return
	}

	labels := prometheus.Labels{
		partnerIDLabel: basculechecks.DeterminePartnerMetric(event.PartnerIDs),
		reasonLabel:    errorTag,
	}
	counter.With(labels).Add(1.0)
}

// AddDuration records duration as an observation on histogram, using the
// labels that getTimeElapsedHistogramLabels builds from event.
// It does nothing when histogram is nil.
func AddDuration(histogram prometheus.ObserverVec, duration float64, event interpreter.Event) {
	if histogram == nil {
		return
	}

	histogram.With(getTimeElapsedHistogramLabels(event)).Observe(duration)
}

// getHardwareFirmware looks up the hardware and firmware values in the event's
// metadata. A value that is missing is replaced with unknownLabelValue, and
// found is true only when both values were present.
func getHardwareFirmware(event interpreter.Event) (hardwareVal string, firmwareVal string, found bool) {
	hardwareVal, hwPresent := event.GetMetadataValue(hardwareMetadataKey)
	firmwareVal, fwPresent := event.GetMetadataValue(firmwareMetadataKey)

	if !hwPresent {
		hardwareVal = unknownLabelValue
	}
	if !fwPresent {
		firmwareVal = unknownLabelValue
	}

	return hardwareVal, firmwareVal, hwPresent && fwPresent
}

// getTimeElapsedHistogramLabels builds the prometheus labels (hardware,
// firmware and reboot reason) from the event's metadata. Any value missing
// from the metadata is reported as unknownLabelValue.
func getTimeElapsedHistogramLabels(event interpreter.Event) prometheus.Labels {
	hw, fw, _ := getHardwareFirmware(event)

	reason, ok := event.GetMetadataValue(rebootReasonMetadataKey)
	if !ok {
		reason = unknownLabelValue
	}

	labels := prometheus.Labels{
		hardwareLabel:     hw,
		firmwareLabel:     fw,
		rebootReasonLabel: reason,
	}
	return labels
}
Loading

0 comments on commit 1f5e947

Please sign in to comment.