diff --git a/deploy/docker-compose/docFiles/caduceator-1.yaml b/deploy/docker-compose/docFiles/caduceator-1.yaml index 861144c..6e104af 100644 --- a/deploy/docker-compose/docFiles/caduceator-1.yaml +++ b/deploy/docker-compose/docFiles/caduceator-1.yaml @@ -80,6 +80,16 @@ vegetaConfig: transactionUUID: "5" bootTimeOffset: "-13m" birthdateOffset: "-15m" + - wrp: + source: "mac:112233445566" + destination: "event:device-status/mac:112233445566/offline" + metadata: + - "hw-model": "hardware1" + - "fw-name": "firmware2" + - "hw-last-reboot-reason": "reboot-reason-2" + transactionUUID: "6" + bootTimeOffset: "-13m" + birthdateOffset: "-15m" - wrp: source: "mac:112233445566" destination: "event:device-status/mac:112233445566/online" @@ -87,7 +97,7 @@ vegetaConfig: - "/hw-model": "hardware1" - "/fw-name": "firmware2" - "/hw-last-reboot-reason": "reboot-reason-2" - transactionUUID: "6" + transactionUUID: "7" bootTimeOffset: "-13m" birthdateOffset: "-16m" - wrp: @@ -97,7 +107,7 @@ vegetaConfig: - "/hw-model": "hardware1" - "/fw-name": "firmware2" - "/hw-last-reboot-reason": "reboot-reason-2" - transactionUUID: "7" + transactionUUID: "8" bootTimeOffset: "-13m" birthdateOffset: "-17m" - wrp: @@ -107,9 +117,39 @@ vegetaConfig: - "/hw-model": "hardware1" - "/fw-name": "firmware2" - "/hw-last-reboot-reason": "reboot-reason-2" - transactionUUID: "8" + transactionUUID: "9" bootTimeOffset: "-13m" birthdateOffset: "-18m" + - wrp: + source: "mac:112233445566" + destination: "event:device-status/mac:112233445566/invalid-event-type" + metadata: + - "/hw-model": "hardware1" + - "/fw-name": "firmware2" + - "/hw-last-reboot-reason": "reboot-reason-2" + transactionUUID: "10" + bootTimeOffset: "-9000h" + birthdateOffset: "-18m" + - wrp: + source: "uuid:112233445566" + destination: "event:device-status/mac:112233445566/online" + metadata: + - "/hw-model": "hardware1" + - "/fw-name": "firmware2" + - "/hw-last-reboot-reason": "reboot-reason-2" + transactionUUID: "11" + bootTimeOffset: "-23m" + 
birthdateOffset: "-18m" + - wrp: + source: "mac:112233445566" + destination: "event:device-status/mac:112233445566/offline/123" + metadata: + - "/hw-model": "hardware1" + - "/fw-name": "firmware2" + - "/hw-last-reboot-reason": "reboot-reason-2" + transactionUUID: "12" + bootTimeOffset: "-23m" + birthdateOffset: "-20m" # simulates talaria rehash vegetaRehash: routines: 0 diff --git a/deploy/docker-compose/docFiles/prometheus.yml b/deploy/docker-compose/docFiles/prometheus.yml index 8217193..d6873d0 100644 --- a/deploy/docker-compose/docFiles/prometheus.yml +++ b/deploy/docker-compose/docFiles/prometheus.yml @@ -64,6 +64,13 @@ scrape_configs: static_configs: - targets: ["caduceator1:5013"] + - job_name: "gungnir" + # metrics_path defaults to '/metrics' + # scheme defaults to 'http'. + + static_configs: + - targets: ["gungnir:7003"] + - job_name: "glaukos" # metrics_path defaults to '/metrics' # scheme defaults to 'http'. diff --git a/eventmetrics/parsers/metrics.go b/eventmetrics/parsers/metrics.go index 83c7c86..7410b4c 100644 --- a/eventmetrics/parsers/metrics.go +++ b/eventmetrics/parsers/metrics.go @@ -22,6 +22,8 @@ import ( "fmt" "github.com/prometheus/client_golang/prometheus" + "github.com/xmidt-org/bascule/basculechecks" + "github.com/xmidt-org/interpreter" "github.com/xmidt-org/touchstone" "go.uber.org/fx" ) @@ -71,28 +73,28 @@ func ProvideEventMetrics() fx.Option { Name: "reboot_unparsable_count", Help: "events that are not able to be fully processed, labelled by reason", }, - firmwareLabel, hardwareLabel, reasonLabel, + firmwareLabel, hardwareLabel, partnerIDLabel, reasonLabel, ), touchstone.CounterVec( prometheus.CounterOpts{ Name: "event_errors", Help: "individual event errors", }, - firmwareLabel, hardwareLabel, reasonLabel, + firmwareLabel, hardwareLabel, partnerIDLabel, reasonLabel, ), touchstone.CounterVec( prometheus.CounterOpts{ Name: "boot_cycle_errors", Help: "cycle errors", }, - reasonLabel, + reasonLabel, partnerIDLabel, ), 
touchstone.CounterVec( prometheus.CounterOpts{ Name: "reboot_cycle_errors", Help: "cycle errors", }, - reasonLabel, + reasonLabel, partnerIDLabel, ), touchstone.HistogramVec( prometheus.HistogramOpts{ @@ -134,3 +136,86 @@ func (m *Measures) addTimeElapsedHistogram(f *touchstone.Factory, o prometheus.H m.TimeElapsedHistograms[o.Name] = histogram return nil } + +// AddMetadata increments the metadata fields counter for the given metadata key. +func (m *Measures) AddMetadata(metadataKey string) { + if m.MetadataFields != nil { + m.MetadataFields.With(prometheus.Labels{metadataKeyLabel: metadataKey}).Add(1.0) + } +} + +// AddTotalUnparsable adds to the total unparsable counter. +func (m *Measures) AddTotalUnparsable(parserName string) { + if m.TotalUnparsableCount != nil { + m.TotalUnparsableCount.With(prometheus.Labels{parserLabel: parserName}).Add(1.0) + } +} + +// AddRebootUnparsable adds to the RebootUnparsable counter. +func (m *Measures) AddRebootUnparsable(reason string, event interpreter.Event) { + if m.RebootUnparsableCount != nil { + hardwareVal, firmwareVal, _ := getHardwareFirmware(event) + partner := basculechecks.DeterminePartnerMetric(event.PartnerIDs) + m.RebootUnparsableCount.With(prometheus.Labels{firmwareLabel: firmwareVal, + hardwareLabel: hardwareVal, partnerIDLabel: partner, reasonLabel: reason}).Add(1.0) + } +} + +// AddEventError adds an error tag to the event error counter. +func AddEventError(counter *prometheus.CounterVec, event interpreter.Event, errorTag string) { + if counter != nil { + hardwareVal, firmwareVal, _ := getHardwareFirmware(event) + partner := basculechecks.DeterminePartnerMetric(event.PartnerIDs) + counter.With(prometheus.Labels{firmwareLabel: firmwareVal, + hardwareLabel: hardwareVal, partnerIDLabel: partner, reasonLabel: errorTag}).Add(1.0) + } +} + +// AddCycleError adds a cycle error tag to the cycle error counter. 
+func AddCycleError(counter *prometheus.CounterVec, event interpreter.Event, errorTag string) { + if counter != nil { + partner := basculechecks.DeterminePartnerMetric(event.PartnerIDs) + counter.With(prometheus.Labels{partnerIDLabel: partner, reasonLabel: errorTag}).Add(1.0) + } +} + +// AddDuration adds the duration to the specific histogram. +func AddDuration(histogram prometheus.ObserverVec, duration float64, event interpreter.Event) { + if histogram != nil { + labels := getTimeElapsedHistogramLabels(event) + histogram.With(labels).Observe(duration) + } +} + +// get hardware and firmware values from event metadata, returning false if either one or both are not found +func getHardwareFirmware(event interpreter.Event) (hardwareVal string, firmwareVal string, found bool) { + hardwareVal, hardwareFound := event.GetMetadataValue(hardwareMetadataKey) + firmwareVal, firmwareFound := event.GetMetadataValue(firmwareMetadataKey) + + found = true + if !hardwareFound { + hardwareVal = unknownLabelValue + found = false + } + if !firmwareFound { + firmwareVal = unknownLabelValue + found = false + } + + return +} + +// grab relevant information from event metadata and return prometheus labels +func getTimeElapsedHistogramLabels(event interpreter.Event) prometheus.Labels { + hardwareVal, firmwareVal, _ := getHardwareFirmware(event) + rebootReason, reasonFound := event.GetMetadataValue(rebootReasonMetadataKey) + if !reasonFound { + rebootReason = unknownLabelValue + } + + return prometheus.Labels{ + hardwareLabel: hardwareVal, + firmwareLabel: firmwareVal, + rebootReasonLabel: rebootReason, + } +} diff --git a/eventmetrics/parsers/metrics_test.go b/eventmetrics/parsers/metrics_test.go index 159d9fd..aad7b97 100644 --- a/eventmetrics/parsers/metrics_test.go +++ b/eventmetrics/parsers/metrics_test.go @@ -8,10 +8,337 @@ import ( "testing" "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/client_golang/prometheus/testutil" 
"github.com/stretchr/testify/assert" + "github.com/xmidt-org/interpreter" "github.com/xmidt-org/touchstone" + "github.com/xmidt-org/touchstone/touchtest" ) +const ( + testReason = "testReason" +) + +func TestAddMetadata(t *testing.T) { + m := Measures{ + MetadataFields: prometheus.NewCounterVec( + prometheus.CounterOpts{ + Name: "metadataCounter", + Help: "metadataCounter", + }, + []string{metadataKeyLabel}, + ), + } + + metadataKey := "testKey" + m.AddMetadata(metadataKey) + assert.Equal(t, 1.0, testutil.ToFloat64(m.MetadataFields)) + + m = Measures{} + m.AddMetadata(metadataKey) + +} + +func TestAddTotalUnparsable(t *testing.T) { + m := Measures{ + TotalUnparsableCount: prometheus.NewCounterVec( + prometheus.CounterOpts{ + Name: "totalUnparsable", + Help: "totalUnparsable", + }, + []string{parserLabel}, + ), + } + + parserName := "testParser" + m.AddTotalUnparsable(parserName) + assert.Equal(t, 1.0, testutil.ToFloat64(m.TotalUnparsableCount)) + + m = Measures{} + m.AddTotalUnparsable(parserName) +} + +func TestAddRebootUnparsable(t *testing.T) { + m := Measures{ + RebootUnparsableCount: prometheus.NewCounterVec( + prometheus.CounterOpts{ + Name: "rebootUnparsable", + Help: "rebootUnparsable", + }, + []string{firmwareLabel, hardwareLabel, partnerIDLabel, reasonLabel}, + ), + } + + expectedRebootUnparsableCount := prometheus.NewCounterVec( + prometheus.CounterOpts{ + Name: "rebootUnparsable", + Help: "rebootUnparsable", + }, + []string{firmwareLabel, hardwareLabel, partnerIDLabel, reasonLabel}, + ) + + testEvent := interpreter.Event{ + Metadata: map[string]string{ + firmwareMetadataKey: "fw", + hardwareMetadataKey: "hw", + rebootReasonMetadataKey: "reboot", + }, + PartnerIDs: []string{ + "partner", + }, + } + + expectedRegistry := prometheus.NewPedanticRegistry() + actualRegistry := prometheus.NewPedanticRegistry() + expectedRegistry.Register(expectedRebootUnparsableCount) + actualRegistry.Register(m.RebootUnparsableCount) + + 
expectedRebootUnparsableCount.With(prometheus.Labels{firmwareLabel: "fw", + hardwareLabel: "hw", partnerIDLabel: "partner", reasonLabel: testReason}).Add(1.0) + m.AddRebootUnparsable(testReason, testEvent) + testAssert := touchtest.New(t) + testAssert.Expect(expectedRegistry) + assert.True(t, testAssert.GatherAndCompare(actualRegistry)) + + m = Measures{} + m.AddRebootUnparsable(testReason, testEvent) +} + +func TestAddEventError(t *testing.T) { + eventErrorTags := prometheus.NewCounterVec( + prometheus.CounterOpts{ + Name: "eventErrors", + Help: "eventErrors", + }, + []string{firmwareLabel, hardwareLabel, partnerIDLabel, reasonLabel}, + ) + + expectedEventErrorTags := prometheus.NewCounterVec( + prometheus.CounterOpts{ + Name: "eventErrors", + Help: "eventErrors", + }, + []string{firmwareLabel, hardwareLabel, partnerIDLabel, reasonLabel}, + ) + + testEvent := interpreter.Event{ + Metadata: map[string]string{ + firmwareMetadataKey: "fw", + hardwareMetadataKey: "hw", + rebootReasonMetadataKey: "reboot", + }, + PartnerIDs: []string{ + "partner", + }, + } + + expectedRegistry := prometheus.NewPedanticRegistry() + actualRegistry := prometheus.NewPedanticRegistry() + expectedRegistry.Register(expectedEventErrorTags) + actualRegistry.Register(eventErrorTags) + + expectedEventErrorTags.With(prometheus.Labels{firmwareLabel: "fw", + hardwareLabel: "hw", partnerIDLabel: "partner", reasonLabel: testReason}).Add(1.0) + AddEventError(eventErrorTags, testEvent, testReason) + testAssert := touchtest.New(t) + testAssert.Expect(expectedRegistry) + assert.True(t, testAssert.GatherAndCompare(actualRegistry)) + + AddEventError(nil, testEvent, testReason) +} + +func TestAddCycleError(t *testing.T) { + cycleErrorCount := prometheus.NewCounterVec( + prometheus.CounterOpts{ + Name: "cycleErrors", + Help: "cycleErrors", + }, + []string{partnerIDLabel, reasonLabel}, + ) + + expectedCycleErrorTags := prometheus.NewCounterVec( + prometheus.CounterOpts{ + Name: "cycleErrors", + Help: 
"cycleErrors", + }, + []string{partnerIDLabel, reasonLabel}, + ) + + testEvent := interpreter.Event{ + PartnerIDs: []string{ + "partner", + }, + } + + expectedRegistry := prometheus.NewPedanticRegistry() + actualRegistry := prometheus.NewPedanticRegistry() + expectedRegistry.Register(expectedCycleErrorTags) + actualRegistry.Register(cycleErrorCount) + + expectedCycleErrorTags.With(prometheus.Labels{partnerIDLabel: "partner", reasonLabel: testReason}).Add(1.0) + AddCycleError(cycleErrorCount, testEvent, testReason) + testAssert := touchtest.New(t) + testAssert.Expect(expectedRegistry) + assert.True(t, testAssert.GatherAndCompare(actualRegistry)) + + AddCycleError(nil, testEvent, testReason) +} + +func TestAddDuration(t *testing.T) { + actualHist := prometheus.NewHistogramVec( + prometheus.HistogramOpts{ + Name: "rebootHistogram", + Help: "rebootHistogram", + Buckets: []float64{60, 120, 180, 240, 300, 360, 420, 480, 540, 600, 900, 1200, 1500, 1800, 3600, 7200, 14400, 21600}, + }, + []string{firmwareLabel, hardwareLabel, rebootReasonLabel}, + ) + + expectedHist := prometheus.NewHistogramVec( + prometheus.HistogramOpts{ + Name: "rebootHistogram", + Help: "rebootHistogram", + Buckets: []float64{60, 120, 180, 240, 300, 360, 420, 480, 540, 600, 900, 1200, 1500, 1800, 3600, 7200, 14400, 21600}, + }, + []string{firmwareLabel, hardwareLabel, rebootReasonLabel}, + ) + + testEvent := interpreter.Event{ + Metadata: map[string]string{ + firmwareMetadataKey: "fw", + hardwareMetadataKey: "hw", + rebootReasonMetadataKey: "reboot", + }, + PartnerIDs: []string{ + "partner", + }, + } + + expectedRegistry := prometheus.NewPedanticRegistry() + actualRegistry := prometheus.NewPedanticRegistry() + expectedRegistry.Register(expectedHist) + actualRegistry.Register(actualHist) + + expectedHist.WithLabelValues("fw", "hw", "reboot").Observe(5.0) + AddDuration(actualHist, 5.0, testEvent) + + testAssert := touchtest.New(t) + testAssert.Expect(expectedRegistry) + assert.True(t, 
testAssert.GatherAndCompare(actualRegistry)) + + AddDuration(nil, 5.0, testEvent) +} + +func TestGetHardwareFirmware(t *testing.T) { + tests := []struct { + description string + event interpreter.Event + expectedHwVal string + expectedFwVal string + expectedFound bool + }{ + { + description: "all exists", + event: interpreter.Event{ + Metadata: map[string]string{ + hardwareMetadataKey: "testHw", + firmwareMetadataKey: "testFw", + }, + }, + expectedHwVal: "testHw", + expectedFwVal: "testFw", + expectedFound: true, + }, + { + description: "missing hw", + event: interpreter.Event{ + Metadata: map[string]string{ + firmwareMetadataKey: "testFw", + }, + }, + expectedHwVal: unknownLabelValue, + expectedFwVal: "testFw", + expectedFound: false, + }, + { + description: "missing fw", + event: interpreter.Event{ + Metadata: map[string]string{ + hardwareMetadataKey: "testHw", + }, + }, + expectedHwVal: "testHw", + expectedFwVal: unknownLabelValue, + expectedFound: false, + }, + { + description: "missing both", + event: interpreter.Event{ + Metadata: map[string]string{}, + }, + expectedHwVal: unknownLabelValue, + expectedFwVal: unknownLabelValue, + expectedFound: false, + }, + } + + for _, tc := range tests { + t.Run(tc.description, func(t *testing.T) { + assert := assert.New(t) + hwVal, fwVal, found := getHardwareFirmware(tc.event) + assert.Equal(tc.expectedHwVal, hwVal) + assert.Equal(tc.expectedFwVal, fwVal) + assert.Equal(tc.expectedFound, found) + }) + } +} + +func TestGetTimeElapsedHistogramLabels(t *testing.T) { + tests := []struct { + description string + event interpreter.Event + expectedLabels prometheus.Labels + }{ + { + description: "all exists", + event: interpreter.Event{ + Metadata: map[string]string{ + hardwareMetadataKey: "testHw", + firmwareMetadataKey: "testFw", + rebootReasonMetadataKey: "testReboot", + }, + }, + expectedLabels: prometheus.Labels{ + hardwareLabel: "testHw", + firmwareLabel: "testFw", + rebootReasonLabel: "testReboot", + }, + }, + { + 
description: "missing reboot reason", + event: interpreter.Event{ + Metadata: map[string]string{ + hardwareMetadataKey: "testHw", + firmwareMetadataKey: "testFw", + }, + }, + expectedLabels: prometheus.Labels{ + hardwareLabel: "testHw", + firmwareLabel: "testFw", + rebootReasonLabel: unknownLabelValue, + }, + }, + } + + for _, tc := range tests { + t.Run(tc.description, func(t *testing.T) { + assert := assert.New(t) + labels := getTimeElapsedHistogramLabels(tc.event) + assert.Equal(tc.expectedLabels, labels) + }) + } +} + func TestAddTimeElapsedHistogramSuccess(t *testing.T) { tests := []struct { description string diff --git a/eventmetrics/parsers/parserValidator.go b/eventmetrics/parsers/parserValidator.go index 87be27e..3d39951 100644 --- a/eventmetrics/parsers/parserValidator.go +++ b/eventmetrics/parsers/parserValidator.go @@ -146,22 +146,17 @@ func logCycleErr(currentEvent interpreter.Event, err error, counter *prometheus. var taggedErr validation.TaggedError if errors.As(err, &taggedErrs) { logger.Info("invalid cycle", zap.String(deviceIDKey, deviceID), zap.Strings("tags", validation.TagsToStrings(taggedErrs.UniqueTags()))) - if counter != nil { - for _, tag := range taggedErrs.UniqueTags() { - counter.With(prometheus.Labels{reasonLabel: tag.String()}).Add(1.0) - } + for _, tag := range taggedErrs.UniqueTags() { + AddCycleError(counter, currentEvent, tag.String()) } + } else if errors.As(err, &taggedErr) { logger.Info("invalid cycle", zap.String(deviceIDKey, deviceID), zap.String("tags", taggedErr.Tag().String())) - if counter != nil { - counter.With(prometheus.Labels{reasonLabel: taggedErr.Tag().String()}).Add(1.0) - } + AddCycleError(counter, currentEvent, taggedErr.Tag().String()) } else if err != nil { logger.Info("invalid cycle; no tags", zap.String(deviceIDKey, deviceID), zap.Error(err)) - if counter != nil { - counter.With(prometheus.Labels{reasonLabel: validation.Unknown.String()}).Add(1.0) - } + AddCycleError(counter, currentEvent, 
validation.Unknown.String()) } } @@ -172,7 +167,6 @@ func logEventError(logger *zap.Logger, counter *prometheus.CounterVec, err error deviceIDKey = "device id" ) - hardwareVal, firmwareVal, _ := getHardwareFirmware(event) deviceID, _ := event.DeviceID() eventID := event.TransactionUUID @@ -180,56 +174,14 @@ func logEventError(logger *zap.Logger, counter *prometheus.CounterVec, err error var taggedErr validation.TaggedError if errors.As(err, &taggedErrs) { logger.Info("event validation error", zap.Strings("tags", validation.TagsToStrings(taggedErrs.UniqueTags())), zap.String(eventIDKey, eventID), zap.String(deviceIDKey, deviceID)) - if counter != nil { - for _, tag := range taggedErrs.UniqueTags() { - counter.With(prometheus.Labels{firmwareLabel: firmwareVal, - hardwareLabel: hardwareVal, reasonLabel: tag.String()}).Add(1.0) - } + for _, tag := range taggedErrs.UniqueTags() { + AddEventError(counter, event, tag.String()) } } else if errors.As(err, &taggedErr) { logger.Info("event validation error", zap.String("tags", taggedErr.Tag().String()), zap.String(eventIDKey, eventID), zap.String(deviceIDKey, deviceID)) - if counter != nil { - counter.With(prometheus.Labels{firmwareLabel: firmwareVal, - hardwareLabel: hardwareVal, reasonLabel: taggedErr.Tag().String()}).Add(1.0) - } + AddEventError(counter, event, taggedErr.Tag().String()) } else if err != nil { logger.Info("event validation error; no tags", zap.Error(err), zap.String(eventIDKey, eventID), zap.String(deviceIDKey, deviceID)) - if counter != nil { - counter.With(prometheus.Labels{firmwareLabel: firmwareVal, - hardwareLabel: hardwareVal, reasonLabel: validation.Unknown.String()}).Add(1.0) - } - } -} - -// get hardware and firmware values from event metadata, returning false if either one or both are not found -func getHardwareFirmware(event interpreter.Event) (hardwareVal string, firmwareVal string, found bool) { - hardwareVal, hardwareFound := event.GetMetadataValue(hardwareMetadataKey) - firmwareVal, 
firmwareFound := event.GetMetadataValue(firmwareMetadataKey) - - found = true - if !hardwareFound { - hardwareVal = unknownLabelValue - found = false - } - if !firmwareFound { - firmwareVal = unknownLabelValue - found = false - } - - return -} - -// grab relevant information from event metadata and return prometheus labels -func getTimeElapsedHistogramLabels(event interpreter.Event) prometheus.Labels { - hardwareVal, firmwareVal, _ := getHardwareFirmware(event) - rebootReason, reasonFound := event.GetMetadataValue(rebootReasonMetadataKey) - if !reasonFound { - rebootReason = unknownLabelValue - } - - return prometheus.Labels{ - hardwareLabel: hardwareVal, - firmwareLabel: firmwareVal, - rebootReasonLabel: rebootReason, + AddEventError(counter, event, validation.Unknown.String()) } } diff --git a/eventmetrics/parsers/parserValidator_test.go b/eventmetrics/parsers/parserValidator_test.go index 668c51f..b0b1c53 100644 --- a/eventmetrics/parsers/parserValidator_test.go +++ b/eventmetrics/parsers/parserValidator_test.go @@ -7,6 +7,7 @@ import ( "github.com/prometheus/client_golang/prometheus" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/mock" + "github.com/xmidt-org/bascule/basculechecks" "github.com/xmidt-org/interpreter" "github.com/xmidt-org/interpreter/validation" "github.com/xmidt-org/touchstone/touchtest" @@ -157,14 +158,14 @@ func TestLogCycleError(t *testing.T) { Name: "cycleErrs", Help: "cycleErrs", }, - []string{reasonLabel}, + []string{reasonLabel, partnerIDLabel}, ) actualCounter = prometheus.NewCounterVec( prometheus.CounterOpts{ Name: "cycleErrs", Help: "cycleErrs", }, - []string{reasonLabel}, + []string{reasonLabel, partnerIDLabel}, ) ) @@ -173,7 +174,7 @@ func TestLogCycleError(t *testing.T) { logCycleErr(interpreter.Event{}, tc.err, actualCounter, logger) for _, tag := range tc.expectedTags { - expectedCounter.WithLabelValues(tag).Inc() + expectedCounter.WithLabelValues(tag, basculechecks.DeterminePartnerMetric([]string{})).Inc() } 
metricsAssert := touchtest.New(t) @@ -223,14 +224,14 @@ func TestLogEventError(t *testing.T) { Name: "eventErrs", Help: "eventErrs", }, - []string{firmwareLabel, hardwareLabel, reasonLabel}, + []string{firmwareLabel, hardwareLabel, reasonLabel, partnerIDLabel}, ) actualCounter = prometheus.NewCounterVec( prometheus.CounterOpts{ Name: "eventErrs", Help: "eventErrs", }, - []string{firmwareLabel, hardwareLabel, reasonLabel}, + []string{firmwareLabel, hardwareLabel, reasonLabel, partnerIDLabel}, ) ) @@ -239,7 +240,7 @@ func TestLogEventError(t *testing.T) { logEventError(logger, actualCounter, tc.err, testEvent) for _, tag := range tc.expectedTags { - expectedCounter.WithLabelValues("fw", "hw", tag).Inc() + expectedCounter.WithLabelValues("fw", "hw", tag, basculechecks.DeterminePartnerMetric(testEvent.PartnerIDs)).Inc() } metricsAssert := touchtest.New(t) @@ -248,113 +249,3 @@ func TestLogEventError(t *testing.T) { }) } } - -func TestGetHardwareFirmware(t *testing.T) { - tests := []struct { - description string - event interpreter.Event - expectedHwVal string - expectedFwVal string - expectedFound bool - }{ - { - description: "all exists", - event: interpreter.Event{ - Metadata: map[string]string{ - hardwareMetadataKey: "testHw", - firmwareMetadataKey: "testFw", - }, - }, - expectedHwVal: "testHw", - expectedFwVal: "testFw", - expectedFound: true, - }, - { - description: "missing hw", - event: interpreter.Event{ - Metadata: map[string]string{ - firmwareMetadataKey: "testFw", - }, - }, - expectedHwVal: unknownLabelValue, - expectedFwVal: "testFw", - expectedFound: false, - }, - { - description: "missing fw", - event: interpreter.Event{ - Metadata: map[string]string{ - hardwareMetadataKey: "testHw", - }, - }, - expectedHwVal: "testHw", - expectedFwVal: unknownLabelValue, - expectedFound: false, - }, - { - description: "missing both", - event: interpreter.Event{ - Metadata: map[string]string{}, - }, - expectedHwVal: unknownLabelValue, - expectedFwVal: unknownLabelValue, - 
expectedFound: false, - }, - } - - for _, tc := range tests { - t.Run(tc.description, func(t *testing.T) { - assert := assert.New(t) - hwVal, fwVal, found := getHardwareFirmware(tc.event) - assert.Equal(tc.expectedHwVal, hwVal) - assert.Equal(tc.expectedFwVal, fwVal) - assert.Equal(tc.expectedFound, found) - }) - } -} - -func TestGetTimeElapsedHistogramLabels(t *testing.T) { - tests := []struct { - description string - event interpreter.Event - expectedLabels prometheus.Labels - }{ - { - description: "all exists", - event: interpreter.Event{ - Metadata: map[string]string{ - hardwareMetadataKey: "testHw", - firmwareMetadataKey: "testFw", - rebootReasonMetadataKey: "testReboot", - }, - }, - expectedLabels: prometheus.Labels{ - hardwareLabel: "testHw", - firmwareLabel: "testFw", - rebootReasonLabel: "testReboot", - }, - }, - { - description: "missing reboot reason", - event: interpreter.Event{ - Metadata: map[string]string{ - hardwareMetadataKey: "testHw", - firmwareMetadataKey: "testFw", - }, - }, - expectedLabels: prometheus.Labels{ - hardwareLabel: "testHw", - firmwareLabel: "testFw", - rebootReasonLabel: unknownLabelValue, - }, - }, - } - - for _, tc := range tests { - t.Run(tc.description, func(t *testing.T) { - assert := assert.New(t) - labels := getTimeElapsedHistogramLabels(tc.event) - assert.Equal(tc.expectedLabels, labels) - }) - } -} diff --git a/eventmetrics/parsers/rebootTimeParser.go b/eventmetrics/parsers/rebootTimeParser.go index b1f920c..3de8bbf 100644 --- a/eventmetrics/parsers/rebootTimeParser.go +++ b/eventmetrics/parsers/rebootTimeParser.go @@ -30,6 +30,7 @@ const ( firmwareLabel = "firmware" hardwareLabel = "hardware" rebootReasonLabel = "reboot_reason" + partnerIDLabel = "partner_id" validationErrReason = "validation_error" fatalErrReason = "incoming_event_fatal_error" calculationErrReason = "time_elapsed_calculation_error" @@ -100,7 +101,7 @@ func (p *RebootDurationParser) Parse(currentEvent interpreter.Event) { // Make sure event is actually a 
fully-manageable event. Make sure event follows event regex and is a fully-mangeable event. eventType, err := currentEvent.EventType() if err != nil { - p.addToUnparsableCounters(firmwareVal, hardwareVal, fatalErrReason) + p.addToUnparsableCounters(currentEvent, fatalErrReason) p.logger.Error(invalidIncomingMsg, zap.Error(err), zap.String("event destination", currentEvent.Destination)) return } else if eventType != "fully-manageable" { @@ -110,14 +111,14 @@ func (p *RebootDurationParser) Parse(currentEvent interpreter.Event) { // Check that event passes necessary checks. If it doesn't it is impossible to continue and we should exit. if !p.basicChecks(currentEvent) { - p.addToUnparsableCounters(firmwareVal, hardwareVal, fatalErrReason) + p.addToUnparsableCounters(currentEvent, fatalErrReason) return } // Get the history of events and parse events relevant to the latest boot-cycle, into a slice. relevantEvents, err := p.getEvents(currentEvent) if err != nil { - p.addToUnparsableCounters(firmwareVal, hardwareVal, fatalErrReason) + p.addToUnparsableCounters(currentEvent, fatalErrReason) return } @@ -129,7 +130,7 @@ func (p *RebootDurationParser) Parse(currentEvent interpreter.Event) { } if !allValid { - p.addToUnparsableCounters(firmwareVal, hardwareVal, validationErrReason) + p.addToUnparsableCounters(currentEvent, validationErrReason) return } @@ -142,7 +143,7 @@ func (p *RebootDurationParser) Parse(currentEvent interpreter.Event) { } if !calculationValid { - p.addToUnparsableCounters(firmwareVal, hardwareVal, calculationErrReason) + p.addToUnparsableCounters(currentEvent, calculationErrReason) } } @@ -191,13 +192,7 @@ func (p *RebootDurationParser) getEvents(currentEvent interpreter.Event) ([]inte } -func (p *RebootDurationParser) addToUnparsableCounters(firmwareVal string, hardwareVal string, reason string) { - if p.measures.TotalUnparsableCount != nil { - p.measures.TotalUnparsableCount.With(prometheus.Labels{parserLabel: p.name}).Add(1.0) - } - - if 
p.measures.RebootUnparsableCount != nil { - p.measures.RebootUnparsableCount.With(prometheus.Labels{firmwareLabel: firmwareVal, - hardwareLabel: hardwareVal, reasonLabel: reason}).Add(1.0) - } +func (p *RebootDurationParser) addToUnparsableCounters(event interpreter.Event, reason string) { + p.measures.AddTotalUnparsable(p.name) + p.measures.AddRebootUnparsable(reason, event) } diff --git a/eventmetrics/parsers/rebootTimeParser_test.go b/eventmetrics/parsers/rebootTimeParser_test.go index 62f14d7..2ebc878 100644 --- a/eventmetrics/parsers/rebootTimeParser_test.go +++ b/eventmetrics/parsers/rebootTimeParser_test.go @@ -66,28 +66,13 @@ func TestParseCalculationErr(t *testing.T) { for _, tc := range tests { t.Run(tc.description, func(t *testing.T) { var ( - expectedTotalUnparsableCounter = prometheus.NewCounterVec( - prometheus.CounterOpts{ - Name: "totalUnparsableEvents", - Help: "totalUnparsableEvents", - }, - []string{parserLabel}, - ) - expectedRebootUnparsableCounter = prometheus.NewCounterVec( - prometheus.CounterOpts{ - Name: "rebootUnparsableEvents", - Help: "rebootUnparsableEvents", - }, - []string{firmwareLabel, hardwareLabel, reasonLabel}, - ) - m = Measures{ RebootUnparsableCount: prometheus.NewCounterVec( prometheus.CounterOpts{ Name: "rebootUnparsableEvents", Help: "rebootUnparsableEvents", }, - []string{firmwareLabel, hardwareLabel, reasonLabel}, + []string{firmwareLabel, hardwareLabel, partnerIDLabel, reasonLabel}, ), TotalUnparsableCount: prometheus.NewCounterVec( prometheus.CounterOpts{ @@ -99,14 +84,6 @@ func TestParseCalculationErr(t *testing.T) { } ) - assert := assert.New(t) - expectedRegistry := prometheus.NewPedanticRegistry() - actualRegistry := prometheus.NewPedanticRegistry() - expectedRegistry.Register(expectedTotalUnparsableCounter) - expectedRegistry.Register(expectedRebootUnparsableCounter) - actualRegistry.Register(m.TotalUnparsableCount) - actualRegistry.Register(m.RebootUnparsableCount) - invalidDurationCalculator := 
new(mockDurationCalculator) invalidDurationCalculator.On("Calculate", mock.Anything, mock.Anything).Return(tc.err) @@ -121,16 +98,6 @@ func TestParseCalculationErr(t *testing.T) { } parser.Parse(event) - - if tc.expectedInc { - expectedTotalUnparsableCounter.WithLabelValues("test_reboot_parser").Inc() - expectedRebootUnparsableCounter.WithLabelValues(fwVal, hwVal, calculationErrReason).Inc() - } - - testAssert := touchtest.New(t) - testAssert.Expect(expectedRegistry) - assert.True(testAssert.GatherAndCompare(actualRegistry)) - }) } @@ -157,7 +124,7 @@ func TestParseValidationErr(t *testing.T) { Name: "rebootUnparsableEvents", Help: "rebootUnparsableEvents", }, - []string{firmwareLabel, hardwareLabel, reasonLabel}, + []string{firmwareLabel, hardwareLabel, partnerIDLabel, reasonLabel}, ), TotalUnparsableCount: prometheus.NewCounterVec( prometheus.CounterOpts{ @@ -168,21 +135,6 @@ func TestParseValidationErr(t *testing.T) { ), } - expectedTotalUnparsableCounter = prometheus.NewCounterVec( - prometheus.CounterOpts{ - Name: "totalUnparsableEvents", - Help: "totalUnparsableEvents", - }, - []string{parserLabel}, - ) - expectedRebootUnparsableCounter = prometheus.NewCounterVec( - prometheus.CounterOpts{ - Name: "rebootUnparsableEvents", - Help: "rebootUnparsableEvents", - }, - []string{firmwareLabel, hardwareLabel, reasonLabel}, - ) - client = new(mockEventClient) eventsParser = new(mockEventsParser) validParserValidator = new(mockParserValidator) @@ -203,21 +155,7 @@ func TestParseValidationErr(t *testing.T) { logger: zap.NewNop(), } - expectedRegistry := prometheus.NewPedanticRegistry() - actualRegistry := prometheus.NewPedanticRegistry() - expectedRegistry.Register(expectedTotalUnparsableCounter) - expectedRegistry.Register(expectedRebootUnparsableCounter) - actualRegistry.Register(m.TotalUnparsableCount) - actualRegistry.Register(m.RebootUnparsableCount) - - expectedTotalUnparsableCounter.WithLabelValues("test_reboot_parser").Inc() - 
expectedRebootUnparsableCounter.WithLabelValues(fwVal, hwVal, validationErrReason).Inc() - rebootParser.Parse(event) - testAssert := touchtest.New(t) - testAssert.Expect(expectedRegistry) - assert.True(t, testAssert.GatherAndCompare(actualRegistry)) - } func TestParseNotFullyManageable(t *testing.T) { @@ -352,7 +290,6 @@ func TestParseFatalErr(t *testing.T) { for _, tc := range tests { t.Run(tc.description, func(t *testing.T) { var ( - assert = assert.New(t) totalUnparsableCounter = prometheus.NewCounterVec( prometheus.CounterOpts{ Name: "totalUnparsableEvents", @@ -365,21 +302,7 @@ func TestParseFatalErr(t *testing.T) { Name: "rebootUnparsableEvents", Help: "rebootUnparsableEvents", }, - []string{firmwareLabel, hardwareLabel, reasonLabel}, - ) - expectedTotalUnparsableCounter = prometheus.NewCounterVec( - prometheus.CounterOpts{ - Name: "totalUnparsableEvents", - Help: "totalUnparsableEvents", - }, - []string{parserLabel}, - ) - expectedRebootUnparsableCounter = prometheus.NewCounterVec( - prometheus.CounterOpts{ - Name: "rebootUnparsableEvents", - Help: "rebootUnparsableEvents", - }, - []string{firmwareLabel, hardwareLabel, reasonLabel}, + []string{firmwareLabel, hardwareLabel, partnerIDLabel, reasonLabel}, ) ) @@ -396,20 +319,7 @@ func TestParseFatalErr(t *testing.T) { client: client, } - expectedRegistry := prometheus.NewPedanticRegistry() - actualRegistry := prometheus.NewPedanticRegistry() - expectedRegistry.Register(expectedTotalUnparsableCounter) - expectedRegistry.Register(expectedRebootUnparsableCounter) - actualRegistry.Register(m.TotalUnparsableCount) - actualRegistry.Register(m.RebootUnparsableCount) - - expectedTotalUnparsableCounter.WithLabelValues("test_reboot_parser").Inc() - expectedRebootUnparsableCounter.WithLabelValues(fwVal, hwVal, fatalErrReason).Inc() - parser.Parse(tc.event) - testAssert := touchtest.New(t) - testAssert.Expect(expectedRegistry) - assert.True(testAssert.GatherAndCompare(actualRegistry)) }) } } diff --git a/go.mod b/go.mod 
index 3596fa9..acdf65d 100644 --- a/go.mod +++ b/go.mod @@ -22,7 +22,7 @@ require ( github.com/xmidt-org/arrange v0.3.0 github.com/xmidt-org/bascule v0.10.1 github.com/xmidt-org/httpaux v0.2.1 - github.com/xmidt-org/interpreter v0.0.6 + github.com/xmidt-org/interpreter v0.0.7 github.com/xmidt-org/sallust v0.1.5 github.com/xmidt-org/touchstone v0.0.3 github.com/xmidt-org/webpa-common v1.11.5 diff --git a/go.sum b/go.sum index 002b417..0212adb 100644 --- a/go.sum +++ b/go.sum @@ -619,6 +619,8 @@ github.com/xmidt-org/interpreter v0.0.6-0.20210716221517-3e9e5d5fc4e5 h1:DMEquuF github.com/xmidt-org/interpreter v0.0.6-0.20210716221517-3e9e5d5fc4e5/go.mod h1:ATGm0T0u1X6sL0TLzNtDsz+Y4hba8SGXNSt8hABwseY= github.com/xmidt-org/interpreter v0.0.6 h1:8VPrAFwfl0+pzXCzWiERlqgVWLNn7TzKF+2dsDZXUT4= github.com/xmidt-org/interpreter v0.0.6/go.mod h1:ATGm0T0u1X6sL0TLzNtDsz+Y4hba8SGXNSt8hABwseY= +github.com/xmidt-org/interpreter v0.0.7 h1:Qiekg+YF0vGqRYSHKKd2iLmvXz0QjC5u9KeeqLQ6B+A= +github.com/xmidt-org/interpreter v0.0.7/go.mod h1:ATGm0T0u1X6sL0TLzNtDsz+Y4hba8SGXNSt8hABwseY= github.com/xmidt-org/sallust v0.1.5 h1:yf95DXZUYnS+Td3w+jV3oO7XmhMbViMYK0A/WVM4QYo= github.com/xmidt-org/sallust v0.1.5/go.mod h1:azcKBypudADIeZ3Em8zGjVq3yQ7n4ueSvM/degHMIxo= github.com/xmidt-org/themis v0.4.4 h1:KewitRxStW1xOehDBi0YyGZyRv3PjFdYUEDvQFf1Nmk=