Skip to content

Commit

Permalink
fix: de-duplicate php package data (#871)
Browse files Browse the repository at this point in the history
adds a filter for package data already sent during the current
connection instance to prevent sending duplicate data between harvests.
  • Loading branch information
bduranleau-nr authored Apr 18, 2024
1 parent 8f0ae2c commit e6959a4
Show file tree
Hide file tree
Showing 6 changed files with 161 additions and 8 deletions.
85 changes: 81 additions & 4 deletions daemon/internal/newrelic/app.go
Original file line number Diff line number Diff line change
Expand Up @@ -146,6 +146,7 @@ type App struct {
HarvestTrigger HarvestTriggerFunc
LastActivity time.Time
Rules MetricRules
PhpPackages map[PhpPackagesKey]struct{}
}

func (app *App) String() string {
Expand Down Expand Up @@ -180,6 +181,7 @@ func NewApp(info *AppInfo) *App {
info: info,
HarvestTrigger: nil,
LastActivity: now,
PhpPackages: make(map[PhpPackagesKey]struct{}),
}
}

Expand Down Expand Up @@ -303,10 +305,10 @@ func (app *App) NeedsConnectAttempt(now time.Time, backoff time.Duration) bool {
return false
}

//Since span events are not included in Faster Event Harvest due to concerns
//about downsampling within a distributed trace, the report period and harvest
//limit are reported separately in span_event_harvest_config instead of
//event_harvest_config. Combine them both into EventHarvestConfig here.
// Since span events are not included in Faster Event Harvest due to concerns
// about downsampling within a distributed trace, the report period and harvest
// limit are reported separately in span_event_harvest_config instead of
// event_harvest_config. Combine them both into EventHarvestConfig here.
func combineEventConfig(ehc collector.EventHarvestConfig, sehc collector.SpanEventHarvestConfig) collector.EventHarvestConfig {
ehc.EventConfigs.SpanEventConfig.Limit = sehc.SpanEventConfig.Limit
ehc.EventConfigs.SpanEventConfig.ReportPeriod = sehc.SpanEventConfig.ReportPeriod
Expand Down Expand Up @@ -338,3 +340,78 @@ func (app *App) Inactive(threshold time.Duration) bool {
}
return time.Since(app.LastActivity) > threshold
}

// filter seen php packages data to avoid sending duplicates
//
// the `App` structure contains a map of PHP Packages the reporting
// application has encountered.
//
// the map of packages should persist for the duration of the
// current connection
//
// takes the `PhpPackages.data` byte array as input and unmarshals
// into an anonymous interface array
//
// the JSON format received from the agent is:
//
// [["package_name","version",{}],...]
//
// for each entry, assign the package name and version to the `PhpPackagesKey`
// struct and use the key to verify data does not exist in the map. If the
// key does not exist, add it to the map and the array of 'new' packages.
//
// convert the array of 'new' packages into a byte array representing
// the expected data that should match input, minus the duplicates.
func (app *App) filterPhpPackages(data []byte) []byte {
if data == nil {
return nil
}

var pkgKey PhpPackagesKey
var newPkgs []PhpPackagesKey
var x []interface{}

err := json.Unmarshal(data, &x)
if nil != err {
log.Errorf("failed to unmarshal php package json: %s", err)
return nil
}

for _, pkgJson := range x {
pkg, _ := pkgJson.([]interface{})
if len(pkg) != 3 {
log.Errorf("invalid php package json structure: %+v", pkg)
return nil
}
name, ok := pkg[0].(string)
version, ok := pkg[1].(string)
pkgKey = PhpPackagesKey{name, version}
_, ok = app.PhpPackages[pkgKey]
if !ok {
app.PhpPackages[pkgKey] = struct{}{}
newPkgs = append(newPkgs, pkgKey)
}
}

if newPkgs == nil {
return nil
}

buf := &bytes.Buffer{}
buf.WriteString(`[`)
for _, pkg := range newPkgs {
buf.WriteString(`["`)
buf.WriteString(pkg.Name)
buf.WriteString(`","`)
buf.WriteString(pkg.Version)
buf.WriteString(`",{}],`)
}

resJson := buf.Bytes()

// swap last ',' character with ']'
resJson = resJson[:len(resJson)-1]
resJson = append(resJson, ']')

return resJson
}
51 changes: 51 additions & 0 deletions daemon/internal/newrelic/app_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -613,3 +613,54 @@ func TestMaxPayloadSizeInBytesFromConnectReply(t *testing.T) {
t.Errorf("parseConnectReply(something), got [%v], expected [%v]", c.MaxPayloadSizeInBytes, expectedMaxPayloadSizeInBytes)
}
}

func TestFilterPhpPackages(t *testing.T) {
app := App{
PhpPackages: make(map[PhpPackagesKey]struct{}),
}
var nilData []byte = nil
emptyData := []byte(`[[{}]]`)
validData := []byte(`[["drupal","6.0",{}]]`)
moreValidData := []byte(`[["wordpress","7.0",{}],["symfony","5.1",{}]]`)
duplicateData := []byte(`[["drupal","6.0",{}]]`)
versionData := []byte(`[["drupal","9.0",{}]]`)
invalidData := []byte(`[[["1","2","3"],["4","5"]{}]]`)

filteredData := app.filterPhpPackages(nilData)
if filteredData != nil {
t.Errorf("expected 'nil' result on 'nil' input, got [%v]", filteredData)
}

filteredData = app.filterPhpPackages(emptyData)
if filteredData != nil {
t.Errorf("expected 'nil' result on empty data input, got [%v]", filteredData)
}

expect := []byte(`[["drupal","6.0",{}]]`)
filteredData = app.filterPhpPackages(validData)
if string(filteredData) != string(expect) {
t.Errorf("expected [%v], got [%v]", string(expect), string(filteredData))
}

expect = []byte(`[["wordpress","7.0",{}],["symfony","5.1",{}]]`)
filteredData = app.filterPhpPackages(moreValidData)
if string(filteredData) != string(expect) {
t.Errorf("expected [%v], got [%v]", string(expect), string(filteredData))
}

filteredData = app.filterPhpPackages(duplicateData)
if filteredData != nil {
t.Errorf("expected 'nil', got [%v]", filteredData)
}

expect = []byte(`[["drupal","9.0",{}]]`)
filteredData = app.filterPhpPackages(versionData)
if string(filteredData) != string(expect) {
t.Errorf("expected [%v], got [%v]", string(expect), string(filteredData))
}

filteredData = app.filterPhpPackages(invalidData)
if filteredData != nil {
t.Errorf("expected 'nil', go [%v]", filteredData)
}
}
15 changes: 15 additions & 0 deletions daemon/internal/newrelic/harvest_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -234,4 +234,19 @@ func TestHarvestEmpty(t *testing.T) {
if h.empty() {
t.Errorf("Harvest.empty() = true, want false")
}

// verify that php packages does not send harvest when data is nil
h = NewHarvest(startTime, collector.NewHarvestLimits(nil))
h.PhpPackages.AddPhpPackagesFromData(nil)
if !h.empty() {
t.Errorf("Harvest.empty = false, want true")
}

// verify that valid php package data sends a harvest
h = NewHarvest(startTime, collector.NewHarvestLimits(nil))
h.PhpPackages.AddPhpPackagesFromData([]byte(`[["testpackage","testversion",{}]]`))
if h.empty() {
t.Errorf("Harvest.empty = true, want false")
}

}
5 changes: 5 additions & 0 deletions daemon/internal/newrelic/php_packages.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,11 @@ import (
"github.com/newrelic/newrelic-php-agent/daemon/internal/newrelic/log"
)

type PhpPackagesKey struct {
Name string
Version string
}

// phpPackages represents all detected packages reported by an agent.
type PhpPackages struct {
numSeen int
Expand Down
3 changes: 3 additions & 0 deletions daemon/internal/newrelic/processor.go
Original file line number Diff line number Diff line change
Expand Up @@ -673,6 +673,8 @@ func harvestByType(ah *AppHarvest, args *harvestArgs, ht HarvestType, du_chan ch
// In such cases, harvest all types and return.
if ht&HarvestAll == HarvestAll {
ah.Harvest = NewHarvest(time.Now(), ah.App.connectReply.EventHarvestConfig.EventConfigs)
// filter already seen php packages
harvest.PhpPackages.data = ah.App.filterPhpPackages(harvest.PhpPackages.data)
if args.blocking {
// Invoked primarily by CleanExit
harvestAll(harvest, args, ah.connectReply.EventHarvestConfig, ah.TraceObserver, du_chan)
Expand All @@ -698,6 +700,7 @@ func harvestByType(ah *AppHarvest, args *harvestArgs, ht HarvestType, du_chan ch
slowSQLs := harvest.SlowSQLs
txnTraces := harvest.TxnTraces
phpPackages := harvest.PhpPackages
phpPackages.data = ah.App.filterPhpPackages(phpPackages.data)

harvest.Metrics = NewMetricTable(limits.MaxMetrics, time.Now())
harvest.Errors = NewErrorHeap(limits.MaxErrors)
Expand Down
10 changes: 6 additions & 4 deletions daemon/internal/newrelic/processor_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ var (
sampleSpanEvent = []byte("belated birthday")
sampleLogEvent = []byte("log event test birthday")
sampleErrorEvent = []byte("forgotten birthday")
samplePhpPackages = []byte(`["package", "1.2.3",{}]`)
samplePhpPackages = []byte(`[["package","1.2.3",{}]]`)
)

type ClientReturn struct {
Expand Down Expand Up @@ -297,9 +297,11 @@ func TestProcessorHarvestDefaultDataPhpPackages(t *testing.T) {
// collect php packages
m.clientReturn <- ClientReturn{nil, nil, 202}
cp_pkgs := <-m.clientParams

// collect metrics
m.clientReturn <- ClientReturn{nil, nil, 202}
cp_metrics := <-m.clientParams

// collect usage metrics
m.clientReturn <- ClientReturn{nil, nil, 202}
cp_usage := <-m.clientParams
Expand All @@ -308,7 +310,7 @@ func TestProcessorHarvestDefaultDataPhpPackages(t *testing.T) {

// check pkgs and metric data - it appears these can
// come in different orders so check both
toTestPkgs := `["Jars",["package", "1.2.3",{}]]`
toTestPkgs := `["Jars",[["package","1.2.3",{}]]]`
if toTestPkgs != string(cp_pkgs.data) {
if toTestPkgs != string(cp_metrics.data) {
t.Fatalf("packages data: expected '%s', got '%s'", toTestPkgs, string(cp_pkgs.data))
Expand All @@ -318,9 +320,9 @@ func TestProcessorHarvestDefaultDataPhpPackages(t *testing.T) {
time1 := strings.Split(string(cp_usage.data), ",")[1]
time2 := strings.Split(string(cp_usage.data), ",")[2]
usageMetrics := `["one",` + time1 + `,` + time2 + `,` +
`[[{"name":"Supportability/C/Collector/Output/Bytes"},[2,1285,0,0,0,0]],` +
`[[{"name":"Supportability/C/Collector/Output/Bytes"},[2,1286,0,0,0,0]],` +
`[{"name":"Supportability/C/Collector/metric_data/Output/Bytes"},[1,1253,0,0,0,0]],` +
`[{"name":"Supportability/C/Collector/update_loaded_modules/Output/Bytes"},[1,32,0,0,0,0]]]]`
`[{"name":"Supportability/C/Collector/update_loaded_modules/Output/Bytes"},[1,33,0,0,0,0]]]]`
if got, _ := OrderScrubMetrics(cp_usage.data, nil); string(got) != usageMetrics {
t.Fatalf("metrics data: expected '%s', got '%s'", string(usageMetrics), string(got))
}
Expand Down

0 comments on commit e6959a4

Please sign in to comment.