From 220a4e23eda441ad1c4d3b7bafd0bdeda6260eb2 Mon Sep 17 00:00:00 2001 From: Dan Kortschak <90160302+efd6@users.noreply.github.com> Date: Wed, 6 Sep 2023 08:10:22 +0930 Subject: [PATCH] x-pack/filebeat/input/entityanalytics/provider/azuread: allow fine-grain control of API requests (#36441) This adds support for specifying which of users/devices to collect from the AzureAD API endpoints in order to reduce network costs for users who do not need a full set of entities. The current change does not change the behaviour of device collection of registered owners and registered users; when the "devices" dataset is selected there user entities will still be collected as they are considered here as an attribute of the device, rather than a component of the users dataset. --- CHANGELOG.next.asciidoc | 1 + .../inputs/input-entity-analytics.asciidoc | 9 ++ .../entityanalytics/provider/azuread/azure.go | 42 +++++++--- .../provider/azuread/azure_test.go | 83 +++++++++++++------ .../entityanalytics/provider/azuread/conf.go | 7 ++ .../provider/azuread/conf_test.go | 8 ++ 6 files changed, 112 insertions(+), 38 deletions(-) diff --git a/CHANGELOG.next.asciidoc b/CHANGELOG.next.asciidoc index 8b5204dd33c..9a0d56c5e2a 100644 --- a/CHANGELOG.next.asciidoc +++ b/CHANGELOG.next.asciidoc @@ -214,6 +214,7 @@ https://github.com/elastic/beats/compare/v8.8.1\...main[Check the HEAD diff] - Make HTTPJSON response body decoding errors more informative. {pull}36481[36481] - Allow fine-grained control of entity analytics API requests for Okta provider. {issue}36440[36440] {pull}36492[36492] - Add support for expanding `journald.process.capabilities` into the human-readable effective capabilities in the ECS `process.thread.capabilities.effective` field. {issue}36454[36454] {pull}36470[36470] +- Allow fine-grained control of entity analytics API requests for AzureAD provider. {issue}36440[36440] {pull}36441[36441] *Auditbeat* diff --git a/x-pack/filebeat/docs/inputs/input-entity-analytics.asciidoc b/x-pack/filebeat/docs/inputs/input-entity-analytics.asciidoc index 2957b5b7b43..ce21d4e0c5e 100644 --- a/x-pack/filebeat/docs/inputs/input-entity-analytics.asciidoc +++ b/x-pack/filebeat/docs/inputs/input-entity-analytics.asciidoc @@ -255,6 +255,7 @@ Example configuration: enabled: true id: azure-1 provider: azure-ad + dataset: "all" sync_interval: "12h" update_interval: "30m" client_id: "CLIENT_ID" @@ -279,6 +280,14 @@ The client/application ID. Used for authentication. Field is required. The secret value, used for authentication. Field is required. +[float] +===== `dataset` + +The datasets to collect from the API. This can be one of "all", "users" or "devices", +or may be left empty for the default behavior which is to collect all entities. +When the `dataset` is set to "devices", some user entity data is collected in order +to populate the registered users and registered owner fields for each device. + [float] ===== `sync_interval` diff --git a/x-pack/filebeat/input/entityanalytics/provider/azuread/azure.go b/x-pack/filebeat/input/entityanalytics/provider/azuread/azure.go index 73b641c3949..7db004237c9 100644 --- a/x-pack/filebeat/input/entityanalytics/provider/azuread/azure.go +++ b/x-pack/filebeat/input/entityanalytics/provider/azuread/azure.go @@ -9,6 +9,7 @@ import ( "context" "errors" "fmt" + "strings" "time" "github.com/google/uuid" @@ -267,19 +268,38 @@ func (p *azure) doFetch(ctx context.Context, state *stateStore, fullSync bool) ( groupsDeltaLink = state.groupsLink } - changedUsers, userLink, err := p.fetcher.Users(ctx, usersDeltaLink) - if err != nil { - return updatedUsers, updatedDevices, err - } - p.logger.Debugf("Received %d users from API", len(changedUsers)) - - changedDevices, deviceLink, err := p.fetcher.Devices(ctx, devicesDeltaLink) - if err != nil { - return updatedUsers, updatedDevices, err + var ( + changedUsers []*fetcher.User + userLink string + ) + switch strings.ToLower(p.conf.Dataset) { + case "", "all", "users": + changedUsers, userLink, err = p.fetcher.Users(ctx, usersDeltaLink) + if err != nil { + return updatedUsers, updatedDevices, err + } + p.logger.Debugf("Received %d users from API", len(changedUsers)) + default: + p.logger.Debugf("Skipping user collection from API: dataset=%s", p.conf.Dataset) + } + + var ( + changedDevices []*fetcher.Device + deviceLink string + ) + switch strings.ToLower(p.conf.Dataset) { + case "", "all", "devices": + changedDevices, deviceLink, err = p.fetcher.Devices(ctx, devicesDeltaLink) + if err != nil { + return updatedUsers, updatedDevices, err + } + p.logger.Debugf("Received %d devices from API", len(changedDevices)) + default: + p.logger.Debugf("Skipping device collection from API: dataset=%s", p.conf.Dataset) } - p.logger.Debugf("Received %d devices from API", len(changedUsers)) - // Get group changes. + // Get group changes. Groups are required for both users and devices. + // So always collect these. changedGroups, groupLink, err := p.fetcher.Groups(ctx, groupsDeltaLink) if err != nil { return updatedUsers, updatedDevices, err diff --git a/x-pack/filebeat/input/entityanalytics/provider/azuread/azure_test.go b/x-pack/filebeat/input/entityanalytics/provider/azuread/azure_test.go index 0b28e4131a8..c1fd2e0e2fd 100644 --- a/x-pack/filebeat/input/entityanalytics/provider/azuread/azure_test.go +++ b/x-pack/filebeat/input/entityanalytics/provider/azuread/azure_test.go @@ -6,6 +6,7 @@ package azuread import ( "context" + "fmt" "testing" "time" @@ -18,36 +19,64 @@ import ( ) func TestAzure_DoFetch(t *testing.T) { - dbFilename := "TestAzure_DoFetch.db" - store := testSetupStore(t, dbFilename) - t.Cleanup(func() { - testCleanupStore(store, dbFilename) - }) - - a := azure{ - logger: logp.L(), - auth: mockauth.New(""), - fetcher: mockfetcher.New(), + tests := []struct { + dataset string + wantUsers bool + wantDevices bool + }{ + {dataset: "", wantUsers: true, wantDevices: true}, + {dataset: "all", wantUsers: true, wantDevices: true}, + {dataset: "users", wantUsers: true, wantDevices: false}, + {dataset: "devices", wantUsers: false, wantDevices: true}, } - ss, err := newStateStore(store) - require.NoError(t, err) - defer ss.close(false) + for _, test := range tests { + t.Run(test.dataset, func(t *testing.T) { + suffix := test.dataset + if suffix != "" { + suffix = "_" + suffix + } + dbFilename := fmt.Sprintf("TestAzure_DoFetch%s.db", suffix) + store := testSetupStore(t, dbFilename) + t.Cleanup(func() { + testCleanupStore(store, dbFilename) + }) - ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) - defer cancel() - gotUsers, gotDevices, err := a.doFetch(ctx, ss, false) - require.NoError(t, err) + a := azure{ + conf: conf{Dataset: test.dataset}, + logger: logp.L(), + auth: mockauth.New(""), + fetcher: mockfetcher.New(), + } - var wantModifiedUsers collections.UUIDSet - for _, v := range mockfetcher.UserResponse { - wantModifiedUsers.Add(v.ID) - } - var wantModifiedDevices collections.UUIDSet - for _, v := range mockfetcher.DeviceResponse { - wantModifiedDevices.Add(v.ID) - } + ss, err := newStateStore(store) + require.NoError(t, err) + defer ss.close(false) + + ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) + defer cancel() + gotUsers, gotDevices, err := a.doFetch(ctx, ss, false) + require.NoError(t, err) - require.Equal(t, wantModifiedUsers.Values(), gotUsers.Values()) - require.Equal(t, wantModifiedDevices.Values(), gotDevices.Values()) + var wantModifiedUsers collections.UUIDSet + for _, v := range mockfetcher.UserResponse { + wantModifiedUsers.Add(v.ID) + } + var wantModifiedDevices collections.UUIDSet + for _, v := range mockfetcher.DeviceResponse { + wantModifiedDevices.Add(v.ID) + } + + if test.wantUsers { + require.Equal(t, wantModifiedUsers.Values(), gotUsers.Values()) + } else { + require.Equal(t, 0, gotUsers.Len()) + } + if test.wantDevices { + require.Equal(t, wantModifiedDevices.Values(), gotDevices.Values()) + } else { + require.Equal(t, 0, gotDevices.Len()) + } + }) + } } diff --git a/x-pack/filebeat/input/entityanalytics/provider/azuread/conf.go b/x-pack/filebeat/input/entityanalytics/provider/azuread/conf.go index d2f649e0503..105b05cbbbb 100644 --- a/x-pack/filebeat/input/entityanalytics/provider/azuread/conf.go +++ b/x-pack/filebeat/input/entityanalytics/provider/azuread/conf.go @@ -6,6 +6,7 @@ package azuread import ( "errors" + "strings" "time" ) @@ -21,6 +22,7 @@ type conf struct { TenantID string `config:"tenant_id" validate:"required"` SyncInterval time.Duration `config:"sync_interval"` UpdateInterval time.Duration `config:"update_interval"` + Dataset string `config:"dataset"` } // Validate runs validation against the config. @@ -34,6 +36,11 @@ func (c *conf) Validate() error { if c.UpdateInterval == 0 { return errors.New("update_interval must not be zero") } + switch strings.ToLower(c.Dataset) { + case "", "all", "users", "devices": + default: + return errors.New("dataset must be 'all', 'users', 'devices' or empty") + } return nil } diff --git a/x-pack/filebeat/input/entityanalytics/provider/azuread/conf_test.go b/x-pack/filebeat/input/entityanalytics/provider/azuread/conf_test.go index bd020fd4b19..611d60f427d 100644 --- a/x-pack/filebeat/input/entityanalytics/provider/azuread/conf_test.go +++ b/x-pack/filebeat/input/entityanalytics/provider/azuread/conf_test.go @@ -27,6 +27,14 @@ func TestConf_Validate(t *testing.T) { }, WantErr: "sync_interval must be longer than update_interval", }, + "err-invalid-dataset": { + In: conf{ + SyncInterval: defaultSyncInterval, + UpdateInterval: defaultUpdateInterval, + Dataset: "everything", + }, + WantErr: "dataset must be 'all', 'users', 'devices' or empty", + }, } for name, tc := range tests {