From dc5aabe23a03490f1ed6a5ebb44c828fdf43d28d Mon Sep 17 00:00:00 2001 From: anjali9791 Date: Thu, 5 Oct 2023 08:36:11 +0530 Subject: [PATCH] feat: enable included fields feature for search assets api (#61) Co-authored-by: anjali.agarwal --- core/asset/discovery.go | 3 + internal/server/v1beta1/search.go | 15 ++-- .../discovery_search_repository.go | 14 +++- .../discovery_search_repository_test.go | 80 +++++++++++++++---- 4 files changed, 86 insertions(+), 26 deletions(-) diff --git a/core/asset/discovery.go b/core/asset/discovery.go index cc6f5ad5..ba2fd2c0 100644 --- a/core/asset/discovery.go +++ b/core/asset/discovery.go @@ -70,6 +70,9 @@ type SearchConfig struct { // Offset parameter defines the offset from the first result you want to fetch // Note that MaxResults + Offset can not be more than the `index.max_result_window` index setting in ES cluster, which defaults to 10,000 Offset int + + // IncludeFields specifies the fields to return in response + IncludeFields []string } // SearchResult represents an item/result in a list of search results diff --git a/internal/server/v1beta1/search.go b/internal/server/v1beta1/search.go index 1c33c8ca..b35a851f 100644 --- a/internal/server/v1beta1/search.go +++ b/internal/server/v1beta1/search.go @@ -23,13 +23,14 @@ func (server *APIServer) SearchAssets(ctx context.Context, req *compassv1beta1.S } cfg := asset.SearchConfig{ - Text: text, - MaxResults: int(req.GetSize()), - Filters: filterConfigFromValues(req.GetFilter()), - RankBy: req.GetRankby(), - Queries: req.GetQuery(), - Flags: getSearchFlagsFromFlags(req.GetFlags()), - Offset: int(req.GetOffset()), + Text: text, + MaxResults: int(req.GetSize()), + Filters: filterConfigFromValues(req.GetFilter()), + RankBy: req.GetRankby(), + Queries: req.GetQuery(), + Flags: getSearchFlagsFromFlags(req.GetFlags()), + Offset: int(req.GetOffset()), + IncludeFields: req.GetIncludeFields(), } results, err := server.assetService.SearchAssets(ctx, cfg) diff --git a/internal/store/elasticsearch/discovery_search_repository.go b/internal/store/elasticsearch/discovery_search_repository.go index cff8ff45..6ac7af94 100644 --- a/internal/store/elasticsearch/discovery_search_repository.go +++ b/internal/store/elasticsearch/discovery_search_repository.go @@ -22,13 +22,13 @@ const ( suggesterName = "name-phrase-suggest" ) -var returnedAssetFieldsResult = []string{"id", "urn", "type", "service", "name", "description", "data", "labels", "created_at", "updated_at"} - // Search the asset store func (repo *DiscoveryRepository) Search(ctx context.Context, cfg asset.SearchConfig) (results []asset.SearchResult, err error) { if strings.TrimSpace(cfg.Text) == "" { return nil, asset.DiscoveryError{Op: "Search", Err: errors.New("search text cannot be empty")} } + var returnedAssetFieldsResult []string + maxResults := cfg.MaxResults if maxResults <= 0 { maxResults = defaultMaxResults @@ -38,6 +38,15 @@ func (repo *DiscoveryRepository) Search(ctx context.Context, cfg asset.SearchCon offset = 0 } + if len(cfg.IncludeFields) == 0 { + returnedAssetFieldsResult = []string{ + "id", "urn", "type", "service", "name", "description", "data", "labels", + "created_at", "updated_at", + } + } else { + returnedAssetFieldsResult = cfg.IncludeFields + } + defer func(start time.Time) { const op = "search" repo.cli.instrumentOp(ctx, instrumentParams{ @@ -350,6 +359,7 @@ func toSearchResults(hits []searchHit) []asset.SearchResult { "_highlight": hit.HighLight, } } + results[i] = asset.SearchResult{ Type: r.Type.String(), ID: id, diff --git a/internal/store/elasticsearch/discovery_search_repository_test.go b/internal/store/elasticsearch/discovery_search_repository_test.go index a59b37cb..cf387845 100644 --- a/internal/store/elasticsearch/discovery_search_repository_test.go +++ b/internal/store/elasticsearch/discovery_search_repository_test.go @@ -58,6 +58,7 @@ func TestSearcherSearch(t *testing.T) { Type string AssetID string Data map[string]interface{} + Service string } type searchTest struct { Description string @@ -68,10 +69,38 @@ func TestSearcherSearch(t *testing.T) { tests := []searchTest{ { - Description: "should fetch assets which has text in any of its fields", + Description: "should fetch assets with fields mentioned in included fields", + Config: asset.SearchConfig{ + Text: "topic", + IncludeFields: []string{"id", "data.company", "type"}, + }, + Expected: []expectedRow{ + {Type: "topic", AssetID: "consumer-topic", Data: map[string]interface{}{"company": "gotocompany"}}, + {Type: "topic", AssetID: "order-topic", Data: map[string]interface{}{"company": "gotocompany"}}, + {Type: "topic", AssetID: "purchase-topic", Data: map[string]interface{}{"company": "microsoft"}}, + {Type: "topic", AssetID: "consumer-mq-2", Data: map[string]interface{}{"company": "gotocompany"}}, + {Type: "topic", AssetID: "transaction", Data: map[string]interface{}{"company": "gotocompany"}}, + }, + }, + { + Description: "should fetch assets with default fields if included fields is empty", Config: asset.SearchConfig{ Text: "topic", }, + Expected: []expectedRow{ + {Type: "topic", AssetID: "consumer-topic", Service: "rabbitmq", Data: map[string]interface{}{"company": "gotocompany", "country": "id", "description": "Update on every rabbitmq customer creation/update", "environment": "production", "partition": float64(50), "topic_name": "consumer-topic"}}, + {Type: "topic", AssetID: "order-topic", Service: "kafka", Data: map[string]interface{}{"company": "gotocompany", "country": "us", "description": "Topic for each submitted order", "environment": "integration", "malformed": "2023-05-23T04:35:42Z", "partition": float64(250), "topic_name": "order-topic"}}, + {Type: "topic", AssetID: "purchase-topic", Service: "kafka", Data: map[string]interface{}{"company": "microsoft", "country": "id", "description": "Topic for each submitted purchase", "environment": "integration", "malformed": "", "partition": float64(100), "topic_name": "purchase-topic"}}, + {Type: "topic", AssetID: "consumer-mq-2", Service: "rabbitmq", Data: map[string]interface{}{"company": "gotocompany", "country": "id", "description": "Another rabbitmq topic", "environment": "production", "partition": float64(50), "topic_name": "consumer-mq-2"}}, + {Type: "topic", AssetID: "transaction", Service: "rabbitmq", Data: map[string]interface{}{"company": "gotocompany", "description": "This publishes all the invoices from each of invoice storage where the invoice will be filtered and checked using invoice filterer and invoice checker", "environment": "production", "partition": float64(1), "topic_name": "transaction"}}, + }, + }, + { + Description: "should fetch assets which has text in any of its fields", + Config: asset.SearchConfig{ + Text: "topic", + IncludeFields: []string{"type", "id"}, + }, Expected: []expectedRow{ {Type: "topic", AssetID: "consumer-topic"}, {Type: "topic", AssetID: "order-topic"}, @@ -83,7 +112,8 @@ func TestSearcherSearch(t *testing.T) { { Description: "should enable fuzzy search", Config: asset.SearchConfig{ - Text: "tpic", + Text: "tpic", + IncludeFields: []string{"type", "id"}, }, Expected: []expectedRow{ {Type: "topic", AssetID: "consumer-topic"}, @@ -96,15 +126,17 @@ func TestSearcherSearch(t *testing.T) { { Description: "should disable fuzzy search", Config: asset.SearchConfig{ - Text: "tpic", - Flags: asset.SearchFlags{DisableFuzzy: true}, + Text: "tpic", + Flags: asset.SearchFlags{DisableFuzzy: true}, + IncludeFields: []string{"type", "id"}, }, Expected: []expectedRow{}, }, { Description: "should put more weight on id fields", Config: asset.SearchConfig{ - Text: "invoice", + Text: "invoice", + IncludeFields: []string{"type", "id"}, }, Expected: []expectedRow{ {Type: "table", AssetID: "us1-apple-invoice"}, @@ -119,6 +151,7 @@ func TestSearcherSearch(t *testing.T) { Filters: map[string][]string{ "service": {"rabbitmq", "postgres"}, }, + IncludeFields: []string{"type", "id"}, }, Expected: []expectedRow{ {Type: "table", AssetID: "au2-microsoft-invoice"}, @@ -132,6 +165,7 @@ func TestSearcherSearch(t *testing.T) { Filters: map[string][]string{ "data.company": {"gotocompany"}, }, + IncludeFields: []string{"type", "id"}, }, Expected: []expectedRow{ {Type: "topic", AssetID: "consumer-topic"}, @@ -149,6 +183,7 @@ func TestSearcherSearch(t *testing.T) { "data.environment": {"production"}, "data.company": {"gotocompany"}, }, + IncludeFields: []string{"type", "id"}, }, Expected: []expectedRow{ {Type: "topic", AssetID: "consumer-topic"}, @@ -162,6 +197,7 @@ func TestSearcherSearch(t *testing.T) { Filters: map[string][]string{ "owners.email": {"john.doe@email.com"}, }, + IncludeFields: []string{"type", "id"}, }, Expected: []expectedRow{ {Type: "topic", AssetID: "consumer-topic"}, @@ -170,8 +206,9 @@ func TestSearcherSearch(t *testing.T) { { Description: "should return a descendingly sorted based on usage count in search results if rank by usage in the config", Config: asset.SearchConfig{ - Text: "bigquery", - RankBy: "data.profile.usage_count", + Text: "bigquery", + RankBy: "data.profile.usage_count", + IncludeFields: []string{"type", "id"}, }, Expected: []expectedRow{ {Type: "table", AssetID: "bigquery::gcpproject/dataset/tablename-common"}, @@ -190,6 +227,7 @@ func TestSearcherSearch(t *testing.T) { "description": "rabbitmq", "owners.email": "john.doe", }, + IncludeFields: []string{"type", "id"}, }, Expected: []expectedRow{ {Type: "topic", AssetID: "consumer-topic"}, @@ -198,9 +236,10 @@ func TestSearcherSearch(t *testing.T) { { Description: "should return 5 records with offset of 0", Config: asset.SearchConfig{ - Text: "topic", - Offset: 0, - MaxResults: 5, + Text: "topic", + Offset: 0, + MaxResults: 5, + IncludeFields: []string{"type", "id"}, }, Expected: []expectedRow{ {Type: "topic", AssetID: "consumer-topic"}, @@ -213,9 +252,10 @@ func TestSearcherSearch(t *testing.T) { { Description: "should return 4 records with offset of 1", Config: asset.SearchConfig{ - Text: "topic", - Offset: 1, - MaxResults: 5, + Text: "topic", + Offset: 1, + MaxResults: 5, + IncludeFields: []string{"type", "id"}, }, Expected: []expectedRow{ // {Type: "topic", AssetID: "consumer-topic"}, @@ -232,6 +272,7 @@ func TestSearcherSearch(t *testing.T) { Queries: map[string]string{ "data.schema.columns.name": "common", }, + IncludeFields: []string{"type", "id"}, }, Expected: []expectedRow{ {Type: "table", AssetID: "bigquery::gcpproject/dataset/tablename-common"}, @@ -241,8 +282,9 @@ func TestSearcherSearch(t *testing.T) { { Description: "should return 'bigquery::gcpproject/dataset/tablename-abc-common-test' resource on top if searched for text 'tablename-abc-common-test'", Config: asset.SearchConfig{ - Text: "tablename-abc-common-test", - RankBy: "data.profile.usage_count", + Text: "tablename-abc-common-test", + RankBy: "data.profile.usage_count", + IncludeFields: []string{"type", "id"}, }, Expected: []expectedRow{ {Type: "table", AssetID: "bigquery::gcpproject/dataset/tablename-abc-common-test"}, @@ -261,6 +303,7 @@ func TestSearcherSearch(t *testing.T) { Flags: asset.SearchFlags{ EnableHighlight: true, }, + IncludeFields: []string{"type", "id"}, }, Expected: []expectedRow{ @@ -284,8 +327,9 @@ func TestSearcherSearch(t *testing.T) { Description: "should return 'bigquery::gcpproject/dataset/tablename-abc-common-test' resource on top if " + "searched for text 'abc-test' as it has both the keywords searched", Config: asset.SearchConfig{ - Text: "abc-test", - RankBy: "data.profile.usage_count", + Text: "abc-test", + RankBy: "data.profile.usage_count", + IncludeFields: []string{"type", "id"}, }, Expected: []expectedRow{ {Type: "table", AssetID: "bigquery::gcpproject/dataset/tablename-abc-common-test"}, @@ -302,6 +346,8 @@ func TestSearcherSearch(t *testing.T) { for i, res := range test.Expected { assert.Equal(t, res.Type, results[i].Type) assert.Equal(t, res.AssetID, results[i].ID) + assert.Equal(t, res.Data, results[i].Data) + assert.Equal(t, res.Service, results[i].Service) if test.Config.Flags.EnableHighlight { assert.Equal(t, res.Data["_highlight"], results[i].Data["_highlight"]) }