-
Notifications
You must be signed in to change notification settings - Fork 9
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Refactor structure to allow Batch Transformations
- Loading branch information
1 parent
31a4b09
commit 3c8e391
Showing
20 changed files
with
510 additions
and
99 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,61 @@ | ||
/** | ||
* Copyright (c) 2020-present Snowplow Analytics Ltd. | ||
* All rights reserved. | ||
* | ||
* This software is made available by Snowplow Analytics, Ltd., | ||
* under the terms of the Snowplow Limited Use License Agreement, Version 1.0 | ||
* located at https://docs.snowplow.io/limited-use-license-1.0 | ||
* BY INSTALLING, DOWNLOADING, ACCESSING, USING OR DISTRIBUTING ANY PORTION | ||
* OF THE SOFTWARE, YOU AGREE TO THE TERMS OF SUCH LICENSE AGREEMENT. | ||
*/ | ||
|
||
package batchtransform | ||
|
||
import "github.com/snowplow/snowbridge/pkg/models" | ||
|
||
// BatchTransformationFunction is a transformation function which operates across a batch of events. | ||
// It takes a slice of batches as input, and returns the successful batches, a slice of invalid messages, and a slice of oversized messages. | ||
type BatchTransformationFunction func([]models.MessageBatch) (success []models.MessageBatch, invalid []*models.Message, oversized []*models.Message) | ||
|
||
// BatchTransformationApplyFunction combines batch transformations into one callable function. | ||
// It takes the messages to transform plus two further slices of BatchTransformationFunctions, and returns a models.BatchTransformationResult. | ||
// The function built by NewBatchTransformation names those two slices pre and post, and runs them before and after the configured functions respectively. | ||
type BatchTransformationApplyFunction func([]*models.Message, []BatchTransformationFunction, []BatchTransformationFunction) models.BatchTransformationResult | ||
|
||
// BatchTransformationGenerator returns a BatchTransformationApplyFunction from a provided set of BatchTransformationFunctions. | ||
type BatchTransformationGenerator func(...BatchTransformationFunction) BatchTransformationApplyFunction | ||
|
||
// NewBatchTransformation constructs a function which applies all transformations to all messages, returning a TransformationResult. | ||
func NewBatchTransformation(tranformFunctions ...BatchTransformationFunction) BatchTransformationApplyFunction { | ||
// pre is a function to be run before the configured ones, post is to be run after. | ||
// This is done because sometimes functions need to _always_ run first or last, depending on the specific target logic. (eg. batching by dynamic headers, if configured) | ||
// pre and post functions are intended for use only in the implementations of targets. | ||
return func(messages []*models.Message, pre []BatchTransformationFunction, post []BatchTransformationFunction) models.BatchTransformationResult { | ||
// make a batch to begin with | ||
success := []models.MessageBatch{{OriginalMessages: messages}} | ||
|
||
// Because http will require specific functions to always go first and last, we provide these here | ||
// Compiler gets confused if we don't rename. | ||
functionsToRun := append(pre, tranformFunctions...) | ||
functionsToRun = append(functionsToRun, post...) | ||
|
||
// If no transformations, just return a result | ||
if len(functionsToRun) == 0 { | ||
return models.BatchTransformationResult{Success: success} | ||
} | ||
|
||
var invalid []*models.Message | ||
var oversized []*models.Message | ||
invalidList := make([]*models.Message, 0, len(messages)) | ||
oversizedList := make([]*models.Message, 0, len(messages)) | ||
// Run each transformation | ||
for _, transformFunction := range functionsToRun { | ||
// success is recomputed each time into a complete list of batches | ||
success, invalid, oversized = transformFunction(success) | ||
// Invalids are excluded each iteration so must be appended to a permanent list | ||
invalidList = append(invalidList, invalid...) | ||
|
||
oversizedList = append(oversizedList, oversized...) | ||
} | ||
|
||
return models.BatchTransformationResult{Success: success, Invalid: invalidList, Oversized: oversizedList} | ||
} | ||
} |
58 changes: 58 additions & 0 deletions
58
pkg/batchtransform/batchtransformconfig/batch_transform_config.go
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,58 @@ | ||
/** | ||
* Copyright (c) 2020-present Snowplow Analytics Ltd. | ||
* All rights reserved. | ||
* | ||
* This software is made available by Snowplow Analytics, Ltd., | ||
* under the terms of the Snowplow Limited Use License Agreement, Version 1.0 | ||
* located at https://docs.snowplow.io/limited-use-license-1.0 | ||
* BY INSTALLING, DOWNLOADING, ACCESSING, USING OR DISTRIBUTING ANY PORTION | ||
* OF THE SOFTWARE, YOU AGREE TO THE TERMS OF SUCH LICENSE AGREEMENT. | ||
*/ | ||
|
||
package batchtransformconfig | ||
|
||
import ( | ||
"fmt" | ||
|
||
"github.com/snowplow/snowbridge/config" | ||
"github.com/snowplow/snowbridge/pkg/batchtransform" | ||
) | ||
|
||
// SupportedTransformations is a ConfigurationPair slice containing all the officially supported batch transformations. | ||
// It is currently empty: no batch transformation config implementations have been registered yet. | ||
var SupportedTransformations = []config.ConfigurationPair{ | ||
// TODO: Add config implementations & put them here | ||
} | ||
|
||
// GetBatchTransformations builds and returns transformationApplyFunction | ||
// from the transformations configured. | ||
func GetBatchTransformations(c *config.Config, supportedTransformations []config.ConfigurationPair) (batchtransform.BatchTransformationApplyFunction, error) { | ||
funcs := make([]batchtransform.BatchTransformationFunction, 0) | ||
|
||
for _, transformation := range c.Data.BatchTransformations { | ||
|
||
useTransf := transformation.Use | ||
decoderOpts := &config.DecoderOptions{ | ||
Input: useTransf.Body, | ||
} | ||
|
||
var component interface{} | ||
var err error | ||
for _, pair := range supportedTransformations { | ||
if pair.Name == useTransf.Name { | ||
plug := pair.Handle | ||
component, err = c.CreateComponent(plug, decoderOpts) | ||
if err != nil { | ||
return nil, err | ||
} | ||
} | ||
} | ||
|
||
f, ok := component.(batchtransform.BatchTransformationFunction) | ||
if !ok { | ||
return nil, fmt.Errorf("could not interpret transformation configuration for %q", useTransf.Name) | ||
} | ||
funcs = append(funcs, f) | ||
} | ||
|
||
return batchtransform.NewBatchTransformation(funcs...), nil | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,74 @@ | ||
/** | ||
* Copyright (c) 2020-present Snowplow Analytics Ltd. | ||
* All rights reserved. | ||
* | ||
* This software is made available by Snowplow Analytics, Ltd., | ||
* under the terms of the Snowplow Limited Use License Agreement, Version 1.0 | ||
* located at https://docs.snowplow.io/limited-use-license-1.0 | ||
* BY INSTALLING, DOWNLOADING, ACCESSING, USING OR DISTRIBUTING ANY PORTION | ||
* OF THE SOFTWARE, YOU AGREE TO THE TERMS OF SUCH LICENSE AGREEMENT. | ||
*/ | ||
|
||
package batchtransform | ||
|
||
import ( | ||
"bytes" | ||
"encoding/json" | ||
"text/template" | ||
|
||
"github.com/pkg/errors" | ||
"github.com/snowplow/snowbridge/pkg/models" | ||
) | ||
|
||
// TemplaterBatchTransformationFunction is a thing TODO add desc | ||
func TemplaterBatchTransformationFunction(batches []models.MessageBatch) ([]models.MessageBatch, []*models.Message) { | ||
|
||
// This is just an outline implementation of a templater function, to help figure out the design of batch transforms in general | ||
|
||
// The templater would fit here along the following lines: | ||
const templ = `{ | ||
attributes: [ {{$first_1 := true}} | ||
{{range .}}{{if $first_1}}{{$first_1 = false}}{{else}},{{end}} | ||
{{printf "%s" .attribute_data}}{{end}} | ||
], | ||
events: [ {{$first_2 := true}} | ||
{{range .}}{{if $first_2}}{{$first_2 = false}}{{else}},{{end}} | ||
{{printf "%s" .event_data}}{{end}} | ||
] | ||
}` | ||
|
||
invalid := make([]*models.Message, 0) | ||
safe := make([]*models.Message, 0) | ||
|
||
for _, b := range batches { | ||
formatted := []map[string]json.RawMessage{} | ||
for _, msg := range b.OriginalMessages { | ||
// Use json.RawMessage to ensure templating format works (real implementation has a problem to figure out here) | ||
var asMap map[string]json.RawMessage | ||
|
||
if err := json.Unmarshal(msg.Data, &asMap); err != nil { | ||
msg.SetError(errors.Wrap(err, "templater error")) // TODO: Cleanup! | ||
invalid = append(invalid, msg) | ||
continue | ||
} | ||
|
||
formatted = append(formatted, asMap) | ||
} | ||
var buf bytes.Buffer | ||
|
||
t := template.Must(template.New("example").Parse(templ)) | ||
if err := t.Execute(&buf, formatted); err != nil { | ||
for _, msg := range safe { | ||
msg.SetError(errors.Wrap(err, "templater error")) // TODO: Cleanup! | ||
invalid = append(invalid, msg) | ||
} | ||
return nil, invalid | ||
} | ||
|
||
// Assign the templated request to the HTTPRequestBody field | ||
b.BatchData = buf.Bytes() | ||
|
||
} | ||
|
||
return batches, invalid | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,30 @@ | ||
/** | ||
* Copyright (c) 2020-present Snowplow Analytics Ltd. | ||
* All rights reserved. | ||
* | ||
* This software is made available by Snowplow Analytics, Ltd., | ||
* under the terms of the Snowplow Limited Use License Agreement, Version 1.0 | ||
* located at https://docs.snowplow.io/limited-use-license-1.0 | ||
* BY INSTALLING, DOWNLOADING, ACCESSING, USING OR DISTRIBUTING ANY PORTION | ||
* OF THE SOFTWARE, YOU AGREE TO THE TERMS OF SUCH LICENSE AGREEMENT. | ||
*/ | ||
|
||
package models | ||
|
||
import "time" | ||
|
||
// MessageBatch houses a batch of messages, for batch transformations to operate across. | ||
// Alongside the messages themselves it can carry a prepared request body, request headers and request timings. | ||
type MessageBatch struct { | ||
OriginalMessages []*Message // The messages which make up the batch. Most targets will use the data from here, but where we have a http templating transformation, we would use this to ack batches of messages | ||
BatchData []byte // Where we template http requests, we use this to define the body of the request | ||
HTTPHeaders map[string]string // For dynamic headers feature | ||
// NOTE(review): presumably the time at which the request for this batch was started - nothing visible here sets it, confirm against the target implementations | ||
TimeRequestStarted time.Time | ||
// NOTE(review): presumably the time at which the request for this batch finished - nothing visible here sets it, confirm against the target implementations | ||
TimeRequestFinished time.Time | ||
} | ||
|
||
// BatchTransformationResult houses the result of a batch transformation: | ||
// the batches which remain after transformation, and the messages reported as invalid or oversized along the way. | ||
type BatchTransformationResult struct { | ||
// Success holds the batches produced by the transformations | ||
Success []MessageBatch | ||
// Invalid holds the messages which transformations reported as invalid | ||
Invalid []*Message | ||
// Oversized holds the messages which transformations reported as oversized | ||
Oversized []*Message | ||
} |
Oops, something went wrong.