Skip to content

Commit

Permalink
Remove benchmark generate corpus command (#1553)
Browse files Browse the repository at this point in the history
* remove benchmark generate coprus command

* minor fixes in benhcmark rally

* make check-statis

* fix docs for benchmark rally command
  • Loading branch information
Andrea Spacca authored Nov 13, 2023
1 parent 2c241f1 commit b2251c5
Show file tree
Hide file tree
Showing 13 changed files with 79 additions and 576 deletions.
10 changes: 0 additions & 10 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -165,16 +165,6 @@ These benchmarks allow you to benchmark an integration end to end.
For details on how to configure system benchmarks for a package, review the [HOWTO guide](./docs/howto/system_benchmarking.md).
### `elastic-package benchmark generate-corpus`
_Context: package_
*BEWARE*: this command is in beta and it's behaviour may change in the future.
Use this command to generate benchmarks corpus data for a package.
Currently, only data for what we have related assets on https://github.com/elastic/elastic-integration-corpus-generator-tool are supported.
For details on how to run this command, review the [HOWTO guide](./docs/howto/generate_corpus.md).

### `elastic-package benchmark pipeline`
_Context: package_
Expand Down
93 changes: 0 additions & 93 deletions cmd/benchmark.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,6 @@ import (
"strings"
"time"

"github.com/dustin/go-humanize"

"github.com/elastic/elastic-package/internal/corpusgenerator"
"github.com/elastic/elastic-package/internal/elasticsearch"
"github.com/elastic/elastic-package/internal/install"
"github.com/elastic/elastic-package/internal/logger"
Expand All @@ -36,12 +33,6 @@ import (
"github.com/elastic/elastic-package/internal/testrunner"
)

const generateLongDescription = `
*BEWARE*: this command is in beta and it's behaviour may change in the future.
Use this command to generate benchmarks corpus data for a package.
Currently, only data for what we have related assets on https://github.com/elastic/elastic-integration-corpus-generator-tool are supported.
For details on how to run this command, review the [HOWTO guide](./docs/howto/generate_corpus.md).`

const benchLongDescription = `Use this command to run benchmarks on a package. Currently, the following types of benchmarks are available:
#### Pipeline Benchmarks
Expand Down Expand Up @@ -80,9 +71,6 @@ func setupBenchmarkCommand() *cobraext.Command {
systemCmd := getSystemCommand()
cmd.AddCommand(systemCmd)

generateCorpusCmd := getGenerateCorpusCommand()
cmd.AddCommand(generateCorpusCmd)

return cobraext.NewCommand(cmd, cobraext.ContextPackage)
}

Expand Down Expand Up @@ -257,16 +245,6 @@ func rallyCommandAction(cmd *cobra.Command, args []string) error {
return cobraext.FlagParsingError(err, cobraext.BenchNameFlagName)
}

deferCleanup, err := cmd.Flags().GetDuration(cobraext.DeferCleanupFlagName)
if err != nil {
return cobraext.FlagParsingError(err, cobraext.DeferCleanupFlagName)
}

metricsInterval, err := cmd.Flags().GetDuration(cobraext.BenchMetricsIntervalFlagName)
if err != nil {
return cobraext.FlagParsingError(err, cobraext.BenchMetricsIntervalFlagName)
}

dataReindex, err := cmd.Flags().GetBool(cobraext.BenchReindexToMetricstoreFlagName)
if err != nil {
return cobraext.FlagParsingError(err, cobraext.BenchReindexToMetricstoreFlagName)
Expand Down Expand Up @@ -314,8 +292,6 @@ func rallyCommandAction(cmd *cobra.Command, args []string) error {
withOpts := []rally.OptionFunc{
rally.WithVariant(variant),
rally.WithBenchmarkName(benchName),
rally.WithDeferCleanup(deferCleanup),
rally.WithMetricsInterval(metricsInterval),
rally.WithDataReindexing(dataReindex),
rally.WithPackageRootPath(packageRootPath),
rally.WithESAPI(esClient.API),
Expand Down Expand Up @@ -496,75 +472,6 @@ func systemCommandAction(cmd *cobra.Command, args []string) error {
return nil
}

func getGenerateCorpusCommand() *cobra.Command {
generateCorpusCmd := &cobra.Command{
Use: "generate-corpus",
Short: "Generate benchmarks corpus data for the package",
Long: generateLongDescription,
Args: cobra.NoArgs,
RunE: generateDataStreamCorpusCommandAction,
}

generateCorpusCmd.Flags().StringP(cobraext.PackageFlagName, cobraext.PackageFlagShorthand, "", cobraext.PackageFlagDescription)
generateCorpusCmd.Flags().StringP(cobraext.GenerateCorpusDataSetFlagName, cobraext.GenerateCorpusDataSetFlagShorthand, "", cobraext.GenerateCorpusDataSetFlagDescription)
generateCorpusCmd.Flags().StringP(cobraext.GenerateCorpusSizeFlagName, cobraext.GenerateCorpusSizeFlagShorthand, "", cobraext.GenerateCorpusSizeFlagDescription)
generateCorpusCmd.Flags().StringP(cobraext.GenerateCorpusCommitFlagName, cobraext.GenerateCorpusCommitFlagShorthand, "main", cobraext.GenerateCorpusCommitFlagDescription)
generateCorpusCmd.Flags().StringP(cobraext.GenerateCorpusRallyTrackOutputDirFlagName, cobraext.GenerateCorpusRallyTrackOutputDirFlagShorthand, "", cobraext.GenerateCorpusRallyTrackOutputDirFlagDescription)

return generateCorpusCmd
}

func generateDataStreamCorpusCommandAction(cmd *cobra.Command, _ []string) error {
packageName, err := cmd.Flags().GetString(cobraext.PackageFlagName)
if err != nil {
return cobraext.FlagParsingError(err, cobraext.PackageFlagName)
}

dataSetName, err := cmd.Flags().GetString(cobraext.GenerateCorpusDataSetFlagName)
if err != nil {
return cobraext.FlagParsingError(err, cobraext.GenerateCorpusDataSetFlagName)
}

totSize, err := cmd.Flags().GetString(cobraext.GenerateCorpusSizeFlagName)
if err != nil {
return cobraext.FlagParsingError(err, cobraext.GenerateCorpusSizeFlagName)
}

totSizeInBytes, err := humanize.ParseBytes(totSize)
if err != nil {
return cobraext.FlagParsingError(err, cobraext.GenerateCorpusSizeFlagName)
}

commit, err := cmd.Flags().GetString(cobraext.GenerateCorpusCommitFlagName)
if err != nil {
return cobraext.FlagParsingError(err, cobraext.GenerateCorpusCommitFlagName)
}

if len(commit) == 0 {
commit = "main"
}

rallyTrackOutputDir, err := cmd.Flags().GetString(cobraext.GenerateCorpusRallyTrackOutputDirFlagName)
if err != nil {
return cobraext.FlagParsingError(err, cobraext.GenerateCorpusRallyTrackOutputDirFlagName)
}

genLibClient := corpusgenerator.NewClient(commit)
generator, err := corpusgenerator.NewGenerator(genLibClient, packageName, dataSetName, totSizeInBytes)
if err != nil {
return fmt.Errorf("can't generate benchmarks data corpus for data stream: %w", err)
}

// TODO: we need a way to extract the type from the package and dataset, currently hardcode to `metrics`
dataStream := fmt.Sprintf("metrics-%s.%s-default", packageName, dataSetName)
err = corpusgenerator.RunGenerator(generator, dataStream, rallyTrackOutputDir)
if err != nil {
return fmt.Errorf("can't generate benchmarks data corpus for data stream: %w", err)
}

return nil
}

func initializeESMetricsClient(ctx context.Context) (*elasticsearch.Client, error) {
address := os.Getenv(benchcommon.ESMetricstoreHostEnv)
user := os.Getenv(benchcommon.ESMetricstoreUsernameEnv)
Expand Down
41 changes: 0 additions & 41 deletions docs/howto/generate_corpus.md

This file was deleted.

7 changes: 3 additions & 4 deletions docs/howto/rally_benchmarking.md
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# HOWTO: Writing system benchmarks for a package
# HOWTO: Writing rally benchmarks for a package

## Introduction
Elastic Packages are comprised of data streams. A rally benchmark runs `esrally` track with a corpus of data into an Elasticsearch data stream, and reports rally stats as well as retrieving performance metrics from the Elasticsearch nodes.
Expand All @@ -10,11 +10,11 @@ Conceptually, running a rally benchmark involves the following steps:
1. Deploy the Elastic Stack, including Elasticsearch, Kibana, and the Elastic Agent(s). This step takes time so it should typically be done once as a pre-requisite to running a system benchmark scenario.
1. Install a package that configures its assets for every data stream in the package.
1. Metrics collections from the cluster starts. (**TODO**: record metrics from all Elastic Agents involved using the `system` integration.)
1. Send the collected metrics to the ES Metricstore if set.
1. Generate data (it uses the [corpus-generator-tool](https://github.com/elastic/elastic-integration-corpus-generator-tool))
1. Run an `esrally` track with the corpus of generated data. `esrally` must be installed on the system where the `elastic-package` is run and available in the `PATH`.
1. Wait for the `esrally` track to be executed.
1. Metrics collection ends and a summary report is created.
1. Send the collected metrics to the ES Metricstore if set.
1. Delete test artifacts.
1. Optionally reindex all ingested data into the ES Metricstore for further analysis.
1. **TODO**: Optionally compare results against another benchmark run.
Expand Down Expand Up @@ -60,7 +60,6 @@ Example:
description: Benchmark 20000 events ingested
data_stream:
name: testds
warmup_time_period: 10s
corpora:
generator:
total_events: 900000
Expand Down Expand Up @@ -275,7 +274,7 @@ In the directory of the `rally-track-output-dir` flag two files are saved:
Both files are required to replay the rally benchmark. The first file references the second in its content.
The command to run for replaying the track is the following:
```shell
rally --target-hosts='{"default":["%es_cluster_host:es_cluster_port%"]}' --track-path=%path/to/saved-track-json% --client-options='{"default":{"basic_auth_user":"%es_user%","basic_auth_password":"%es_user%","use_ssl":true,"verify_certs":false}}' --pipeline=benchmark-only
esrally --target-hosts='{"defauelt":["%es_cluster_host:es_cluster_port%"]}' --track-path=%path/to/saved-track-json% --client-options='{"default":{"basic_auth_user":"%es_user%","basic_auth_password":"%es_user%","use_ssl":true,"verify_certs":false}}' --pipeline=benchmark-only
```

Please refer to [esrally CLI reference](https://esrally.readthedocs.io/en/stable/command_line_reference.html) for more details.
Expand Down
12 changes: 0 additions & 12 deletions internal/benchrunner/runners/rally/options.go
Original file line number Diff line number Diff line change
Expand Up @@ -67,18 +67,6 @@ func WithBenchmarkName(name string) OptionFunc {
}
}

func WithDeferCleanup(d time.Duration) OptionFunc {
return func(opts *Options) {
opts.DeferCleanup = d
}
}

func WithMetricsInterval(d time.Duration) OptionFunc {
return func(opts *Options) {
opts.MetricsInterval = d
}
}

func WithDataReindexing(b bool) OptionFunc {
return func(opts *Options) {
opts.ReindexData = b
Expand Down
71 changes: 69 additions & 2 deletions internal/benchrunner/runners/rally/runner.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,13 +17,13 @@ import (
"os/exec"
"path/filepath"
"strings"
"text/template"
"time"

"github.com/elastic/elastic-package/internal/packages/installer"

"github.com/magefile/mage/sh"

"github.com/elastic/elastic-package/internal/corpusgenerator"
"github.com/elastic/elastic-package/internal/stack"

"github.com/google/uuid"
Expand Down Expand Up @@ -51,6 +51,42 @@ const (

// BenchType defining rally benchmark
BenchType benchrunner.Type = "rally"

rallyTrackTemplate = `{% import "rally.helpers" as rally with context %}
{
"version": 2,
"description": "Track for [[.DataStream]]",
"datastream": [
{
"name": "[[.DataStream]]",
"body": "[[.CorpusFilename]]"
}
],
"corpora": [
{
"name": "[[.CorpusFilename]]",
"documents": [
{
"target-data-stream": "[[.DataStream]]",
"source-file": "[[.CorpusFilename]]",
"document-count": [[.CorpusDocsCount]],
"uncompressed-bytes": [[.CorpusSizeInBytes]]
}
]
}
],
"schedule": [
{
"operation": {
"operation-type": "bulk",
"bulk-size": {{bulk_size | default(5000)}},
"ingest-percentage": {{ingest_percentage | default(100)}}
},
"clients": {{bulk_indexing_clients | default(8)}}
}
]
}
`
)

var ErrDryRun = errors.New("dry run: rally benchmark not executed")
Expand Down Expand Up @@ -524,7 +560,7 @@ func (r *runner) runGenerator(destDir string) error {
return fmt.Errorf("cannot not create rally track file: %w", err)
}
r.trackFile = trackFile.Name()
rallyTrackContent, err := corpusgenerator.GenerateRallyTrack(r.runtimeDataStream, corpusFile, corpusDocsCount)
rallyTrackContent, err := generateRallyTrack(r.runtimeDataStream, corpusFile, corpusDocsCount)
if err != nil {
return fmt.Errorf("cannot not generate rally track content: %w", err)
}
Expand Down Expand Up @@ -909,3 +945,34 @@ func createRunID() string {
func getDataStreamPath(packageRoot, dataStream string) string {
return filepath.Join(packageRoot, "data_stream", dataStream)
}

func generateRallyTrack(dataStream string, corpusFile *os.File, corpusDocsCount uint64) ([]byte, error) {
t := template.New("rallytrack")

parsedTpl, err := t.Delims("[[", "]]").Parse(rallyTrackTemplate)
if err != nil {
return nil, fmt.Errorf("error while parsing rally track template: %w", err)
}

fi, err := corpusFile.Stat()
if err != nil {
return nil, fmt.Errorf("error with stat on rally corpus file: %w", err)
}

corpusSizeInBytes := fi.Size()

buf := new(bytes.Buffer)
templateData := map[string]any{
"DataStream": dataStream,
"CorpusFilename": filepath.Base(corpusFile.Name()),
"CorpusDocsCount": corpusDocsCount,
"CorpusSizeInBytes": corpusSizeInBytes,
}

err = parsedTpl.Execute(buf, templateData)
if err != nil {
return nil, fmt.Errorf("error on parsin on rally track template: %w", err)
}

return buf.Bytes(), nil
}
7 changes: 5 additions & 2 deletions internal/benchrunner/runners/rally/scenario.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@ package rally
import (
"errors"
"fmt"
"os"
"path/filepath"

"github.com/elastic/go-ucfg/yaml"
Expand Down Expand Up @@ -56,7 +55,7 @@ func readConfig(path, scenario, packageName, packageVersion string) (*scenario,
configPath := filepath.Join(path, devPath, fmt.Sprintf("%s.yml", scenario))
c := defaultConfig()
cfg, err := yaml.NewConfigWithFile(configPath)
if err != nil && !errors.Is(err, os.ErrNotExist) {
if err != nil {
return nil, fmt.Errorf("can't load benchmark configuration: %s: %w", configPath, err)
}

Expand All @@ -69,5 +68,9 @@ func readConfig(path, scenario, packageName, packageVersion string) (*scenario,
c.Package = packageName
c.Version = packageVersion

if c.DataStream.Name == "" {
return nil, errors.New("can't read data stream name from benchmark configuration: empty")
}

return c, nil
}
Loading

0 comments on commit b2251c5

Please sign in to comment.