Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feature(compression): add zstd log compression to statement log files #453

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 5 additions & 2 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ DOCKER_VERSION ?= latest
GOLANGCI_VERSION ?= 1.62.0

CQL_FEATURES ?= normal
CONCURRENCY ?= 1
CONCURRENCY ?= 4
DURATION ?= 10m
WARMUP ?= 0
MODE ?= mixed
Expand Down Expand Up @@ -42,7 +42,10 @@ GEMINI_FLAGS ?= --fail-fast \
--duration=$(DURATION) \
--warmup=$(WARMUP) \
--profiling-port=6060 \
--drop-schema=true
--drop-schema=true \
--oracle-statement-log-file=$(PWD)/results/oracle-statements.log.zst \
--test-statement-log-file=$(PWD)/results/test-statements.log.zst \
--statement-log-file-compression=zstd


ifndef GOBIN
Expand Down
26 changes: 21 additions & 5 deletions cmd/gemini/root.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,8 @@ import (
"text/tabwriter"
"time"

"github.com/scylladb/gemini/pkg/stmtlogger"

"github.com/gocql/gocql"
"github.com/hailocab/go-hostpool"
"github.com/pkg/errors"
Expand Down Expand Up @@ -108,6 +110,7 @@ var (
profilingPort int
testStatementLogFile string
oracleStatementLogFile string
statementLogFileCompression string
)

func interactive() bool {
Expand Down Expand Up @@ -214,11 +217,12 @@ func run(_ *cobra.Command, _ []string) error {

testCluster, oracleCluster := createClusters(cons, testHostSelectionPolicy, oracleHostSelectionPolicy, logger)
storeConfig := store.Config{
MaxRetriesMutate: maxRetriesMutate,
MaxRetriesMutateSleep: maxRetriesMutateSleep,
UseServerSideTimestamps: useServerSideTimestamps,
TestLogStatementsFile: testStatementLogFile,
OracleLogStatementsFile: oracleStatementLogFile,
MaxRetriesMutate: maxRetriesMutate,
MaxRetriesMutateSleep: maxRetriesMutateSleep,
UseServerSideTimestamps: useServerSideTimestamps,
TestLogStatementsFile: testStatementLogFile,
OracleLogStatementsFile: oracleStatementLogFile,
LogStatementFileCompression: getLogStatementFileCompression(statementLogFileCompression),
}
var tracingFile *os.File
if tracingOutFile != "" {
Expand Down Expand Up @@ -411,6 +415,17 @@ func createClusters(
return testCluster, oracleCluster
}

// getLogStatementFileCompression translates the value of the
// --statement-log-file-compression flag into a stmtlogger.Compression.
//
// Matching is exact: only the strings "zstd" and "gzip" are recognised.
// Every other value, including the empty string, yields
// stmtlogger.NoCompression rather than an error.
func getLogStatementFileCompression(input string) stmtlogger.Compression {
	if input == "zstd" {
		return stmtlogger.ZSTDCompression
	}

	if input == "gzip" {
		return stmtlogger.GZIPCompresssion
	}

	// Unrecognised names fall through to "no compression".
	return stmtlogger.NoCompression
}

func getReplicationStrategy(rs string, fallback *replication.Replication, logger *zap.Logger) *replication.Replication {
switch rs {
case "network":
Expand Down Expand Up @@ -538,6 +553,7 @@ func init() {
rootCmd.Flags().IntVarP(&maxErrorsToStore, "max-errors-to-store", "", 1000, "Maximum number of errors to store and output at the end")
rootCmd.Flags().StringVarP(&testStatementLogFile, "test-statement-log-file", "", "", "File to write statements flow to")
rootCmd.Flags().StringVarP(&oracleStatementLogFile, "oracle-statement-log-file", "", "", "File to write statements flow to")
rootCmd.Flags().StringVarP(&statementLogFileCompression, "statement-log-file-compression", "", "zstd", "Compression algorithm to use for statement log files")
}

func printSetup(seed, schemaSeed uint64) {
Expand Down
2 changes: 1 addition & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ require (
github.com/gocql/gocql v1.8.0
github.com/google/go-cmp v0.6.0
github.com/hailocab/go-hostpool v0.0.0-20160125115350-e80d13ce29ed
github.com/klauspost/compress v1.17.11
github.com/mitchellh/mapstructure v1.5.0
github.com/pkg/errors v0.9.1
github.com/prometheus/client_golang v1.20.5
Expand All @@ -28,7 +29,6 @@ require (
github.com/fatih/color v1.18.0 // indirect
github.com/golang/snappy v0.0.4 // indirect
github.com/inconshreveable/mousetrap v1.1.0 // indirect
github.com/klauspost/compress v1.17.11 // indirect
github.com/mattn/go-colorable v0.1.13 // indirect
github.com/mattn/go-isatty v0.0.20 // indirect
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect
Expand Down
43 changes: 39 additions & 4 deletions pkg/stmtlogger/filelogger.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ package stmtlogger
import (
"bufio"
"bytes"
"compress/gzip"
"context"
"io"
"log"
Expand All @@ -26,6 +27,7 @@ import (
"sync/atomic"
"time"

"github.com/klauspost/compress/zstd"
"github.com/pkg/errors"
"go.uber.org/multierr"

Expand Down Expand Up @@ -53,9 +55,17 @@ type (
wg sync.WaitGroup
active atomic.Bool
}

Compression int
)

func NewFileLogger(filename string) (StmtToFile, error) {
// Compression modes accepted by NewFileLogger and NewLogger.
// Values are assigned with iota, so NoCompression is the zero value of
// Compression.
const (
// NoCompression makes NewLogger buffer writes directly to the underlying writer.
NoCompression Compression = iota
// ZSTDCompression makes NewLogger wrap the underlying writer in a zstd encoder.
ZSTDCompression
// GZIPCompresssion makes NewLogger wrap the underlying writer in a gzip writer.
// NOTE(review): the identifier is spelled with three "s" characters; other code
// refers to it by this exact spelling, so it is kept as-is here.
GZIPCompresssion
)

func NewFileLogger(filename string, compression Compression) (StmtToFile, error) {
if filename == "" {
return &nopFileLogger{}, nil
}
Expand All @@ -65,14 +75,39 @@ func NewFileLogger(filename string) (StmtToFile, error) {
return nil, err
}

return NewLogger(fd)
return NewLogger(fd, compression)
}

func NewLogger(w io.Writer) (StmtToFile, error) {
func NewLogger(w io.Writer, compression Compression) (StmtToFile, error) {
ctx, cancel := context.WithCancel(context.Background())

var writer *bufio.Writer
switch compression {
case ZSTDCompression:
zstdWriter, err := zstd.NewWriter(w,
zstd.WithEncoderLevel(zstd.SpeedFastest),
zstd.WithAllLitEntropyCompression(true),
)
if err != nil {
cancel()
return nil, err
}

writer = bufio.NewWriterSize(zstdWriter, 8192)
case GZIPCompresssion:
gzipWriter, err := gzip.NewWriterLevel(w, gzip.BestSpeed)
if err != nil {
cancel()
return nil, err
}

writer = bufio.NewWriterSize(gzipWriter, 8192)
default:
writer = bufio.NewWriterSize(w, 8192)
}

out := &logger{
writer: bufio.NewWriterSize(w, 8192),
writer: writer,
fd: w,
channel: make(chan *bytes.Buffer, defaultChanSize),
cancel: cancel,
Expand Down
22 changes: 12 additions & 10 deletions pkg/store/store.go
Original file line number Diff line number Diff line change
Expand Up @@ -64,11 +64,12 @@ type Store interface {
}

type Config struct {
TestLogStatementsFile string
OracleLogStatementsFile string
MaxRetriesMutate int
MaxRetriesMutateSleep time.Duration
UseServerSideTimestamps bool
TestLogStatementsFile string
OracleLogStatementsFile string
LogStatementFileCompression stmtlogger.Compression
MaxRetriesMutate int
MaxRetriesMutateSleep time.Duration
UseServerSideTimestamps bool
}

func New(schema *typedef.Schema, testCluster, oracleCluster *gocql.ClusterConfig, cfg Config, traceOut io.Writer, logger *zap.Logger) (Store, error) {
Expand All @@ -78,15 +79,15 @@ func New(schema *typedef.Schema, testCluster, oracleCluster *gocql.ClusterConfig
}, []string{"system", "method"},
)

oracleStore, err := getStore("oracle", schema, oracleCluster, cfg, cfg.OracleLogStatementsFile, traceOut, logger, ops)
oracleStore, err := getStore("oracle", schema, oracleCluster, cfg, cfg.OracleLogStatementsFile, cfg.LogStatementFileCompression, traceOut, logger, ops)
if err != nil {
return nil, err
}

if testCluster == nil {
return nil, errors.New("test cluster is empty")
}
testStore, err := getStore("test", schema, testCluster, cfg, cfg.TestLogStatementsFile, traceOut, logger, ops)
testStore, err := getStore("test", schema, testCluster, cfg, cfg.TestLogStatementsFile, cfg.LogStatementFileCompression, traceOut, logger, ops)
if err != nil {
return nil, err
}
Expand Down Expand Up @@ -263,6 +264,7 @@ func getStore(
clusterConfig *gocql.ClusterConfig,
cfg Config,
stmtLogFile string,
compression stmtlogger.Compression,
traceOut io.Writer,
logger *zap.Logger,
ops *prometheus.CounterVec,
Expand All @@ -272,17 +274,17 @@ func getStore(
system: name,
}, nil
}
oracleSession, err := newSession(clusterConfig, traceOut)
session, err := newSession(clusterConfig, traceOut)
if err != nil {
return nil, errors.Wrapf(err, "failed to connect to %s cluster", name)
}
oracleFileLogger, err := stmtlogger.NewFileLogger(stmtLogFile)
oracleFileLogger, err := stmtlogger.NewFileLogger(stmtLogFile, compression)
if err != nil {
return nil, err
}

return &cqlStore{
session: oracleSession,
session: session,
schema: schema,
system: name,
ops: ops,
Expand Down
Loading