From 7026e71c84655211645e5be3497ab46177282203 Mon Sep 17 00:00:00 2001 From: Dmitrii Neeman Date: Mon, 7 Oct 2024 17:46:22 +0300 Subject: [PATCH 01/25] FMWK-570-backup-restore-state - wip on state support --- config_partition_filter.go | 8 +- config_partition_filter_test.go | 2 +- config_policy_compression.go | 64 ++++++++++++++++ config_policy_encryption.go | 66 ++++++++++++++++ state.go | 132 ++++++++++++++++++++++++++++++++ state_test.go | 47 ++++++++++++ 6 files changed, 314 insertions(+), 5 deletions(-) create mode 100644 config_policy_compression.go create mode 100644 config_policy_encryption.go create mode 100644 state.go create mode 100644 state_test.go diff --git a/config_partition_filter.go b/config_partition_filter.go index 0def8768..a62d3a16 100644 --- a/config_partition_filter.go +++ b/config_partition_filter.go @@ -66,21 +66,21 @@ func NewPartitionFilterAll() *a.PartitionFilter { // splitPartitions splits partition to groups. func splitPartitions(partitionFilters []*a.PartitionFilter, numWorkers int) ([]*a.PartitionFilter, error) { if numWorkers < 1 || numWorkers < len(partitionFilters) { - return nil, fmt.Errorf("numWorkers is less than partitionFilters, cannot split partitionFilters") + return nil, fmt.Errorf("numWorkers is less than PartitionFilters, cannot split PartitionFilters") } // Validations. for i := range partitionFilters { if partitionFilters[i].Begin < 0 { - return nil, fmt.Errorf("startPartition is less than 0, cannot split partitionFilters") + return nil, fmt.Errorf("startPartition is less than 0, cannot split PartitionFilters") } if partitionFilters[i].Count < 1 { - return nil, fmt.Errorf("numPartitions is less than 1, cannot split partitionFilters") + return nil, fmt.Errorf("numPartitions is less than 1, cannot split PartitionFilters") } if partitionFilters[i].Begin+partitionFilters[i].Count > MaxPartitions { - return nil, fmt.Errorf("startPartition + numPartitions is greater than the max partitionFilters: %d", + return nil, fmt.Errorf("startPartition + numPartitions is greater than the max PartitionFilters: %d", MaxPartitions) } } diff --git a/config_partition_filter_test.go b/config_partition_filter_test.go index 587859fb..7e8eab1a 100644 --- a/config_partition_filter_test.go +++ b/config_partition_filter_test.go @@ -129,7 +129,7 @@ func TestSplitPartitions_NumWorkersLessThanFilters(t *testing.T) { _, err := splitPartitions(partitionFilters, numWorkers) assert.Error(t, err) - assert.Equal(t, "numWorkers is less than partitionFilters, cannot split partitionFilters", err.Error()) + assert.Equal(t, "numWorkers is less than PartitionFilters, cannot split PartitionFilters", err.Error()) } func TestSplitPartitionRange(t *testing.T) { diff --git a/config_policy_compression.go b/config_policy_compression.go new file mode 100644 index 00000000..f93d967c --- /dev/null +++ b/config_policy_compression.go @@ -0,0 +1,64 @@ +// Copyright 2024 Aerospike, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package backup + +import ( + "fmt" +) + +// Compression modes +const ( + // CompressNone no compression. + CompressNone = "NONE" + // CompressZSTD compression using ZSTD. + CompressZSTD = "ZSTD" +) + +// CompressionPolicy contains backup compression information. +type CompressionPolicy struct { + // The compression mode to be used (default is NONE). + Mode string `yaml:"mode,omitempty" json:"mode,omitempty" default:"NONE" enums:"NONE,ZSTD"` + // The compression level to use (or -1 if unspecified). + Level int `yaml:"level,omitempty" json:"level,omitempty"` +} + +// NewCompressionPolicy returns new compression policy for backup/restore operations. +func NewCompressionPolicy(mode string, level int) *CompressionPolicy { + return &CompressionPolicy{ + Mode: mode, + Level: level, + } +} + +// validate validates the compression policy parameters. +func (p *CompressionPolicy) validate() error { + if p == nil { + return nil + } + + if p.Mode != CompressNone && p.Mode != CompressZSTD { + return fmt.Errorf("invalid compression mode: %s", p.Mode) + } + + if p.Level == 0 { + p.Level = -1 + } + + if p.Level < -1 { + return fmt.Errorf("invalid compression level: %d", p.Level) + } + + return nil +} diff --git a/config_policy_encryption.go b/config_policy_encryption.go new file mode 100644 index 00000000..f367e6a3 --- /dev/null +++ b/config_policy_encryption.go @@ -0,0 +1,66 @@ +// Copyright 2024 Aerospike, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package backup + +import ( + "errors" + "fmt" +) + +// Encryption modes +const ( + // EncryptNone no encryption. + EncryptNone = "NONE" + // EncryptAES128 encryption using AES128 algorithm. + EncryptAES128 = "AES128" + // EncryptAES256 encryption using AES256 algorithm. + EncryptAES256 = "AES256" +) + +// EncryptionPolicy contains backup encryption information. +type EncryptionPolicy struct { + // The path to the file containing the encryption key. + KeyFile *string `yaml:"key-file,omitempty" json:"key-file,omitempty"` + // The name of the environment variable containing the encryption key. + KeyEnv *string `yaml:"key-env,omitempty" json:"key-env,omitempty"` + // The secret keyword in Aerospike Secret Agent containing the encryption key. + KeySecret *string `yaml:"key-secret,omitempty" json:"key-secret,omitempty"` + // The encryption mode to be used (NONE, AES128, AES256) + Mode string `yaml:"mode,omitempty" json:"mode,omitempty" default:"NONE" enums:"NONE,AES128,AES256"` +} + +// validate validates the encryption policy. +func (p *EncryptionPolicy) validate() error { + if p == nil { + return nil + } + + if p.Mode != EncryptNone && p.Mode != EncryptAES128 && p.Mode != EncryptAES256 { + return fmt.Errorf("invalid encryption mode: %s", p.Mode) + } + + if p.KeyFile == nil && p.KeyEnv == nil && p.KeySecret == nil { + return errors.New("encryption key location not specified") + } + + // Only one parameter allowed to be set. 
+ if (p.KeyFile != nil && p.KeyEnv != nil) || + (p.KeyFile != nil && p.KeySecret != nil) || + (p.KeyEnv != nil && p.KeySecret != nil) { + return fmt.Errorf("only one encryption key source may be specified") + } + + return nil +} diff --git a/state.go b/state.go new file mode 100644 index 00000000..4ef50807 --- /dev/null +++ b/state.go @@ -0,0 +1,132 @@ +package backup + +import ( + "context" + "encoding/gob" + "fmt" + "log/slog" + "os" + "time" + + a "github.com/aerospike/aerospike-client-go/v7" +) + +// State contains current backups status data. +type State struct { + // Global backup context. + ctx context.Context + + // File to save to. + fileName string + + // How often file will be saved to disk. + dumpTimeout time.Duration + + // List of applied partition filters + PartitionFilters []*a.PartitionFilter + + // Save files cursor. + // TODO: think how to map it to filters. + + // timestamp of last dump to file. + SavedAt time.Time + + // logger for logging errors. + logger *slog.Logger +} + +// NewState creates status service from parameters, for backup operations. +func NewState( + ctx context.Context, + fileName string, + dumpTimeout time.Duration, + partitionFilters []*a.PartitionFilter, + logger *slog.Logger, +) *State { + s := &State{ + ctx: ctx, + fileName: fileName, + dumpTimeout: dumpTimeout, + PartitionFilters: partitionFilters, + logger: logger, + } + // Run watcher on initialization. + go s.serve() + + return s +} + +// NewStateFromFile creates a status service from the file, for restore operations. +func NewStateFromFile(ctx context.Context, fileName string, logger *slog.Logger) (*State, error) { + // TODO: replace with io reader/writer. + reader, err := os.Open(fileName) + if err != nil { + return nil, fmt.Errorf("failed to open state file: %w", err) + } + + dec := gob.NewDecoder(reader) + + var state State + if err = dec.Decode(&state); err != nil { + return nil, fmt.Errorf("failed to decode state: %w", err) + } + + state.ctx = ctx + state.logger = logger + + return &state, nil +} + +// serve dumps files to disk. +func (s *State) serve() { + ticker := time.NewTicker(s.dumpTimeout) + defer ticker.Stop() + + // Dump a file at the very beginning. + if err := s.dump(); err != nil { + s.logger.Error("failed to dump state", slog.Any("error", err)) + return + } + + // Server ticker. + for { + select { + case <-s.ctx.Done(): + // saves state and exit + if err := s.dump(); err != nil { + s.logger.Error("failed to dump state", slog.Any("error", err)) + return + } + + return + case <-ticker.C: + // save state and sleep. + time.Sleep(time.Second) + // save intermediate state. + if err := s.dump(); err != nil { + s.logger.Error("failed to dump state", slog.Any("error", err)) + return + } + + s.SavedAt = time.Now() + } + } +} + +func (s *State) dump() error { + // TODO: replace with io reader/writer. + file, err := os.OpenFile(s.fileName, os.O_CREATE|os.O_WRONLY, 0o666) + if err != nil { + return fmt.Errorf("failed to create state file %s: %w", s.fileName, err) + } + + enc := gob.NewEncoder(file) + + // TODO: check if we must create copies from PartitionFilters. 
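// Worth noting about the encoding step that follows: encoding/gob serializes only
// exported struct fields, so the unexported ctx, fileName, dumpTimeout and logger
// are never written to disk; only PartitionFilters and SavedAt end up in the state file.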
+ + if err = enc.Encode(s); err != nil { + return fmt.Errorf("failed to encode state data: %w", err) + } + + return nil +} diff --git a/state_test.go b/state_test.go new file mode 100644 index 00000000..07d91ea4 --- /dev/null +++ b/state_test.go @@ -0,0 +1,47 @@ +package backup + +import ( + "context" + "log/slog" + "os" + "path/filepath" + "testing" + "time" + + a "github.com/aerospike/aerospike-client-go/v7" + "github.com/stretchr/testify/require" +) + +const ( + testDuration = 1 * time.Second +) + +func TestState(t *testing.T) { + t.Parallel() + + ctx, cancel := context.WithCancel(context.Background()) + tempFile := filepath.Join(t.TempDir(), "state_test.gob") + pfs := []*a.PartitionFilter{ + NewPartitionFilterByID(1), + NewPartitionFilterByID(2), + } + logger := slog.New(slog.NewTextHandler(nil, nil)) + + // Check init. + state := NewState(ctx, tempFile, testDuration, pfs, logger) + + time.Sleep(testDuration * 3) + + require.NotZero(t, state.SavedAt) + cancel() + + // Check that file exists. + _, err := os.Stat(tempFile) + require.NoError(t, err) + + // Check restore. + newCtx := context.Background() + newState, err := NewStateFromFile(newCtx, tempFile, logger) + require.NoError(t, err) + require.Equal(t, newState.PartitionFilters, pfs) +} From adf5d0592fd6824f58513fdf841328c3ffee60ad Mon Sep 17 00:00:00 2001 From: Dmitrii Neeman Date: Mon, 7 Oct 2024 17:49:50 +0300 Subject: [PATCH 02/25] FMWK-570-backup-restore-state - refactoring --- config_partition_filter.go | 8 ++-- config_partition_filter_test.go | 2 +- policy_compression.go | 64 -------------------------------- policy_encryption.go | 66 --------------------------------- 4 files changed, 5 insertions(+), 135 deletions(-) delete mode 100644 policy_compression.go delete mode 100644 policy_encryption.go diff --git a/config_partition_filter.go b/config_partition_filter.go index a62d3a16..0def8768 100644 --- a/config_partition_filter.go +++ b/config_partition_filter.go @@ -66,21 +66,21 @@ func NewPartitionFilterAll() *a.PartitionFilter { // splitPartitions splits partition to groups. func splitPartitions(partitionFilters []*a.PartitionFilter, numWorkers int) ([]*a.PartitionFilter, error) { if numWorkers < 1 || numWorkers < len(partitionFilters) { - return nil, fmt.Errorf("numWorkers is less than PartitionFilters, cannot split PartitionFilters") + return nil, fmt.Errorf("numWorkers is less than partitionFilters, cannot split partitionFilters") } // Validations. 
for i := range partitionFilters { if partitionFilters[i].Begin < 0 { - return nil, fmt.Errorf("startPartition is less than 0, cannot split PartitionFilters") + return nil, fmt.Errorf("startPartition is less than 0, cannot split partitionFilters") } if partitionFilters[i].Count < 1 { - return nil, fmt.Errorf("numPartitions is less than 1, cannot split PartitionFilters") + return nil, fmt.Errorf("numPartitions is less than 1, cannot split partitionFilters") } if partitionFilters[i].Begin+partitionFilters[i].Count > MaxPartitions { - return nil, fmt.Errorf("startPartition + numPartitions is greater than the max PartitionFilters: %d", + return nil, fmt.Errorf("startPartition + numPartitions is greater than the max partitionFilters: %d", MaxPartitions) } } diff --git a/config_partition_filter_test.go b/config_partition_filter_test.go index 7e8eab1a..587859fb 100644 --- a/config_partition_filter_test.go +++ b/config_partition_filter_test.go @@ -129,7 +129,7 @@ func TestSplitPartitions_NumWorkersLessThanFilters(t *testing.T) { _, err := splitPartitions(partitionFilters, numWorkers) assert.Error(t, err) - assert.Equal(t, "numWorkers is less than PartitionFilters, cannot split PartitionFilters", err.Error()) + assert.Equal(t, "numWorkers is less than partitionFilters, cannot split partitionFilters", err.Error()) } func TestSplitPartitionRange(t *testing.T) { diff --git a/policy_compression.go b/policy_compression.go deleted file mode 100644 index f93d967c..00000000 --- a/policy_compression.go +++ /dev/null @@ -1,64 +0,0 @@ -// Copyright 2024 Aerospike, Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package backup - -import ( - "fmt" -) - -// Compression modes -const ( - // CompressNone no compression. - CompressNone = "NONE" - // CompressZSTD compression using ZSTD. - CompressZSTD = "ZSTD" -) - -// CompressionPolicy contains backup compression information. -type CompressionPolicy struct { - // The compression mode to be used (default is NONE). - Mode string `yaml:"mode,omitempty" json:"mode,omitempty" default:"NONE" enums:"NONE,ZSTD"` - // The compression level to use (or -1 if unspecified). - Level int `yaml:"level,omitempty" json:"level,omitempty"` -} - -// NewCompressionPolicy returns new compression policy for backup/restore operations. -func NewCompressionPolicy(mode string, level int) *CompressionPolicy { - return &CompressionPolicy{ - Mode: mode, - Level: level, - } -} - -// validate validates the compression policy parameters. -func (p *CompressionPolicy) validate() error { - if p == nil { - return nil - } - - if p.Mode != CompressNone && p.Mode != CompressZSTD { - return fmt.Errorf("invalid compression mode: %s", p.Mode) - } - - if p.Level == 0 { - p.Level = -1 - } - - if p.Level < -1 { - return fmt.Errorf("invalid compression level: %d", p.Level) - } - - return nil -} diff --git a/policy_encryption.go b/policy_encryption.go deleted file mode 100644 index f367e6a3..00000000 --- a/policy_encryption.go +++ /dev/null @@ -1,66 +0,0 @@ -// Copyright 2024 Aerospike, Inc. 
-// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package backup - -import ( - "errors" - "fmt" -) - -// Encryption modes -const ( - // EncryptNone no encryption. - EncryptNone = "NONE" - // EncryptAES128 encryption using AES128 algorithm. - EncryptAES128 = "AES128" - // EncryptAES256 encryption using AES256 algorithm. - EncryptAES256 = "AES256" -) - -// EncryptionPolicy contains backup encryption information. -type EncryptionPolicy struct { - // The path to the file containing the encryption key. - KeyFile *string `yaml:"key-file,omitempty" json:"key-file,omitempty"` - // The name of the environment variable containing the encryption key. - KeyEnv *string `yaml:"key-env,omitempty" json:"key-env,omitempty"` - // The secret keyword in Aerospike Secret Agent containing the encryption key. - KeySecret *string `yaml:"key-secret,omitempty" json:"key-secret,omitempty"` - // The encryption mode to be used (NONE, AES128, AES256) - Mode string `yaml:"mode,omitempty" json:"mode,omitempty" default:"NONE" enums:"NONE,AES128,AES256"` -} - -// validate validates the encryption policy. -func (p *EncryptionPolicy) validate() error { - if p == nil { - return nil - } - - if p.Mode != EncryptNone && p.Mode != EncryptAES128 && p.Mode != EncryptAES256 { - return fmt.Errorf("invalid encryption mode: %s", p.Mode) - } - - if p.KeyFile == nil && p.KeyEnv == nil && p.KeySecret == nil { - return errors.New("encryption key location not specified") - } - - // Only one parameter allowed to be set. 
- if (p.KeyFile != nil && p.KeyEnv != nil) || - (p.KeyFile != nil && p.KeySecret != nil) || - (p.KeyEnv != nil && p.KeySecret != nil) { - return fmt.Errorf("only one encryption key source may be specified") - } - - return nil -} From 77cd6f3af32d93b7e4eb3387fdb570c3ae5c8ee1 Mon Sep 17 00:00:00 2001 From: Dmitrii Neeman Date: Wed, 9 Oct 2024 17:28:48 +0300 Subject: [PATCH 03/25] FMWK-570-backup-restore-state - wip --- client.go | 10 +- cmd/asbackup/readme.md | 2 + config_backup.go | 23 ++++ handler_backup.go | 32 +++++- handler_backup_records.go | 24 ++-- io/aerospike/record_reader.go | 3 +- io/encoding/asb/decode.go | 2 +- io/encoding/asb/encode.go | 4 +- io/local/writer.go | 3 + io_encoding.go | 2 +- models/data_models.go | 6 +- state.go | 209 ++++++++++++++++++++++++++++------ state_test.go | 23 +++- writers.go | 18 +-- 14 files changed, 291 insertions(+), 70 deletions(-) diff --git a/client.go b/client.go index b100af8f..27a029cd 100644 --- a/client.go +++ b/client.go @@ -212,7 +212,10 @@ func (c *Client) Backup( return nil, fmt.Errorf("failed to validate backup config: %w", err) } - handler := newBackupHandler(ctx, config, c.aerospikeClient, c.logger, writer, c.scanLimiter) + handler, err := newBackupHandler(ctx, config, c.aerospikeClient, c.logger, writer, c.scanLimiter) + if err != nil { + return nil, fmt.Errorf("failed to create backup handler: %w", err) + } handler.run() return handler, nil @@ -271,7 +274,10 @@ func (c *Client) Estimate( return 0, fmt.Errorf("failed to validate backup config: %w", err) } - handler := newBackupHandler(ctx, config, c.aerospikeClient, c.logger, nil, c.scanLimiter) + handler, err := newBackupHandler(ctx, config, c.aerospikeClient, c.logger, nil, c.scanLimiter) + if err != nil { + return 0, fmt.Errorf("failed to create backup handler: %w", err) + } result, err := handler.getEstimate(ctx, estimateSamples) if err != nil { diff --git a/cmd/asbackup/readme.md b/cmd/asbackup/readme.md index 22b36538..4f76fbba 100644 --- a/cmd/asbackup/readme.md +++ b/cmd/asbackup/readme.md @@ -182,6 +182,8 @@ Azure Flags: state will be placed in the directory with name `.asb.state`, or `.asb.state` if `--output-file-prefix` is given. +-q, --output-file-prefix When using directory parameter, prepend a prefix to the names of the generated files. + --machine Output machine-readable status updates to the given path, typically a FIFO. --no-config-file Do not read any config file. Default: disabled diff --git a/config_backup.go b/config_backup.go index cd0c4a24..b7606adb 100644 --- a/config_backup.go +++ b/config_backup.go @@ -104,6 +104,18 @@ type BackupConfig struct { Compact bool // Only include records that have no ttl set (persistent records). NoTTLOnly bool + // Either a path with a file name or a directory in which the backup state file will be + // placed if the backup is interrupted/fails. If a path with a file name is used, that + // exact path is where the backup file will be placed. If a directory is given, the backup + // state will be placed in the directory with name `.asb.state` + // Works only for default and/or partition backup. Not work with ParallelNodes or NodeList. + StateFile string + // How often we will dump a state file to disk. + StateFileDumpDuration time.Duration + // Resumes an interrupted/failed backup from where it was left off, given the .state file + // that was generated from the interrupted/failed run. + // Works only for default and/or partition backup. Not work with ParallelNodes or NodeList. 
+ Continue bool } // NewDefaultBackupConfig returns a new BackupConfig with default values. @@ -117,6 +129,7 @@ func NewDefaultBackupConfig() *BackupConfig { } } +// isParalleledByNodes checks if backup is parallel by nodes. func (c *BackupConfig) isParalleledByNodes() bool { return c.ParallelNodes || len(c.NodeList) > 0 } @@ -129,6 +142,16 @@ func (c *BackupConfig) isDefaultPartitionFilter() bool { c.PartitionFilters[0].Digest == nil } +// isStateFirstRun checks if it is first run of backup with a state file. +func (c *BackupConfig) isStateFirstRun() bool { + return c.StateFile != "" && c.Continue == false +} + +// isStateContinueRun checks if we continue backup from a state file. +func (c *BackupConfig) isStateContinue() bool { + return c.StateFile != "" && c.Continue == true +} + func (c *BackupConfig) isFullBackup() bool { // full backup doesn't have a lower bound. return c.ModAfter == nil && c.isDefaultPartitionFilter() diff --git a/handler_backup.go b/handler_backup.go index d298ed4b..f6c31ad7 100644 --- a/handler_backup.go +++ b/handler_backup.go @@ -37,6 +37,8 @@ import ( "golang.org/x/time/rate" ) +const filePrefixContinue = "continue_" + // Writer provides access to backup storage. // Exported for integration tests. type Writer interface { @@ -71,6 +73,8 @@ type BackupHandler struct { id string stats models.BackupStats + // Backup state for continuation. + state *State } // newBackupHandler creates a new BackupHandler. @@ -81,7 +85,7 @@ func newBackupHandler( logger *slog.Logger, writer Writer, scanLimiter *semaphore.Weighted, -) *BackupHandler { +) (*BackupHandler, error) { id := uuid.NewString() // For estimates calculations, a writer will be nil. storageType := "" @@ -96,6 +100,12 @@ func newBackupHandler( // redefine context cancel. ctx, cancel := context.WithCancel(ctx) + // Keep in mind, that on continue operation, we update partitions list in config by pointer. + state, err := NewState(ctx, config, logger) + if err != nil { + return nil, err + } + return &BackupHandler{ ctx: ctx, cancel: cancel, @@ -109,7 +119,8 @@ func newBackupHandler( limiter: limiter, infoClient: asinfo.NewInfoClientFromAerospike(ac, config.InfoPolicy), scanLimiter: scanLimiter, - } + state: state, + }, nil } // run runs the backup job. 
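For context, here is a minimal sketch of how a caller might use the new state fields, assuming a backupClient, ctx, writer and reader already set up as in the project README; error handling and waiting for completion are omitted, and (per the config comments above) state files only apply to default/partition backups, not ParallelNodes or NodeList.

    // First run: state tracking is enabled by setting StateFile; the reader
    // argument is only used to load an existing state file, so it can be nil.
    backupCfg := backup.NewDefaultBackupConfig()
    backupCfg.Namespace = "test"
    backupCfg.StateFile = "backup.state"
    backupCfg.StateFileDumpDuration = 10 * time.Second
    firstRun, err := backupClient.Backup(ctx, backupCfg, writer, nil)

    // Resuming an interrupted run: keep StateFile pointing at the generated file,
    // set Continue, and pass a StreamingReader that can read the state file back.
    backupCfg.Continue = true
    resumed, err := backupClient.Backup(ctx, backupCfg, writer, reader)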
@@ -233,6 +244,11 @@ func (bh *BackupHandler) backupSync(ctx context.Context) error { if err != nil { return err } + // Have to reload filter + bh.config.PartitionFilters, err = bh.state.loadPartitionFilters() + if err != nil { + return err + } return handler.run(ctx, writeWorkers, &bh.stats.ReadRecords) } @@ -243,7 +259,7 @@ func (bh *BackupHandler) makeWriteWorkers( writeWorkers := make([]pipeline.Worker[*models.Token], len(backupWriters)) for i, w := range backupWriters { - var dataWriter pipeline.DataWriter[*models.Token] = newTokenWriter(bh.encoder, w, bh.logger) + var dataWriter pipeline.DataWriter[*models.Token] = newTokenWriter(bh.encoder, w, bh.logger, bh.state.RecordsChan) dataWriter = newWriterWithTokenStats(dataWriter, &bh.stats, bh.logger) writeWorkers[i] = pipeline.NewWriteWorker(dataWriter, bh.limiter) } @@ -287,7 +303,11 @@ func (bh *BackupHandler) newWriter(ctx context.Context) (io.WriteCloser, error) } func (bh *BackupHandler) newConfiguredWriter(ctx context.Context) (io.WriteCloser, error) { - filename := bh.encoder.GenerateFilename() + prefix := "" + if bh.config.isStateContinue() { + prefix = filePrefixContinue + } + filename := bh.encoder.GenerateFilename(prefix) storageWriter, err := bh.writer.NewWriter(ctx, filename) if err != nil { @@ -411,7 +431,7 @@ func (bh *BackupHandler) backupSIndexes( reader := aerospike.NewSIndexReader(bh.infoClient, bh.config.Namespace, bh.logger) sindexReadWorker := pipeline.NewReadWorker[*models.Token](reader) - sindexWriter := pipeline.DataWriter[*models.Token](newTokenWriter(bh.encoder, writer, bh.logger)) + sindexWriter := pipeline.DataWriter[*models.Token](newTokenWriter(bh.encoder, writer, bh.logger, bh.state.RecordsChan)) sindexWriter = newWriterWithTokenStats(sindexWriter, &bh.stats, bh.logger) sindexWriteWorker := pipeline.NewWriteWorker(sindexWriter, bh.limiter) @@ -430,7 +450,7 @@ func (bh *BackupHandler) backupUDFs( reader := aerospike.NewUDFReader(bh.infoClient, bh.logger) udfReadWorker := pipeline.NewReadWorker[*models.Token](reader) - udfWriter := pipeline.DataWriter[*models.Token](newTokenWriter(bh.encoder, writer, bh.logger)) + udfWriter := pipeline.DataWriter[*models.Token](newTokenWriter(bh.encoder, writer, bh.logger, bh.state.RecordsChan)) udfWriter = newWriterWithTokenStats(udfWriter, &bh.stats, bh.logger) udfWriteWorker := pipeline.NewWriteWorker(udfWriter, bh.limiter) diff --git a/handler_backup_records.go b/handler_backup_records.go index c33ba35e..dbcd4601 100644 --- a/handler_backup_records.go +++ b/handler_backup_records.go @@ -203,24 +203,21 @@ func (bh *backupRecordsHandler) makeAerospikeReadWorkers( func (bh *backupRecordsHandler) makeAerospikeReadWorkersForPartition( ctx context.Context, n int, scanPolicy *a.ScanPolicy, ) ([]pipeline.Worker[*models.Token], error) { - partitionGroups, err := splitPartitions(bh.config.PartitionFilters, n) - if err != nil { - return nil, err + var err error + partitionGroups := bh.config.PartitionFilters + if !bh.config.isStateContinue() { + partitionGroups, err = splitPartitions(bh.config.PartitionFilters, n) + if err != nil { + return nil, err + } } // If we have multiply partition filters, we shrink workers to number of filters. 
- n = len(partitionGroups) + readWorkers := make([]pipeline.Worker[*models.Token], len(partitionGroups)) - readWorkers := make([]pipeline.Worker[*models.Token], n) - - for i := 0; i < n; i++ { + for i := range partitionGroups { recordReaderConfig := bh.recordReaderConfigForPartitions(partitionGroups[i], scanPolicy) - // For the first partition in the list, we start from digest if it is set. - if bh.afterDigest != nil && i == 0 { - recordReaderConfig = bh.recordReaderConfigForPartitions(partitionGroups[i], scanPolicy) - } - recordReader := aerospike.NewRecordReader( ctx, bh.aerospikeClient, @@ -278,10 +275,11 @@ func (bh *backupRecordsHandler) recordReaderConfigForPartitions( partitionFilter *a.PartitionFilter, scanPolicy *a.ScanPolicy, ) *aerospike.RecordReaderConfig { + pfCopy := *partitionFilter return aerospike.NewRecordReaderConfig( bh.config.Namespace, bh.config.SetList, - partitionFilter, + &pfCopy, nil, scanPolicy, bh.config.BinList, diff --git a/io/aerospike/record_reader.go b/io/aerospike/record_reader.go index 3cb50b53..d62807e4 100644 --- a/io/aerospike/record_reader.go +++ b/io/aerospike/record_reader.go @@ -141,7 +141,8 @@ func (r *RecordReader) Read() (*models.Token, error) { rec := models.Record{ Record: res.Record, } - recToken := models.NewRecordToken(&rec, 0) + // TODO: check how accurate is value of filter at this moment. + recToken := models.NewRecordToken(&rec, 0, *r.config.partitionFilter) return recToken, nil } diff --git a/io/encoding/asb/decode.go b/io/encoding/asb/decode.go index 4c024cf1..ea415103 100644 --- a/io/encoding/asb/decode.go +++ b/io/encoding/asb/decode.go @@ -161,7 +161,7 @@ func (r *Decoder) NextToken() (*models.Token, error) { case *models.UDF: return models.NewUDFToken(v, size), nil case *models.Record: - return models.NewRecordToken(v, size), nil + return models.NewRecordToken(v, size, a.PartitionFilter{}), nil default: return nil, fmt.Errorf("unsupported token type %T", v) } diff --git a/io/encoding/asb/encode.go b/io/encoding/asb/encode.go index 2b99842e..5b875de1 100644 --- a/io/encoding/asb/encode.go +++ b/io/encoding/asb/encode.go @@ -48,8 +48,8 @@ func NewEncoder(namespace string, compact bool) *Encoder { } // GenerateFilename generates a file name for the given namespace. -func (e *Encoder) GenerateFilename() string { - return fmt.Sprintf("%s_%d.asb", e.namespace, e.id.Add(1)) +func (e *Encoder) GenerateFilename(prefix string) string { + return fmt.Sprintf("%s%s_%d.asb", prefix, e.namespace, e.id.Add(1)) } // EncodeToken encodes a token to the ASB format. diff --git a/io/local/writer.go b/io/local/writer.go index 79ea37e3..3ba08717 100644 --- a/io/local/writer.go +++ b/io/local/writer.go @@ -32,6 +32,8 @@ type Writer struct { options // Sync for running backup to one file. called atomic.Bool + + fileName string } // WithRemoveFiles adds remove files flag, so all files will be removed from backup folder before backup. @@ -194,6 +196,7 @@ func (w *Writer) NewWriter(ctx context.Context, fileName string) (io.WriteCloser if ctx.Err() != nil { return nil, ctx.Err() } + // protection for single file backup. if !w.isDir { if !w.called.CompareAndSwap(false, true) { diff --git a/io_encoding.go b/io_encoding.go index b2a0e573..d9920d68 100644 --- a/io_encoding.go +++ b/io_encoding.go @@ -36,7 +36,7 @@ const ( type Encoder interface { EncodeToken(*models.Token) ([]byte, error) GetHeader() []byte - GenerateFilename() string + GenerateFilename(prefix string) string } // NewEncoder returns a new Encoder according to `EncoderType`. 
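To illustrate the file-naming behavior as implemented in this commit (a later commit in the series turns the prefix into a suffix): the ASB encoder combines the prefix, the namespace and an incrementing counter, so a continued run that passes filePrefixContinue produces names that cannot collide with the original files. A small illustrative snippet:

    enc := asb.NewEncoder("test", false)
    fmt.Println(enc.GenerateFilename(""))          // test_1.asb
    fmt.Println(enc.GenerateFilename("continue_")) // continue_test_2.asb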
diff --git a/models/data_models.go b/models/data_models.go index 33833229..500d9836 100644 --- a/models/data_models.go +++ b/models/data_models.go @@ -101,14 +101,18 @@ type Token struct { Record *Record Type TokenType Size uint64 + // Current filter state. Must copy this value. + Filter a.PartitionFilter + FileName string } // NewRecordToken creates a new token with the given record. -func NewRecordToken(r *Record, size uint64) *Token { +func NewRecordToken(r *Record, size uint64, filter a.PartitionFilter) *Token { return &Token{ Record: r, Type: TokenTypeRecord, Size: size, + Filter: filter, } } diff --git a/state.go b/state.go index 4ef50807..ef2e50c5 100644 --- a/state.go +++ b/state.go @@ -1,85 +1,182 @@ +// Copyright 2024 Aerospike, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + package backup import ( "context" "encoding/gob" "fmt" + "log" "log/slog" "os" + "sync" "time" a "github.com/aerospike/aerospike-client-go/v7" ) +// Must be the same as pipeline channelSize +const channelSize = 256 + // State contains current backups status data. type State struct { // Global backup context. ctx context.Context // File to save to. - fileName string + FileName string // How often file will be saved to disk. - dumpTimeout time.Duration - - // List of applied partition filters - PartitionFilters []*a.PartitionFilter - - // Save files cursor. - // TODO: think how to map it to filters. + DumpDuration time.Duration - // timestamp of last dump to file. - SavedAt time.Time + // Config *BackupConfig // logger for logging errors. logger *slog.Logger + + // ------ experiments ----- + RecordsChan chan recordState + RecordStates map[string]recordState + mu sync.RWMutex +} + +type recordState struct { + Filter filter +} + +// filter contains custom filter struct to save filter to GOB. +type filter struct { + Begin int + Count int + Digest []byte + Cursor []byte +} + +func mapToFilter(pf a.PartitionFilter) (filter, error) { + c, err := pf.EncodeCursor() + if err != nil { + return filter{}, fmt.Errorf("failed to encode cursor: %w", err) + } + return filter{ + Begin: pf.Begin, + Count: pf.Count, + Digest: pf.Digest, + Cursor: c, + }, nil +} + +func mapFromFilter(f filter) (*a.PartitionFilter, error) { + pf := &a.PartitionFilter{Begin: f.Begin, Count: f.Count, Digest: f.Digest} + if err := pf.DecodeCursor(f.Cursor); err != nil { + return nil, fmt.Errorf("failed to decode cursor: %w", err) + } + + return pf, nil +} + +func newRecordState(filter a.PartitionFilter) recordState { + f, err := mapToFilter(filter) + if err != nil { + log.Fatalf("failed to map partition filter: %w", err) + } + return recordState{ + Filter: f, + } +} + +func NewState(ctx context.Context, + config *BackupConfig, + logger *slog.Logger, +) (*State, error) { + switch { + case config.isStateFirstRun(): + return newState(ctx, config, logger), nil + case config.isStateContinue(): + s, err := newStateFromFile(ctx, config, logger) + if err != nil { + return nil, err + } + // change filters in config. 
+ config.PartitionFilters, err = s.loadPartitionFilters() + if err != nil { + return nil, err + } + return s, nil + } + + return nil, nil } // NewState creates status service from parameters, for backup operations. -func NewState( +func newState( ctx context.Context, - fileName string, - dumpTimeout time.Duration, - partitionFilters []*a.PartitionFilter, + config *BackupConfig, logger *slog.Logger, ) *State { + s := &State{ - ctx: ctx, - fileName: fileName, - dumpTimeout: dumpTimeout, - PartitionFilters: partitionFilters, - logger: logger, + ctx: ctx, + FileName: config.StateFile, + DumpDuration: config.StateFileDumpDuration, + // Config: config, + logger: logger, + RecordsChan: make(chan recordState, channelSize), + RecordStates: make(map[string]recordState), } // Run watcher on initialization. go s.serve() + go s.serveRecords() return s } -// NewStateFromFile creates a status service from the file, for restore operations. -func NewStateFromFile(ctx context.Context, fileName string, logger *slog.Logger) (*State, error) { +// NewStateFromFile creates a status service from the file, to continue operations. +func newStateFromFile( + ctx context.Context, + config *BackupConfig, + logger *slog.Logger, +) (*State, error) { // TODO: replace with io reader/writer. - reader, err := os.Open(fileName) + reader, err := os.Open(config.StateFile) if err != nil { return nil, fmt.Errorf("failed to open state file: %w", err) } dec := gob.NewDecoder(reader) - var state State - if err = dec.Decode(&state); err != nil { + var s State + if err = dec.Decode(&s); err != nil { return nil, fmt.Errorf("failed to decode state: %w", err) } - state.ctx = ctx - state.logger = logger + s.ctx = ctx + s.logger = logger + s.RecordsChan = make(chan recordState, channelSize) + + logger.Debug("loaded state file successfully") + + // Run watcher on initialization. + go s.serve() + go s.serveRecords() - return &state, nil + return &s, nil } // serve dumps files to disk. func (s *State) serve() { - ticker := time.NewTicker(s.dumpTimeout) + ticker := time.NewTicker(s.DumpDuration) defer ticker.Stop() // Dump a file at the very beginning. @@ -98,6 +195,8 @@ func (s *State) serve() { return } + s.logger.Debug("state context done") + return case <-ticker.C: // save state and sleep. @@ -107,26 +206,66 @@ func (s *State) serve() { s.logger.Error("failed to dump state", slog.Any("error", err)) return } - - s.SavedAt = time.Now() } } } func (s *State) dump() error { // TODO: replace with io reader/writer. - file, err := os.OpenFile(s.fileName, os.O_CREATE|os.O_WRONLY, 0o666) + file, err := os.OpenFile(s.FileName, os.O_CREATE|os.O_WRONLY, 0o666) if err != nil { - return fmt.Errorf("failed to create state file %s: %w", s.fileName, err) + return fmt.Errorf("failed to create state file %s: %w", s.FileName, err) } enc := gob.NewEncoder(file) - - // TODO: check if we must create copies from PartitionFilters. 
- + s.mu.RLock() if err = enc.Encode(s); err != nil { return fmt.Errorf("failed to encode state data: %w", err) } + s.mu.RUnlock() + + s.logger.Debug("state file dumped", slog.Time("saved at", time.Now())) return nil } + +func (s *State) loadPartitionFilters() ([]*a.PartitionFilter, error) { + s.mu.RLock() + + result := make([]*a.PartitionFilter, 0, len(s.RecordStates)) + for _, state := range s.RecordStates { + f, err := mapFromFilter(state.Filter) + if err != nil { + return nil, err + } + + result = append(result, f) + } + + s.mu.RUnlock() + + return result, nil +} + +func (s *State) serveRecords() { + + var counter int + for { + select { + case <-s.ctx.Done(): + return + case state := <-s.RecordsChan: + counter++ + s.mu.Lock() + key := fmt.Sprintf("%d%d%s", state.Filter.Begin, state.Filter.Count, state.Filter.Digest) + s.RecordStates[key] = state + s.mu.Unlock() + + // if counter == 400000 { + // s.dump() + // fmt.Println("done 4000000") + // os.Exit(1) + // } + } + } +} diff --git a/state_test.go b/state_test.go index 07d91ea4..086c5411 100644 --- a/state_test.go +++ b/state_test.go @@ -1,3 +1,17 @@ +// Copyright 2024 Aerospike, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + package backup import ( @@ -39,9 +53,16 @@ func TestState(t *testing.T) { _, err := os.Stat(tempFile) require.NoError(t, err) + // Nullify the link. + pfs = nil + result := []*a.PartitionFilter{ + NewPartitionFilterByID(1), + NewPartitionFilterByID(2), + } + // Check restore. newCtx := context.Background() newState, err := NewStateFromFile(newCtx, tempFile, logger) require.NoError(t, err) - require.Equal(t, newState.PartitionFilters, pfs) + require.Equal(t, newState.PartitionFilters, result) } diff --git a/writers.go b/writers.go index 2f7b8a08..9d51fc6b 100644 --- a/writers.go +++ b/writers.go @@ -85,21 +85,23 @@ func (tw *tokenStatsWriter) Close() error { // It writes the types from the models package as encoded data // to an io.Writer. It uses an Encoder to encode the data. type tokenWriter struct { - encoder Encoder - output io.Writer - logger *slog.Logger + encoder Encoder + output io.Writer + logger *slog.Logger + stateChan chan<- recordState } // newTokenWriter creates a new tokenWriter. 
-func newTokenWriter(encoder Encoder, output io.Writer, logger *slog.Logger) *tokenWriter { +func newTokenWriter(encoder Encoder, output io.Writer, logger *slog.Logger, stateChan chan<- recordState) *tokenWriter { id := uuid.NewString() logger = logging.WithWriter(logger, id, logging.WriterTypeToken) logger.Debug("created new token writer") return &tokenWriter{ - encoder: encoder, - output: output, - logger: logger, + encoder: encoder, + output: output, + logger: logger, + stateChan: stateChan, } } @@ -110,6 +112,8 @@ func (w *tokenWriter) Write(v *models.Token) (int, error) { return 0, fmt.Errorf("error encoding token: %w", err) } + w.stateChan <- newRecordState(v.Filter) + return w.output.Write(data) } From a11e87445ac2a3214cdf52de3b0be2dedafe0c03 Mon Sep 17 00:00:00 2001 From: Dmitrii Neeman Date: Thu, 10 Oct 2024 12:28:32 +0300 Subject: [PATCH 04/25] FMWK-570-backup-restore-state - state for local reader/writer --- client.go | 6 ++- handler_backup.go | 21 ++++---- handler_restore.go | 5 ++ io/aws/s3/reader.go | 6 +-- io/azure/blob/reader.go | 6 +-- io/encoding/asb/encode.go | 4 +- io/gcp/storage/reader.go | 6 +-- io/local/options.go | 81 +++++++++++++++++++++++++++++ io/local/reader.go | 58 +++------------------ io/local/writer.go | 37 ++++++------- state.go | 107 +++++++++++++++++++++++++------------- 11 files changed, 205 insertions(+), 132 deletions(-) create mode 100644 io/local/options.go diff --git a/client.go b/client.go index 27a029cd..c0d14dad 100644 --- a/client.go +++ b/client.go @@ -195,10 +195,12 @@ func (c *Client) getUsableScanPolicy(p *a.ScanPolicy) *a.ScanPolicy { // - ctx can be used to cancel the backup operation. // - config is the configuration for the backup operation. // - writer creates new writers for the backup operation. +// - reader is used only for reading a state file for continuation operations. func (c *Client) Backup( ctx context.Context, config *BackupConfig, writer Writer, + reader StreamingReader, ) (*BackupHandler, error) { if config == nil { return nil, fmt.Errorf("backup config required") @@ -212,7 +214,7 @@ func (c *Client) Backup( return nil, fmt.Errorf("failed to validate backup config: %w", err) } - handler, err := newBackupHandler(ctx, config, c.aerospikeClient, c.logger, writer, c.scanLimiter) + handler, err := newBackupHandler(ctx, config, c.aerospikeClient, c.logger, writer, reader, c.scanLimiter) if err != nil { return nil, fmt.Errorf("failed to create backup handler: %w", err) } @@ -274,7 +276,7 @@ func (c *Client) Estimate( return 0, fmt.Errorf("failed to validate backup config: %w", err) } - handler, err := newBackupHandler(ctx, config, c.aerospikeClient, c.logger, nil, c.scanLimiter) + handler, err := newBackupHandler(ctx, config, c.aerospikeClient, c.logger, nil, nil, c.scanLimiter) if err != nil { return 0, fmt.Errorf("failed to create backup handler: %w", err) } diff --git a/handler_backup.go b/handler_backup.go index f6c31ad7..3b177796 100644 --- a/handler_backup.go +++ b/handler_backup.go @@ -37,8 +37,6 @@ import ( "golang.org/x/time/rate" ) -const filePrefixContinue = "continue_" - // Writer provides access to backup storage. // Exported for integration tests. 
type Writer interface { @@ -84,6 +82,7 @@ func newBackupHandler( ac AerospikeClient, logger *slog.Logger, writer Writer, + reader StreamingReader, scanLimiter *semaphore.Weighted, ) (*BackupHandler, error) { id := uuid.NewString() @@ -101,7 +100,7 @@ func newBackupHandler( ctx, cancel := context.WithCancel(ctx) // Keep in mind, that on continue operation, we update partitions list in config by pointer. - state, err := NewState(ctx, config, logger) + state, err := NewState(ctx, config, reader, writer, logger) if err != nil { return nil, err } @@ -244,10 +243,12 @@ func (bh *BackupHandler) backupSync(ctx context.Context) error { if err != nil { return err } - // Have to reload filter - bh.config.PartitionFilters, err = bh.state.loadPartitionFilters() - if err != nil { - return err + if bh.config.isStateContinue() { + // Have to reload filter, as on count records cursor is moving and future scans returns nothing. + bh.config.PartitionFilters, err = bh.state.loadPartitionFilters() + if err != nil { + return err + } } return handler.run(ctx, writeWorkers, &bh.stats.ReadRecords) @@ -303,11 +304,7 @@ func (bh *BackupHandler) newWriter(ctx context.Context) (io.WriteCloser, error) } func (bh *BackupHandler) newConfiguredWriter(ctx context.Context) (io.WriteCloser, error) { - prefix := "" - if bh.config.isStateContinue() { - prefix = filePrefixContinue - } - filename := bh.encoder.GenerateFilename(prefix) + filename := bh.encoder.GenerateFilename(bh.state.getFileSuffix()) storageWriter, err := bh.writer.NewWriter(ctx, filename) if err != nil { diff --git a/handler_restore.go b/handler_restore.go index 0ed2ee29..c2febe84 100644 --- a/handler_restore.go +++ b/handler_restore.go @@ -39,6 +39,11 @@ type StreamingReader interface { // Must be run in a goroutine `go rh.reader.StreamFiles(ctx, readersCh, errorsCh)`. StreamFiles(context.Context, chan<- io.ReadCloser, chan<- error) + // StreamFile creates a single file reader and sends io.Readers to the `readersCh` + // In case of an error, it is sent to the `errorsCh` channel. + // Must be run in a goroutine `go rh.reader.StreamFile()`. + StreamFile(ctx context.Context, filename string, readersCh chan<- io.ReadCloser, errorsCh chan<- error) + // GetType returns the type of storage. Used in logging. GetType() string } diff --git a/io/aws/s3/reader.go b/io/aws/s3/reader.go index 58201370..a5d9b160 100644 --- a/io/aws/s3/reader.go +++ b/io/aws/s3/reader.go @@ -110,7 +110,7 @@ func (r *Reader) StreamFiles( } // If not a folder, only file. - r.streamFile(ctx, r.path, readersCh, errorsCh) + r.StreamFile(ctx, r.path, readersCh, errorsCh) } func (r *Reader) streamDirectory( @@ -169,9 +169,9 @@ func (r *Reader) streamDirectory( } } -// streamFile opens single file from s3 and sends io.Readers to the `readersCh` +// StreamFile opens single file from s3 and sends io.Readers to the `readersCh` // In case of an error, it is sent to the `errorsCh` channel. -func (r *Reader) streamFile( +func (r *Reader) StreamFile( ctx context.Context, filename string, readersCh chan<- io.ReadCloser, errorsCh chan<- error) { defer close(readersCh) diff --git a/io/azure/blob/reader.go b/io/azure/blob/reader.go index df326e74..280bfe4c 100644 --- a/io/azure/blob/reader.go +++ b/io/azure/blob/reader.go @@ -99,7 +99,7 @@ func (r *Reader) StreamFiles( } // If not a folder, only file. 
- r.streamFile(ctx, r.path, readersCh, errorsCh) + r.StreamFile(ctx, r.path, readersCh, errorsCh) } func (r *Reader) streamDirectory( @@ -146,9 +146,9 @@ func (r *Reader) streamDirectory( } } -// streamFile opens a single file from GCP cloud storage and sends io.Readers to the `readersCh` +// StreamFile opens a single file from GCP cloud storage and sends io.Readers to the `readersCh` // In case of an error, it is sent to the `errorsCh` channel. -func (r *Reader) streamFile( +func (r *Reader) StreamFile( ctx context.Context, filename string, readersCh chan<- io.ReadCloser, errorsCh chan<- error) { defer close(readersCh) diff --git a/io/encoding/asb/encode.go b/io/encoding/asb/encode.go index 5b875de1..001def2c 100644 --- a/io/encoding/asb/encode.go +++ b/io/encoding/asb/encode.go @@ -48,8 +48,8 @@ func NewEncoder(namespace string, compact bool) *Encoder { } // GenerateFilename generates a file name for the given namespace. -func (e *Encoder) GenerateFilename(prefix string) string { - return fmt.Sprintf("%s%s_%d.asb", prefix, e.namespace, e.id.Add(1)) +func (e *Encoder) GenerateFilename(suffix string) string { + return fmt.Sprintf("%s_%d%s.asb", e.namespace, e.id.Add(1), suffix) } // EncodeToken encodes a token to the ASB format. diff --git a/io/gcp/storage/reader.go b/io/gcp/storage/reader.go index b3ad85c3..9a11740a 100644 --- a/io/gcp/storage/reader.go +++ b/io/gcp/storage/reader.go @@ -103,7 +103,7 @@ func (r *Reader) StreamFiles( } // If not a folder, only file. - r.streamFile(ctx, r.path, readersCh, errorsCh) + r.StreamFile(ctx, r.path, readersCh, errorsCh) } func (r *Reader) streamDirectory( @@ -159,9 +159,9 @@ func (r *Reader) streamDirectory( } } -// streamFile opens a single file from GCP cloud storage and sends io.Readers to the `readersCh` +// StreamFile opens a single file from GCP cloud storage and sends io.Readers to the `readersCh` // In case of an error, it is sent to the `errorsCh` channel. -func (r *Reader) streamFile( +func (r *Reader) StreamFile( ctx context.Context, filename string, readersCh chan<- io.ReadCloser, errorsCh chan<- error) { defer close(readersCh) diff --git a/io/local/options.go b/io/local/options.go new file mode 100644 index 00000000..4322ad6f --- /dev/null +++ b/io/local/options.go @@ -0,0 +1,81 @@ +// Copyright 2024 Aerospike, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package local + +type options struct { + // path contains path to file or directory. + path string + // isDir flag describes what we have in path, file or directory. + isDir bool + // isRemovingFiles flag describes should we remove everything from backup folder or not. + isRemovingFiles bool + // validator contains files validator that is applied to files if isDir = true. + validator validator + // withNestedDir describes if we should check for if an object is a directory for read/write operations. + // When we stream files or delete files in folder, we skip directories. This flag will avoid skipping. 
+ // Default: false + withNestedDir bool + // unbuffered means that writings toi disk will be unbuffered. + unbuffered bool +} + +type Opt func(*options) + +// WithDir adds directory to reading/writing files from/to. +func WithDir(path string) Opt { + return func(r *options) { + r.path = path + r.isDir = true + } +} + +// WithFile adds a file path to reading/writing from/to. +func WithFile(path string) Opt { + return func(r *options) { + r.path = path + r.isDir = false + } +} + +// WithValidator adds validator to Reader, so files will be validated before reading. +// Is used only for Reader. +func WithValidator(v validator) Opt { + return func(r *options) { + r.validator = v + } +} + +// WithNestedDir adds withNestedDir = true parameter. That means that we won't skip nested folders. +func WithNestedDir() Opt { + return func(r *options) { + r.withNestedDir = true + } +} + +// WithRemoveFiles adds remove files flag, so all files will be removed from backup folder before backup. +// Is used only for Writer. +func WithRemoveFiles() Opt { + return func(r *options) { + r.isRemovingFiles = true + } +} + +// WithUnbufferedWrite adds an unbuffered flag to the writer. +// Which means that writings to disk will be unbuffered. +func WithUnbufferedWrite() Opt { + return func(r *options) { + r.unbuffered = true + } +} diff --git a/io/local/reader.go b/io/local/reader.go index 9ed53e7a..cd6407bc 100644 --- a/io/local/reader.go +++ b/io/local/reader.go @@ -38,54 +38,6 @@ type Reader struct { options } -type options struct { - // path contains path to file or directory. - path string - // isDir flag describes what we have in path, file or directory. - isDir bool - // isRemovingFiles flag describes should we remove everything from backup folder or not. - isRemovingFiles bool - // validator contains files validator that is applied to files if isDir = true. - validator validator - // withNestedDir describes if we should check for if an object is a directory for read/write operations. - // When we stream files or delete files in folder, we skip directories. This flag will avoid skipping. - // Default: false - withNestedDir bool -} - -type Opt func(*options) - -// WithDir adds directory to reading/writing files from/to. -func WithDir(path string) Opt { - return func(r *options) { - r.path = path - r.isDir = true - } -} - -// WithFile adds a file path to reading/writing from/to. -func WithFile(path string) Opt { - return func(r *options) { - r.path = path - r.isDir = false - } -} - -// WithValidator adds validator to Reader, so files will be validated before reading. -// Is used only for Reader. -func WithValidator(v validator) Opt { - return func(r *options) { - r.validator = v - } -} - -// WithNestedDir adds withNestedDir = true parameter. That means that we won't skip nested folders. -func WithNestedDir() Opt { - return func(r *options) { - r.withNestedDir = true - } -} - // NewReader creates a new local directory/file Reader. // Must be called with WithDir(path string) or WithFile(path string) - mandatory. // Can be called with WithValidator(v validator) - optional. @@ -124,7 +76,7 @@ func (r *Reader) StreamFiles( } // If not a folder, only file. 
- r.streamFile(ctx, r.path, readersCh, errorsCh) + r.StreamFile(ctx, r.path, readersCh, errorsCh) } func (r *Reader) streamDirectory( @@ -181,15 +133,19 @@ func (r *Reader) streamDirectory( } } -// streamFile opens single file and sends io.Readers to the `readersCh` +// StreamFile opens single file and sends io.Readers to the `readersCh` // In case of an error, it is sent to the `errorsCh` channel. -func (r *Reader) streamFile( +func (r *Reader) StreamFile( ctx context.Context, filename string, readersCh chan<- io.ReadCloser, errorsCh chan<- error) { if ctx.Err() != nil { errorsCh <- ctx.Err() return } + if r.isDir { + filename = filepath.Join(r.path, filename) + } + reader, err := os.Open(filename) if err != nil { errorsCh <- fmt.Errorf("failed to open %s: %w", filename, err) diff --git a/io/local/writer.go b/io/local/writer.go index 3ba08717..093dad22 100644 --- a/io/local/writer.go +++ b/io/local/writer.go @@ -32,16 +32,6 @@ type Writer struct { options // Sync for running backup to one file. called atomic.Bool - - fileName string -} - -// WithRemoveFiles adds remove files flag, so all files will be removed from backup folder before backup. -// Is used only for Writer. -func WithRemoveFiles() Opt { - return func(r *options) { - r.isRemovingFiles = true - } } // NewWriter creates a new writer for local directory/file writes. @@ -63,17 +53,17 @@ func NewWriter(ctx context.Context, opts ...Opt) (*Writer, error) { return nil, fmt.Errorf("failed to prepare backup directory: %w", err) } - if w.isDir { - // Check if backup dir is empty. - isEmpty, err := isEmptyDirectory(w.path) - if err != nil { - return nil, fmt.Errorf("failed to check if directory is empty: %w", err) - } - - if !isEmpty && !w.isRemovingFiles { - return nil, fmt.Errorf("backup folder must be empty or set RemoveFiles = true") - } - } + // if w.isDir { + // // Check if backup dir is empty. + // isEmpty, err := isEmptyDirectory(w.path) + // if err != nil { + // return nil, fmt.Errorf("failed to check if directory is empty: %w", err) + // } + // + // if !isEmpty && !w.isRemovingFiles { + // return nil, fmt.Errorf("backup folder must be empty or set RemoveFiles = true") + // } + // } // If we want to remove files from backup path. if w.isRemovingFiles { @@ -214,6 +204,11 @@ func (w *Writer) NewWriter(ctx context.Context, fileName string) (io.WriteCloser return nil, fmt.Errorf("failed to open file %s: %w", filePath, err) } + // If unbuffered write is set, we return file directly. + if w.unbuffered { + return file, nil + } + return &bufferedFile{bufio.NewWriterSize(file, bufferSize), file}, nil } diff --git a/state.go b/state.go index ef2e50c5..2352951f 100644 --- a/state.go +++ b/state.go @@ -18,38 +18,39 @@ import ( "context" "encoding/gob" "fmt" + "io" "log" "log/slog" - "os" "sync" "time" a "github.com/aerospike/aerospike-client-go/v7" ) -// Must be the same as pipeline channelSize -const channelSize = 256 - // State contains current backups status data. type State struct { // Global backup context. ctx context.Context - // File to save to. + // Counter to count how many times State instance was initialized. + // Is used to create prefix for backup files. + Counter int + // RecordsChan communication channel to save current filter state. + RecordsChan chan recordState + // RecordStates store states of all filters. + RecordStates map[string]recordState + // Mutex for RecordStates operations. + // Ordinary mutex is used, because we must not allow any writings when we read state. + mu sync.Mutex + // File to save state to. 
FileName string - // How often file will be saved to disk. DumpDuration time.Duration - // Config *BackupConfig - + // writer is used to create a state file. + writer Writer // logger for logging errors. logger *slog.Logger - - // ------ experiments ----- - RecordsChan chan recordState - RecordStates map[string]recordState - mu sync.RWMutex } type recordState struct { @@ -96,19 +97,26 @@ func newRecordState(filter a.PartitionFilter) recordState { } } -func NewState(ctx context.Context, +// NewState returns new state instance depending on config. +// If we continue back up, the state will be loaded from a state file, +// if it is the first operation, new state instance will be returned. +func NewState( + ctx context.Context, config *BackupConfig, + reader StreamingReader, + writer Writer, logger *slog.Logger, ) (*State, error) { switch { case config.isStateFirstRun(): - return newState(ctx, config, logger), nil + return newState(ctx, config, writer, logger), nil case config.isStateContinue(): - s, err := newStateFromFile(ctx, config, logger) + s, err := newStateFromFile(ctx, config, reader, writer, logger) if err != nil { return nil, err } // change filters in config. + // TODO: may be move it handler, so everyone wil see it. config.PartitionFilters, err = s.loadPartitionFilters() if err != nil { return nil, err @@ -119,21 +127,23 @@ func NewState(ctx context.Context, return nil, nil } -// NewState creates status service from parameters, for backup operations. +// newState creates status service from parameters, for backup operations. func newState( ctx context.Context, config *BackupConfig, + writer Writer, logger *slog.Logger, ) *State { s := &State{ - ctx: ctx, + ctx: ctx, + // RecordsChan must not be buffered, so we can stop all operations. + RecordsChan: make(chan recordState), + RecordStates: make(map[string]recordState), FileName: config.StateFile, DumpDuration: config.StateFileDumpDuration, - // Config: config, + writer: writer, logger: logger, - RecordsChan: make(chan recordState, channelSize), - RecordStates: make(map[string]recordState), } // Run watcher on initialization. go s.serve() @@ -142,19 +152,20 @@ func newState( return s } -// NewStateFromFile creates a status service from the file, to continue operations. +// newStateFromFile creates a status service from the file, to continue operations. func newStateFromFile( ctx context.Context, config *BackupConfig, + reader StreamingReader, + writer Writer, logger *slog.Logger, ) (*State, error) { - // TODO: replace with io reader/writer. - reader, err := os.Open(config.StateFile) + f, err := openFile(ctx, reader, config.StateFile) if err != nil { return nil, fmt.Errorf("failed to open state file: %w", err) } - dec := gob.NewDecoder(reader) + dec := gob.NewDecoder(f) var s State if err = dec.Decode(&s); err != nil { @@ -162,8 +173,10 @@ func newStateFromFile( } s.ctx = ctx + s.writer = writer s.logger = logger - s.RecordsChan = make(chan recordState, channelSize) + s.RecordsChan = make(chan recordState) + s.Counter++ logger.Debug("loaded state file successfully") @@ -199,8 +212,6 @@ func (s *State) serve() { return case <-ticker.C: - // save state and sleep. - time.Sleep(time.Second) // save intermediate state. if err := s.dump(); err != nil { s.logger.Error("failed to dump state", slog.Any("error", err)) @@ -211,18 +222,18 @@ func (s *State) serve() { } func (s *State) dump() error { - // TODO: replace with io reader/writer. 
- file, err := os.OpenFile(s.FileName, os.O_CREATE|os.O_WRONLY, 0o666) + file, err := s.writer.NewWriter(s.ctx, s.FileName) if err != nil { return fmt.Errorf("failed to create state file %s: %w", s.FileName, err) } enc := gob.NewEncoder(file) - s.mu.RLock() + s.mu.Lock() if err = enc.Encode(s); err != nil { return fmt.Errorf("failed to encode state data: %w", err) } - s.mu.RUnlock() + // file.Close() + s.mu.Unlock() s.logger.Debug("state file dumped", slog.Time("saved at", time.Now())) @@ -230,7 +241,7 @@ func (s *State) dump() error { } func (s *State) loadPartitionFilters() ([]*a.PartitionFilter, error) { - s.mu.RLock() + s.mu.Lock() result := make([]*a.PartitionFilter, 0, len(s.RecordStates)) for _, state := range s.RecordStates { @@ -242,7 +253,7 @@ func (s *State) loadPartitionFilters() ([]*a.PartitionFilter, error) { result = append(result, f) } - s.mu.RUnlock() + s.mu.Unlock() return result, nil } @@ -261,7 +272,9 @@ func (s *State) serveRecords() { s.RecordStates[key] = state s.mu.Unlock() - // if counter == 400000 { + // For tests: + // ---------- + // if counter == 1000 { // s.dump() // fmt.Println("done 4000000") // os.Exit(1) @@ -269,3 +282,27 @@ func (s *State) serveRecords() { } } } + +func (s *State) getFileSuffix() string { + if s.Counter > 0 { + return fmt.Sprintf("(%d)", s.Counter) + } + + return "" +} + +func openFile(ctx context.Context, reader StreamingReader, fileName string) (io.ReadCloser, error) { + readCh := make(chan io.ReadCloser) + errCh := make(chan error) + go reader.StreamFile(ctx, fileName, readCh, errCh) + for { + select { + case <-ctx.Done(): + return nil, ctx.Err() + case err := <-errCh: + return nil, err + case file := <-readCh: + return file, nil + } + } +} From 2d87705c1901887c5d073cf2ae6701c3c9b62546 Mon Sep 17 00:00:00 2001 From: Dmitrii Neeman Date: Thu, 10 Oct 2024 14:38:30 +0300 Subject: [PATCH 05/25] FMWK-570-backup-restore-state - cleaning the code --- README.md | 6 +- client.go | 1 + cmd/internal/app/asbackup.go | 15 +++- cmd/internal/app/configs.go | 10 ++- cmd/internal/flags/backup.go | 14 ++++ cmd/internal/models/backup.go | 41 +++++----- config_backup.go | 4 +- examples/aws/s3/main.go | 2 +- examples/azure/blob/main.go | 2 +- examples/gcp/storage/main.go | 2 +- examples/readme/main.go | 2 +- handler_backup.go | 17 +++- handler_backup_records.go | 5 +- internal/processors/change_namespace_test.go | 8 +- io/aerospike/record_reader.go | 9 ++- io/aerospike/record_reader_test.go | 4 +- io/encoding/asb/decode.go | 2 +- io/encoding/asb/decode_test.go | 2 +- io/local/options.go | 10 +++ io/local/writer.go | 22 +++--- mocks/Encoder_mock.go | 21 ++--- mocks/StreamingReader_mock.go | 36 +++++++++ mocks/Writer_mock.go | 46 +++++++++++ models/data_models.go | 7 +- models/partition_filter_serialized.go | 55 +++++++++++++ state.go | 82 +++++--------------- state_test.go | 53 ------------- tests/integration/integration_test.go | 16 +++- writers.go | 11 ++- writers_test.go | 10 +-- 30 files changed, 318 insertions(+), 197 deletions(-) create mode 100644 models/partition_filter_serialized.go diff --git a/README.md b/README.md index 39640280..057f9c6c 100644 --- a/README.md +++ b/README.md @@ -43,6 +43,7 @@ func main() { // For backup to single file use local.WithFile(fileName) writers, err := local.NewWriter( + context.Background(), local.WithRemoveFiles(), local.WithDir("backups_folder"), ) @@ -52,10 +53,11 @@ func main() { backupCfg := backup.NewDefaultBackupConfig() backupCfg.Namespace = "test" - backupCfg.Parallel = 5 + backupCfg.ParallelRead = 10 + 
backupCfg.ParallelWrite = 10 ctx := context.Background() - backupHandler, err := backupClient.Backup(ctx, backupCfg, writers) + backupHandler, err := backupClient.Backup(ctx, backupCfg, writers, nil) if err != nil { panic(err) } diff --git a/client.go b/client.go index c0d14dad..c8723ebe 100644 --- a/client.go +++ b/client.go @@ -218,6 +218,7 @@ func (c *Client) Backup( if err != nil { return nil, fmt.Errorf("failed to create backup handler: %w", err) } + handler.run() return handler, nil diff --git a/cmd/internal/app/asbackup.go b/cmd/internal/app/asbackup.go index 87a91418..31a6650f 100644 --- a/cmd/internal/app/asbackup.go +++ b/cmd/internal/app/asbackup.go @@ -30,6 +30,8 @@ type ASBackup struct { backupClient *backup.Client backupConfig *backup.BackupConfig writer backup.Writer + // reader is used to read state file. + reader backup.StreamingReader // Additional params. isEstimate bool estimatesSamples int64 @@ -60,6 +62,7 @@ func NewASBackup( // Initializations. var ( writer backup.Writer + reader backup.StreamingReader err error ) // We initialize a writer only if output is configured. @@ -75,6 +78,15 @@ func NewASBackup( } } + if backupParams.StateFileDst != "" || backupParams.Continue != "" { + r := &models.Restore{InputFile: backupParams.OutputFile} + + reader, err = getReader(ctx, r, commonParams, awsS3, gcpStorage, azureBlob) + if err != nil { + return nil, fmt.Errorf("failed to create reader: %w", err) + } + } + aerospikeClient, err := newAerospikeClient(clientConfig, backupParams.PreferRacks) if err != nil { return nil, fmt.Errorf("failed to create aerospike client: %w", err) @@ -100,6 +112,7 @@ func NewASBackup( backupClient: backupClient, backupConfig: backupConfig, writer: writer, + reader: reader, isEstimate: backupParams.Estimate, estimatesSamples: backupParams.EstimateSamples, }, nil @@ -120,7 +133,7 @@ func (b *ASBackup) Run(ctx context.Context) error { printEstimateReport(estimates) default: - h, err := b.backupClient.Backup(ctx, b.backupConfig, b.writer) + h, err := b.backupClient.Backup(ctx, b.backupConfig, b.writer, b.reader) if err != nil { return fmt.Errorf("failed to start backup: %w", err) } diff --git a/cmd/internal/app/configs.go b/cmd/internal/app/configs.go index 93800065..c2b52fdd 100644 --- a/cmd/internal/app/configs.go +++ b/cmd/internal/app/configs.go @@ -59,10 +59,16 @@ func mapBackupConfig( c.ParallelWrite = commonParams.Parallel c.ParallelRead = commonParams.Parallel // As we set --nice in MiB we must convert it to bytes - // TODO: make Bandwidth int64 to avoid overflow. c.Bandwidth = commonParams.Nice * 1024 * 1024 c.Compact = backupParams.Compact c.NoTTLOnly = backupParams.NoTTLOnly + c.StateFileDumpDuration = time.Duration(backupParams.StateFileDumpDuration) * time.Millisecond + c.StateFile = backupParams.StateFileDst + + if backupParams.Continue != "" { + c.StateFile = backupParams.Continue + c.Continue = true + } // Overwrite partitions if we use nodes. if backupParams.ParallelNodes || backupParams.NodeList != "" { @@ -135,7 +141,6 @@ func mapRestoreConfig( c.WritePolicy = mapWritePolicy(restoreParams, commonParams) c.InfoPolicy = mapInfoPolicy(restoreParams.TimeOut) // As we set --nice in MiB we must convert it to bytes - // TODO: make Bandwidth int64 to avoid overflow. 
c.Bandwidth = commonParams.Nice * 1024 * 1024 c.ExtraTTL = restoreParams.ExtraTTL c.IgnoreRecordError = restoreParams.IgnoreRecordError @@ -288,7 +293,6 @@ func recordExistsAction(replace, unique bool) aerospike.RecordExistsAction { } } -// TODO: why no info policy timeout is set for backup in C tool? func mapInfoPolicy(timeOut int64) *aerospike.InfoPolicy { p := aerospike.NewInfoPolicy() p.Timeout = time.Duration(timeOut) * time.Millisecond diff --git a/cmd/internal/flags/backup.go b/cmd/internal/flags/backup.go index 5d7fb0df..0fedfc21 100644 --- a/cmd/internal/flags/backup.go +++ b/cmd/internal/flags/backup.go @@ -123,6 +123,20 @@ func (f *Backup) NewFlagSet() *pflag.FlagSet { flagSet.Int64Var(&f.EstimateSamples, "estimate-samples", 10000, "The number of samples to take when running a backup estimate.") + flagSet.StringVarP(&f.Continue, "continue", "c", + "", + "Resumes an interrupted/failed backup from where it was left off, given the .state file\n"+ + "that was generated from the interrupted/failed run.") + flagSet.StringVar(&f.StateFileDst, "state-file-dst", + "", + "Either a path with a file name or a directory in which the backup state file will be\n"+ + "placed if the backup is interrupted/fails. If a path with a file name is used, that\n"+ + "exact path is where the backup file will be placed. If a directory is given, the backup\n"+ + "state will be placed in the directory with name `.asb.state`, or\n"+ + "`.asb.state` if `--output-file-prefix` is given.") + flagSet.Int64Var(&f.StateFileDumpDuration, "state-file-dump-duration", + 10000, + "Intervals in milliseconds, how often dump state file to disk.") return flagSet } diff --git a/cmd/internal/models/backup.go b/cmd/internal/models/backup.go index d3ed6b3e..33f48326 100644 --- a/cmd/internal/models/backup.go +++ b/cmd/internal/models/backup.go @@ -15,25 +15,28 @@ package models type Backup struct { - OutputFile string - RemoveFiles bool - ModifiedBefore string - ModifiedAfter string - FileLimit int64 - AfterDigest string - MaxRecords int64 - NoBins bool - SleepBetweenRetries int - FilterExpression string - ParallelNodes bool - RemoveArtifacts bool - Compact bool - NodeList string - NoTTLOnly bool - PreferRacks string - PartitionList string - Estimate bool - EstimateSamples int64 + OutputFile string + RemoveFiles bool + ModifiedBefore string + ModifiedAfter string + FileLimit int64 + AfterDigest string + MaxRecords int64 + NoBins bool + SleepBetweenRetries int + FilterExpression string + ParallelNodes bool + RemoveArtifacts bool + Compact bool + NodeList string + NoTTLOnly bool + PreferRacks string + PartitionList string + Estimate bool + EstimateSamples int64 + StateFileDst string + StateFileDumpDuration int64 + Continue string } // ShouldClearTarget check if we should clean target directory. diff --git a/config_backup.go b/config_backup.go index b7606adb..861fb0cc 100644 --- a/config_backup.go +++ b/config_backup.go @@ -144,12 +144,12 @@ func (c *BackupConfig) isDefaultPartitionFilter() bool { // isStateFirstRun checks if it is first run of backup with a state file. func (c *BackupConfig) isStateFirstRun() bool { - return c.StateFile != "" && c.Continue == false + return c.StateFile != "" && !c.Continue } // isStateContinueRun checks if we continue backup from a state file. 
func (c *BackupConfig) isStateContinue() bool { - return c.StateFile != "" && c.Continue == true + return c.StateFile != "" && !c.Continue } func (c *BackupConfig) isFullBackup() bool { diff --git a/examples/aws/s3/main.go b/examples/aws/s3/main.go index da2f44d2..aa38b5c3 100644 --- a/examples/aws/s3/main.go +++ b/examples/aws/s3/main.go @@ -97,7 +97,7 @@ func runBackup(ctx context.Context, c *backup.Client) { // set compression policy backupCfg.CompressionPolicy = backup.NewCompressionPolicy(backup.CompressZSTD, 20) - backupHandler, err := c.Backup(ctx, backupCfg, writers) + backupHandler, err := c.Backup(ctx, backupCfg, writers, nil) if err != nil { panic(err) } diff --git a/examples/azure/blob/main.go b/examples/azure/blob/main.go index 8dbe34b5..1f3b2af3 100644 --- a/examples/azure/blob/main.go +++ b/examples/azure/blob/main.go @@ -101,7 +101,7 @@ func runBackup(ctx context.Context, c *backup.Client) { // set compression policy backupCfg.CompressionPolicy = backup.NewCompressionPolicy(backup.CompressZSTD, 20) - backupHandler, err := c.Backup(ctx, backupCfg, writers) + backupHandler, err := c.Backup(ctx, backupCfg, writers, nil) if err != nil { panic(err) } diff --git a/examples/gcp/storage/main.go b/examples/gcp/storage/main.go index 82e431a8..f3bf7fb6 100644 --- a/examples/gcp/storage/main.go +++ b/examples/gcp/storage/main.go @@ -92,7 +92,7 @@ func runBackup(ctx context.Context, c *backup.Client) { // set compression policy backupCfg.CompressionPolicy = backup.NewCompressionPolicy(backup.CompressZSTD, 20) - backupHandler, err := c.Backup(ctx, backupCfg, writers) + backupHandler, err := c.Backup(ctx, backupCfg, writers, nil) if err != nil { panic(err) } diff --git a/examples/readme/main.go b/examples/readme/main.go index 73646be9..1e0d2635 100644 --- a/examples/readme/main.go +++ b/examples/readme/main.go @@ -51,7 +51,7 @@ func main() { backupCfg.Namespace = "test" backupCfg.ParallelRead = 5 - backupHandler, err := backupClient.Backup(ctx, backupCfg, writers) + backupHandler, err := backupClient.Backup(ctx, backupCfg, writers, nil) if err != nil { panic(err) } diff --git a/handler_backup.go b/handler_backup.go index 3b177796..657f275d 100644 --- a/handler_backup.go +++ b/handler_backup.go @@ -99,10 +99,18 @@ func newBackupHandler( // redefine context cancel. ctx, cancel := context.WithCancel(ctx) - // Keep in mind, that on continue operation, we update partitions list in config by pointer. - state, err := NewState(ctx, config, reader, writer, logger) - if err != nil { - return nil, err + var ( + state *State + err error + ) + + if config.StateFile != "" { + // Keep in mind, that on continue operation, we update partitions list in config by pointer. + state, err = NewState(ctx, config, reader, writer, logger) + if err != nil { + cancel() + return nil, err + } } return &BackupHandler{ @@ -243,6 +251,7 @@ func (bh *BackupHandler) backupSync(ctx context.Context) error { if err != nil { return err } + if bh.config.isStateContinue() { // Have to reload filter, as on count records cursor is moving and future scans returns nothing. bh.config.PartitionFilters, err = bh.state.loadPartitionFilters() diff --git a/handler_backup_records.go b/handler_backup_records.go index dbcd4601..f8523181 100644 --- a/handler_backup_records.go +++ b/handler_backup_records.go @@ -37,8 +37,6 @@ type backupRecordsHandler struct { aerospikeClient AerospikeClient logger *slog.Logger scanLimiter *semaphore.Weighted - // is used when AfterDigest is set. 
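Read together with the new --state-file-dst, --state-file-dump-duration and --continue flags above, the intended library-level configuration is: a first run sets StateFile plus a dump interval, while a resumed run reuses the same StateFile and sets Continue so the partition filters are reloaded from it. A rough sketch under those assumptions (field and constructor names are the ones appearing in this series; the namespace and file name are placeholders):

package main

import (
	"fmt"
	"time"

	backup "github.com/aerospike/backup-go"
)

func main() {
	// First run: dump scan progress to "backup.state" every 10 seconds.
	first := backup.NewDefaultBackupConfig()
	first.Namespace = "test"
	first.StateFile = "backup.state"
	first.StateFileDumpDuration = 10 * time.Second

	// Resumed run: reuse the same state file and continue from the saved filters.
	resume := backup.NewDefaultBackupConfig()
	resume.Namespace = "test"
	resume.StateFile = "backup.state"
	resume.Continue = true

	fmt.Println(first.StateFile, resume.Continue)
}

This is the same pair of settings that mapBackupConfig derives from --state-file-dst and --continue in the CLI.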
- afterDigest []byte } func newBackupRecordsHandler( @@ -204,7 +202,9 @@ func (bh *backupRecordsHandler) makeAerospikeReadWorkersForPartition( ctx context.Context, n int, scanPolicy *a.ScanPolicy, ) ([]pipeline.Worker[*models.Token], error) { var err error + partitionGroups := bh.config.PartitionFilters + if !bh.config.isStateContinue() { partitionGroups, err = splitPartitions(bh.config.PartitionFilters, n) if err != nil { @@ -276,6 +276,7 @@ func (bh *backupRecordsHandler) recordReaderConfigForPartitions( scanPolicy *a.ScanPolicy, ) *aerospike.RecordReaderConfig { pfCopy := *partitionFilter + return aerospike.NewRecordReaderConfig( bh.config.Namespace, bh.config.SetList, diff --git a/internal/processors/change_namespace_test.go b/internal/processors/change_namespace_test.go index 7c5aa8e4..cb250a61 100644 --- a/internal/processors/change_namespace_test.go +++ b/internal/processors/change_namespace_test.go @@ -45,7 +45,7 @@ func TestChangeNamespaceProcessor(t *testing.T) { Record: &aerospike.Record{ Key: key, }, - }, 0), + }, 0, nil), wantErr: false, }, { @@ -56,7 +56,7 @@ func TestChangeNamespaceProcessor(t *testing.T) { Record: &aerospike.Record{ Key: key, }, - }, 0), + }, 0, nil), wantErr: false, }, { @@ -74,7 +74,7 @@ func TestChangeNamespaceProcessor(t *testing.T) { Record: &aerospike.Record{ Key: invalidKey, }, - }, 0), + }, 0, nil), wantErr: true, }, { @@ -85,7 +85,7 @@ func TestChangeNamespaceProcessor(t *testing.T) { Record: &aerospike.Record{ Key: key, }, - }, 0), + }, 0, nil), wantErr: false, }, } diff --git a/io/aerospike/record_reader.go b/io/aerospike/record_reader.go index d62807e4..36084780 100644 --- a/io/aerospike/record_reader.go +++ b/io/aerospike/record_reader.go @@ -141,8 +141,13 @@ func (r *RecordReader) Read() (*models.Token, error) { rec := models.Record{ Record: res.Record, } - // TODO: check how accurate is value of filter at this moment. 
- recToken := models.NewRecordToken(&rec, 0, *r.config.partitionFilter) + + pfs, err := models.NewPartitionFilterSerialized(r.config.partitionFilter) + if err != nil { + return nil, fmt.Errorf("failed to serialize partition filter: %w", err) + } + + recToken := models.NewRecordToken(&rec, 0, pfs) return recToken, nil } diff --git a/io/aerospike/record_reader_test.go b/io/aerospike/record_reader_test.go index 56496309..90f51513 100644 --- a/io/aerospike/record_reader_test.go +++ b/io/aerospike/record_reader_test.go @@ -85,7 +85,7 @@ func (suite *readersTestSuite) TestAerospikeRecordReader() { v, err := reader.Read() suite.Nil(err) - expectedRecToken := models.NewRecordToken(mockRec, 0) + expectedRecToken := models.NewRecordToken(mockRec, 0, nil) suite.Equal(expectedRecToken, v) mockScanner.AssertExpectations(suite.T()) } @@ -273,7 +273,7 @@ func (suite *readersTestSuite) TestAerospikeRecordReaderWithPolicy() { v, err := reader.Read() suite.Nil(err) - expectedRecToken := models.NewRecordToken(mockRec, 0) + expectedRecToken := models.NewRecordToken(mockRec, 0, nil) suite.Equal(expectedRecToken, v) mockScanner.AssertExpectations(suite.T()) } diff --git a/io/encoding/asb/decode.go b/io/encoding/asb/decode.go index ea415103..af8a2c5e 100644 --- a/io/encoding/asb/decode.go +++ b/io/encoding/asb/decode.go @@ -161,7 +161,7 @@ func (r *Decoder) NextToken() (*models.Token, error) { case *models.UDF: return models.NewUDFToken(v, size), nil case *models.Record: - return models.NewRecordToken(v, size, a.PartitionFilter{}), nil + return models.NewRecordToken(v, size, nil), nil default: return nil, fmt.Errorf("unsupported token type %T", v) } diff --git a/io/encoding/asb/decode_test.go b/io/encoding/asb/decode_test.go index e0e397e3..3f07da67 100644 --- a/io/encoding/asb/decode_test.go +++ b/io/encoding/asb/decode_test.go @@ -3718,7 +3718,7 @@ func TestASBReader_NextToken(t *testing.T) { Generation: 10, }, VoidTime: 10, - }, 106), + }, 106, nil), }, { name: "negative EOF", diff --git a/io/local/options.go b/io/local/options.go index 4322ad6f..2fd31e86 100644 --- a/io/local/options.go +++ b/io/local/options.go @@ -29,6 +29,8 @@ type options struct { withNestedDir bool // unbuffered means that writings toi disk will be unbuffered. unbuffered bool + // skipDirCheck if true, backup directory won't be checked. + skipDirCheck bool } type Opt func(*options) @@ -79,3 +81,11 @@ func WithUnbufferedWrite() Opt { r.unbuffered = true } } + +// WithSkipDirCheck adds skip dir check flags. +// Which means that backup directory won't be checked for emptiness. +func WithSkipDirCheck() Opt { + return func(r *options) { + r.skipDirCheck = true + } +} diff --git a/io/local/writer.go b/io/local/writer.go index 093dad22..71b222ff 100644 --- a/io/local/writer.go +++ b/io/local/writer.go @@ -53,17 +53,17 @@ func NewWriter(ctx context.Context, opts ...Opt) (*Writer, error) { return nil, fmt.Errorf("failed to prepare backup directory: %w", err) } - // if w.isDir { - // // Check if backup dir is empty. - // isEmpty, err := isEmptyDirectory(w.path) - // if err != nil { - // return nil, fmt.Errorf("failed to check if directory is empty: %w", err) - // } - // - // if !isEmpty && !w.isRemovingFiles { - // return nil, fmt.Errorf("backup folder must be empty or set RemoveFiles = true") - // } - // } + if w.isDir && !w.skipDirCheck { + // Check if backup dir is empty. 
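The new WithSkipDirCheck option pairs with the directory-emptiness check being restored in the writer below: a regular backup writer still refuses a non-empty directory, while an auxiliary writer (for example one used only for the state file) can opt out. A small sketch, assuming a directory target and the import path implied by the repository layout:

package main

import (
	"context"
	"log"

	"github.com/aerospike/backup-go/io/local"
)

func main() {
	ctx := context.Background()

	// Writer used for the state file: skip the "directory must be empty" check,
	// because the backup files themselves already live in this directory.
	stateWriter, err := local.NewWriter(
		ctx,
		local.WithDir("backups_folder"),
		local.WithSkipDirCheck(),
	)
	if err != nil {
		log.Fatal(err)
	}
	_ = stateWriter
}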
+ isEmpty, err := isEmptyDirectory(w.path) + if err != nil { + return nil, fmt.Errorf("failed to check if directory is empty: %w", err) + } + + if !isEmpty && !w.isRemovingFiles { + return nil, fmt.Errorf("backup folder must be empty or set RemoveFiles = true") + } + } // If we want to remove files from backup path. if w.isRemovingFiles { diff --git a/mocks/Encoder_mock.go b/mocks/Encoder_mock.go index 8f352f7a..98b6d1c6 100644 --- a/mocks/Encoder_mock.go +++ b/mocks/Encoder_mock.go @@ -78,17 +78,17 @@ func (_c *MockEncoder_EncodeToken_Call) RunAndReturn(run func(*models.Token) ([] return _c } -// GenerateFilename provides a mock function with given fields: -func (_m *MockEncoder) GenerateFilename() string { - ret := _m.Called() +// GenerateFilename provides a mock function with given fields: prefix +func (_m *MockEncoder) GenerateFilename(prefix string) string { + ret := _m.Called(prefix) if len(ret) == 0 { panic("no return value specified for GenerateFilename") } var r0 string - if rf, ok := ret.Get(0).(func() string); ok { - r0 = rf() + if rf, ok := ret.Get(0).(func(string) string); ok { + r0 = rf(prefix) } else { r0 = ret.Get(0).(string) } @@ -102,13 +102,14 @@ type MockEncoder_GenerateFilename_Call struct { } // GenerateFilename is a helper method to define mock.On call -func (_e *MockEncoder_Expecter) GenerateFilename() *MockEncoder_GenerateFilename_Call { - return &MockEncoder_GenerateFilename_Call{Call: _e.mock.On("GenerateFilename")} +// - prefix string +func (_e *MockEncoder_Expecter) GenerateFilename(prefix interface{}) *MockEncoder_GenerateFilename_Call { + return &MockEncoder_GenerateFilename_Call{Call: _e.mock.On("GenerateFilename", prefix)} } -func (_c *MockEncoder_GenerateFilename_Call) Run(run func()) *MockEncoder_GenerateFilename_Call { +func (_c *MockEncoder_GenerateFilename_Call) Run(run func(prefix string)) *MockEncoder_GenerateFilename_Call { _c.Call.Run(func(args mock.Arguments) { - run() + run(args[0].(string)) }) return _c } @@ -118,7 +119,7 @@ func (_c *MockEncoder_GenerateFilename_Call) Return(_a0 string) *MockEncoder_Gen return _c } -func (_c *MockEncoder_GenerateFilename_Call) RunAndReturn(run func() string) *MockEncoder_GenerateFilename_Call { +func (_c *MockEncoder_GenerateFilename_Call) RunAndReturn(run func(string) string) *MockEncoder_GenerateFilename_Call { _c.Call.Return(run) return _c } diff --git a/mocks/StreamingReader_mock.go b/mocks/StreamingReader_mock.go index 1d0c17f7..d24b1ddd 100644 --- a/mocks/StreamingReader_mock.go +++ b/mocks/StreamingReader_mock.go @@ -67,6 +67,42 @@ func (_c *MockStreamingReader_GetType_Call) RunAndReturn(run func() string) *Moc return _c } +// StreamFile provides a mock function with given fields: ctx, filename, readersCh, errorsCh +func (_m *MockStreamingReader) StreamFile(ctx context.Context, filename string, readersCh chan<- io.ReadCloser, errorsCh chan<- error) { + _m.Called(ctx, filename, readersCh, errorsCh) +} + +// MockStreamingReader_StreamFile_Call is a *mock.Call that shadows Run/Return methods with type explicit version for method 'StreamFile' +type MockStreamingReader_StreamFile_Call struct { + *mock.Call +} + +// StreamFile is a helper method to define mock.On call +// - ctx context.Context +// - filename string +// - readersCh chan<- io.ReadCloser +// - errorsCh chan<- error +func (_e *MockStreamingReader_Expecter) StreamFile(ctx interface{}, filename interface{}, readersCh interface{}, errorsCh interface{}) *MockStreamingReader_StreamFile_Call { + return &MockStreamingReader_StreamFile_Call{Call: 
_e.mock.On("StreamFile", ctx, filename, readersCh, errorsCh)} +} + +func (_c *MockStreamingReader_StreamFile_Call) Run(run func(ctx context.Context, filename string, readersCh chan<- io.ReadCloser, errorsCh chan<- error)) *MockStreamingReader_StreamFile_Call { + _c.Call.Run(func(args mock.Arguments) { + run(args[0].(context.Context), args[1].(string), args[2].(chan<- io.ReadCloser), args[3].(chan<- error)) + }) + return _c +} + +func (_c *MockStreamingReader_StreamFile_Call) Return() *MockStreamingReader_StreamFile_Call { + _c.Call.Return() + return _c +} + +func (_c *MockStreamingReader_StreamFile_Call) RunAndReturn(run func(context.Context, string, chan<- io.ReadCloser, chan<- error)) *MockStreamingReader_StreamFile_Call { + _c.Call.Return(run) + return _c +} + // StreamFiles provides a mock function with given fields: _a0, _a1, _a2 func (_m *MockStreamingReader) StreamFiles(_a0 context.Context, _a1 chan<- io.ReadCloser, _a2 chan<- error) { _m.Called(_a0, _a1, _a2) diff --git a/mocks/Writer_mock.go b/mocks/Writer_mock.go index d83e6ff4..1642431f 100644 --- a/mocks/Writer_mock.go +++ b/mocks/Writer_mock.go @@ -126,6 +126,52 @@ func (_c *MockWriter_NewWriter_Call) RunAndReturn(run func(context.Context, stri return _c } +// RemoveFiles provides a mock function with given fields: ctx +func (_m *MockWriter) RemoveFiles(ctx context.Context) error { + ret := _m.Called(ctx) + + if len(ret) == 0 { + panic("no return value specified for RemoveFiles") + } + + var r0 error + if rf, ok := ret.Get(0).(func(context.Context) error); ok { + r0 = rf(ctx) + } else { + r0 = ret.Error(0) + } + + return r0 +} + +// MockWriter_RemoveFiles_Call is a *mock.Call that shadows Run/Return methods with type explicit version for method 'RemoveFiles' +type MockWriter_RemoveFiles_Call struct { + *mock.Call +} + +// RemoveFiles is a helper method to define mock.On call +// - ctx context.Context +func (_e *MockWriter_Expecter) RemoveFiles(ctx interface{}) *MockWriter_RemoveFiles_Call { + return &MockWriter_RemoveFiles_Call{Call: _e.mock.On("RemoveFiles", ctx)} +} + +func (_c *MockWriter_RemoveFiles_Call) Run(run func(ctx context.Context)) *MockWriter_RemoveFiles_Call { + _c.Call.Run(func(args mock.Arguments) { + run(args[0].(context.Context)) + }) + return _c +} + +func (_c *MockWriter_RemoveFiles_Call) Return(_a0 error) *MockWriter_RemoveFiles_Call { + _c.Call.Return(_a0) + return _c +} + +func (_c *MockWriter_RemoveFiles_Call) RunAndReturn(run func(context.Context) error) *MockWriter_RemoveFiles_Call { + _c.Call.Return(run) + return _c +} + // NewMockWriter creates a new instance of MockWriter. It also registers a testing interface on the mock and a cleanup function to assert the mocks expectations. // The first argument is typically a *testing.T value. func NewMockWriter(t interface { diff --git a/models/data_models.go b/models/data_models.go index 500d9836..11f2959f 100644 --- a/models/data_models.go +++ b/models/data_models.go @@ -101,13 +101,12 @@ type Token struct { Record *Record Type TokenType Size uint64 - // Current filter state. Must copy this value. - Filter a.PartitionFilter - FileName string + // Current filter state. + Filter *PartitionFilterSerialized } // NewRecordToken creates a new token with the given record. 
-func NewRecordToken(r *Record, size uint64, filter a.PartitionFilter) *Token { +func NewRecordToken(r *Record, size uint64, filter *PartitionFilterSerialized) *Token { return &Token{ Record: r, Type: TokenTypeRecord, diff --git a/models/partition_filter_serialized.go b/models/partition_filter_serialized.go new file mode 100644 index 00000000..5a511a92 --- /dev/null +++ b/models/partition_filter_serialized.go @@ -0,0 +1,55 @@ +// Copyright 2024 Aerospike, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package models + +import ( + "fmt" + + a "github.com/aerospike/aerospike-client-go/v7" +) + +// PartitionFilterSerialized represent serialized a.PartitionFilter. +// To save cursor state. +type PartitionFilterSerialized struct { + Begin int + Count int + Digest []byte + Cursor []byte +} + +// NewPartitionFilterSerialized serialize *a.PartitionFilter and returns new PartitionFilterSerialized instance. +func NewPartitionFilterSerialized(pf *a.PartitionFilter) (*PartitionFilterSerialized, error) { + c, err := pf.EncodeCursor() + if err != nil { + return nil, fmt.Errorf("failed to encode cursor: %w", err) + } + + return &PartitionFilterSerialized{ + Begin: pf.Begin, + Count: pf.Count, + Digest: pf.Digest, + Cursor: c, + }, nil +} + +// Decode decodes *PartitionFilterSerialized to *a.PartitionFilter +func (p *PartitionFilterSerialized) Decode() (*a.PartitionFilter, error) { + pf := &a.PartitionFilter{Begin: p.Begin, Count: p.Count, Digest: p.Digest} + if err := pf.DecodeCursor(p.Cursor); err != nil { + return nil, fmt.Errorf("failed to decode cursor: %w", err) + } + + return pf, nil +} diff --git a/state.go b/state.go index 2352951f..19aa16d2 100644 --- a/state.go +++ b/state.go @@ -19,12 +19,12 @@ import ( "encoding/gob" "fmt" "io" - "log" "log/slog" "sync" "time" a "github.com/aerospike/aerospike-client-go/v7" + "github.com/aerospike/backup-go/models" ) // State contains current backups status data. @@ -36,9 +36,9 @@ type State struct { // Is used to create prefix for backup files. Counter int // RecordsChan communication channel to save current filter state. - RecordsChan chan recordState + RecordsChan chan *models.PartitionFilterSerialized // RecordStates store states of all filters. - RecordStates map[string]recordState + RecordStates map[string]*models.PartitionFilterSerialized // Mutex for RecordStates operations. // Ordinary mutex is used, because we must not allow any writings when we read state. mu sync.Mutex @@ -53,50 +53,6 @@ type State struct { logger *slog.Logger } -type recordState struct { - Filter filter -} - -// filter contains custom filter struct to save filter to GOB. 
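PartitionFilterSerialized is what actually lands in the state file: EncodeCursor() and DecodeCursor() turn the live client filter into plain bytes and back. A small sketch of the intended round trip, written as a helper that assumes it is handed a filter already driving a scan (a freshly created filter may have no cursor to encode); the helper name is illustrative, not part of the library:

package statefile

import (
	"fmt"

	a "github.com/aerospike/aerospike-client-go/v7"
	"github.com/aerospike/backup-go/models"
)

// checkpoint serializes the current cursor of a partition filter and then
// rebuilds an equivalent filter from it, as the state file does across runs.
func checkpoint(pf *a.PartitionFilter) (*a.PartitionFilter, error) {
	pfs, err := models.NewPartitionFilterSerialized(pf)
	if err != nil {
		return nil, fmt.Errorf("serialize: %w", err)
	}

	// pfs holds only exported plain fields (Begin, Count, Digest, Cursor),
	// so it can be stored with encoding/gob inside the state file.
	restored, err := pfs.Decode()
	if err != nil {
		return nil, fmt.Errorf("decode: %w", err)
	}

	return restored, nil
}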
-type filter struct { - Begin int - Count int - Digest []byte - Cursor []byte -} - -func mapToFilter(pf a.PartitionFilter) (filter, error) { - c, err := pf.EncodeCursor() - if err != nil { - return filter{}, fmt.Errorf("failed to encode cursor: %w", err) - } - return filter{ - Begin: pf.Begin, - Count: pf.Count, - Digest: pf.Digest, - Cursor: c, - }, nil -} - -func mapFromFilter(f filter) (*a.PartitionFilter, error) { - pf := &a.PartitionFilter{Begin: f.Begin, Count: f.Count, Digest: f.Digest} - if err := pf.DecodeCursor(f.Cursor); err != nil { - return nil, fmt.Errorf("failed to decode cursor: %w", err) - } - - return pf, nil -} - -func newRecordState(filter a.PartitionFilter) recordState { - f, err := mapToFilter(filter) - if err != nil { - log.Fatalf("failed to map partition filter: %w", err) - } - return recordState{ - Filter: f, - } -} - // NewState returns new state instance depending on config. // If we continue back up, the state will be loaded from a state file, // if it is the first operation, new state instance will be returned. @@ -116,11 +72,11 @@ func NewState( return nil, err } // change filters in config. - // TODO: may be move it handler, so everyone wil see it. config.PartitionFilters, err = s.loadPartitionFilters() if err != nil { return nil, err } + return s, nil } @@ -134,12 +90,11 @@ func newState( writer Writer, logger *slog.Logger, ) *State { - s := &State{ ctx: ctx, // RecordsChan must not be buffered, so we can stop all operations. - RecordsChan: make(chan recordState), - RecordStates: make(map[string]recordState), + RecordsChan: make(chan *models.PartitionFilterSerialized), + RecordStates: make(map[string]*models.PartitionFilterSerialized), FileName: config.StateFile, DumpDuration: config.StateFileDumpDuration, writer: writer, @@ -175,7 +130,7 @@ func newStateFromFile( s.ctx = ctx s.writer = writer s.logger = logger - s.RecordsChan = make(chan recordState) + s.RecordsChan = make(chan *models.PartitionFilterSerialized) s.Counter++ logger.Debug("loaded state file successfully") @@ -228,6 +183,7 @@ func (s *State) dump() error { } enc := gob.NewEncoder(file) + s.mu.Lock() if err = enc.Encode(s); err != nil { return fmt.Errorf("failed to encode state data: %w", err) @@ -244,8 +200,9 @@ func (s *State) loadPartitionFilters() ([]*a.PartitionFilter, error) { s.mu.Lock() result := make([]*a.PartitionFilter, 0, len(s.RecordStates)) + for _, state := range s.RecordStates { - f, err := mapFromFilter(state.Filter) + f, err := state.Decode() if err != nil { return nil, err } @@ -259,26 +216,23 @@ func (s *State) loadPartitionFilters() ([]*a.PartitionFilter, error) { } func (s *State) serveRecords() { - var counter int + for { select { case <-s.ctx.Done(): return case state := <-s.RecordsChan: + if state == nil { + continue + } + counter++ + s.mu.Lock() - key := fmt.Sprintf("%d%d%s", state.Filter.Begin, state.Filter.Count, state.Filter.Digest) + key := fmt.Sprintf("%d%d%s", state.Begin, state.Count, state.Digest) s.RecordStates[key] = state s.mu.Unlock() - - // For tests: - // ---------- - // if counter == 1000 { - // s.dump() - // fmt.Println("done 4000000") - // os.Exit(1) - // } } } } @@ -294,7 +248,9 @@ func (s *State) getFileSuffix() string { func openFile(ctx context.Context, reader StreamingReader, fileName string) (io.ReadCloser, error) { readCh := make(chan io.ReadCloser) errCh := make(chan error) + go reader.StreamFile(ctx, fileName, readCh, errCh) + for { select { case <-ctx.Done(): diff --git a/state_test.go b/state_test.go index 086c5411..51ee55a2 100644 --- 
a/state_test.go +++ b/state_test.go @@ -13,56 +13,3 @@ // limitations under the License. package backup - -import ( - "context" - "log/slog" - "os" - "path/filepath" - "testing" - "time" - - a "github.com/aerospike/aerospike-client-go/v7" - "github.com/stretchr/testify/require" -) - -const ( - testDuration = 1 * time.Second -) - -func TestState(t *testing.T) { - t.Parallel() - - ctx, cancel := context.WithCancel(context.Background()) - tempFile := filepath.Join(t.TempDir(), "state_test.gob") - pfs := []*a.PartitionFilter{ - NewPartitionFilterByID(1), - NewPartitionFilterByID(2), - } - logger := slog.New(slog.NewTextHandler(nil, nil)) - - // Check init. - state := NewState(ctx, tempFile, testDuration, pfs, logger) - - time.Sleep(testDuration * 3) - - require.NotZero(t, state.SavedAt) - cancel() - - // Check that file exists. - _, err := os.Stat(tempFile) - require.NoError(t, err) - - // Nullify the link. - pfs = nil - result := []*a.PartitionFilter{ - NewPartitionFilterByID(1), - NewPartitionFilterByID(2), - } - - // Check restore. - newCtx := context.Background() - newState, err := NewStateFromFile(newCtx, tempFile, logger) - require.NoError(t, err) - require.Equal(t, newState.PartitionFilters, result) -} diff --git a/tests/integration/integration_test.go b/tests/integration/integration_test.go index d87b41a6..5934e391 100644 --- a/tests/integration/integration_test.go +++ b/tests/integration/integration_test.go @@ -238,6 +238,7 @@ func runBackupRestore(suite *backupRestoreTestSuite, backupConfig *backup.Backup ctx, backupConfig, &dst, + nil, ) suite.Nil(err) suite.NotNil(bh) @@ -384,6 +385,7 @@ func runBackupRestoreDirectory(suite *backupRestoreTestSuite, ctx, backupConfig, writers, + nil, ) suite.Nil(err) suite.NotNil(bh) @@ -463,7 +465,7 @@ func (suite *backupRestoreTestSuite) TestRestoreExpiredRecords() { VoidTime: 1, } - token := models.NewRecordToken(modelRec, 0) + token := models.NewRecordToken(modelRec, 0, nil) v, err := encoder.EncodeToken(token) if err != nil { suite.FailNow(err.Error()) @@ -556,6 +558,7 @@ func (suite *backupRestoreTestSuite) TestBackupRestoreIOWithPartitions() { ctx, backupConfig, writers, + nil, ) suite.Nil(err) suite.NotNil(bh) @@ -595,6 +598,7 @@ func (suite *backupRestoreTestSuite) TestBackupContext() { ctx, backup.NewDefaultBackupConfig(), &writer, + nil, ) suite.NotNil(bh) suite.Nil(err) @@ -746,6 +750,7 @@ func (suite *backupRestoreTestSuite) TestBackupParallelNodes() { ctx, bCfg, &dst, + nil, ) suite.NotNil(bh) suite.Nil(err) @@ -763,6 +768,7 @@ func (suite *backupRestoreTestSuite) TestBackupParallelNodesList() { ctx, bCfg, &dst, + nil, ) suite.NotNil(bh) suite.Nil(err) @@ -792,6 +798,7 @@ func (suite *backupRestoreTestSuite) TestBackupPartitionList() { ctx, bCfg, &dst, + nil, ) suite.NotNil(bh) suite.Nil(err) @@ -1081,6 +1088,7 @@ func (suite *backupRestoreTestSuite) TestBackupAfterDigestOk() { ctx, backupConfig, &dst, + nil, ) suite.Nil(err) suite.NotNil(bh) @@ -1113,6 +1121,12 @@ func (b *byteReadWriterFactory) StreamFiles(_ context.Context, readersCh chan<- close(readersCh) } +func (b *byteReadWriterFactory) StreamFile(_ context.Context, _ string, readersCh chan<- io.ReadCloser, _ chan<- error) { + reader := io.NopCloser(bytes.NewReader(b.buffer.Bytes())) + readersCh <- reader + close(readersCh) +} + func (b *byteReadWriterFactory) OpenFile(_ context.Context, _ string, readersCh chan<- io.ReadCloser, _ chan<- error) { reader := io.NopCloser(bytes.NewReader(b.buffer.Bytes())) readersCh <- reader diff --git a/writers.go b/writers.go index 
9d51fc6b..c8fd7f89 100644 --- a/writers.go +++ b/writers.go @@ -88,11 +88,16 @@ type tokenWriter struct { encoder Encoder output io.Writer logger *slog.Logger - stateChan chan<- recordState + stateChan chan<- *models.PartitionFilterSerialized } // newTokenWriter creates a new tokenWriter. -func newTokenWriter(encoder Encoder, output io.Writer, logger *slog.Logger, stateChan chan<- recordState) *tokenWriter { +func newTokenWriter( + encoder Encoder, + output io.Writer, + logger *slog.Logger, + stateChan chan<- *models.PartitionFilterSerialized, +) *tokenWriter { id := uuid.NewString() logger = logging.WithWriter(logger, id, logging.WriterTypeToken) logger.Debug("created new token writer") @@ -112,7 +117,7 @@ func (w *tokenWriter) Write(v *models.Token) (int, error) { return 0, fmt.Errorf("error encoding token: %w", err) } - w.stateChan <- newRecordState(v.Filter) + w.stateChan <- v.Filter return w.output.Write(data) } diff --git a/writers_test.go b/writers_test.go index 4dcce0b3..4d78ef23 100644 --- a/writers_test.go +++ b/writers_test.go @@ -49,7 +49,7 @@ func (suite *writersTestSuite) TestTokenWriter() { }, }, } - recToken := models.NewRecordToken(expRecord, 0) + recToken := models.NewRecordToken(expRecord, 0, nil) expUDF := &models.UDF{ Name: "udf", @@ -70,7 +70,7 @@ func (suite *writersTestSuite) TestTokenWriter() { mockEncoder.EXPECT().EncodeToken(invalidToken).Return(nil, errors.New("error")) dst := bytes.Buffer{} - writer := newTokenWriter(mockEncoder, &dst, slog.Default()) + writer := newTokenWriter(mockEncoder, &dst, slog.Default(), nil) suite.NotNil(writer) _, err := writer.Write(recToken) @@ -92,7 +92,7 @@ func (suite *writersTestSuite) TestTokenWriter() { failRec := &models.Record{ Record: &a.Record{}, } - failRecToken := models.NewRecordToken(failRec, 0) + failRecToken := models.NewRecordToken(failRec, 0, nil) mockEncoder.EXPECT().EncodeToken(failRecToken).Return(nil, errors.New("error")) _, err = writer.Write(failRecToken) suite.NotNil(err) @@ -103,7 +103,7 @@ func (suite *writersTestSuite) TestTokenWriter() { func (suite *writersTestSuite) TestTokenStatsWriter() { mockWriter := pipemocks.NewMockDataWriter[*models.Token](suite.T()) - mockWriter.EXPECT().Write(models.NewRecordToken(&models.Record{}, 0)).Return(1, nil) + mockWriter.EXPECT().Write(models.NewRecordToken(&models.Record{}, 0, nil)).Return(1, nil) mockWriter.EXPECT().Write(models.NewSIndexToken(&models.SIndex{}, 0)).Return(1, nil) mockWriter.EXPECT().Write(models.NewUDFToken(&models.UDF{}, 0)).Return(1, nil) mockWriter.EXPECT().Write(&models.Token{Type: models.TokenTypeInvalid}).Return(0, errors.New("error")) @@ -116,7 +116,7 @@ func (suite *writersTestSuite) TestTokenStatsWriter() { writer := newWriterWithTokenStats(mockWriter, mockStats, slog.Default()) suite.NotNil(writer) - _, err := writer.Write(models.NewRecordToken(&models.Record{}, 0)) + _, err := writer.Write(models.NewRecordToken(&models.Record{}, 0, nil)) suite.Nil(err) _, err = writer.Write(models.NewSIndexToken(&models.SIndex{}, 0)) From edfcae25109a7bce721fcbc63c39e7e033e55ad8 Mon Sep 17 00:00:00 2001 From: Dmitrii Neeman Date: Thu, 10 Oct 2024 16:25:00 +0300 Subject: [PATCH 06/25] FMWK-570-backup-restore-state - added nil checks --- handler_backup.go | 25 +++++++++++++++++++++---- io/aerospike/record_reader.go | 15 ++++++++++----- io/aws/s3/options.go | 10 ++++++++++ io/aws/s3/writer.go | 2 +- io/azure/blob/options.go | 10 ++++++++++ io/azure/blob/writer.go | 2 +- io/gcp/storage/options.go | 10 ++++++++++ io/gcp/storage/writer.go | 2 +- writers.go | 4 
+++- 9 files changed, 67 insertions(+), 13 deletions(-) diff --git a/handler_backup.go b/handler_backup.go index 657f275d..fc321f12 100644 --- a/handler_backup.go +++ b/handler_backup.go @@ -269,7 +269,11 @@ func (bh *BackupHandler) makeWriteWorkers( writeWorkers := make([]pipeline.Worker[*models.Token], len(backupWriters)) for i, w := range backupWriters { - var dataWriter pipeline.DataWriter[*models.Token] = newTokenWriter(bh.encoder, w, bh.logger, bh.state.RecordsChan) + var dataWriter pipeline.DataWriter[*models.Token] = newTokenWriter(bh.encoder, w, bh.logger, nil) + if bh.state != nil { + dataWriter = newTokenWriter(bh.encoder, w, bh.logger, bh.state.RecordsChan) + } + dataWriter = newWriterWithTokenStats(dataWriter, &bh.stats, bh.logger) writeWorkers[i] = pipeline.NewWriteWorker(dataWriter, bh.limiter) } @@ -313,7 +317,11 @@ func (bh *BackupHandler) newWriter(ctx context.Context) (io.WriteCloser, error) } func (bh *BackupHandler) newConfiguredWriter(ctx context.Context) (io.WriteCloser, error) { - filename := bh.encoder.GenerateFilename(bh.state.getFileSuffix()) + suffix := "" + if bh.state != nil { + suffix = bh.state.getFileSuffix() + } + filename := bh.encoder.GenerateFilename(suffix) storageWriter, err := bh.writer.NewWriter(ctx, filename) if err != nil { @@ -437,7 +445,11 @@ func (bh *BackupHandler) backupSIndexes( reader := aerospike.NewSIndexReader(bh.infoClient, bh.config.Namespace, bh.logger) sindexReadWorker := pipeline.NewReadWorker[*models.Token](reader) - sindexWriter := pipeline.DataWriter[*models.Token](newTokenWriter(bh.encoder, writer, bh.logger, bh.state.RecordsChan)) + sindexWriter := pipeline.DataWriter[*models.Token](newTokenWriter(bh.encoder, writer, bh.logger, nil)) + if bh.state != nil { + sindexWriter = pipeline.DataWriter[*models.Token](newTokenWriter(bh.encoder, writer, bh.logger, bh.state.RecordsChan)) + } + sindexWriter = newWriterWithTokenStats(sindexWriter, &bh.stats, bh.logger) sindexWriteWorker := pipeline.NewWriteWorker(sindexWriter, bh.limiter) @@ -456,7 +468,12 @@ func (bh *BackupHandler) backupUDFs( reader := aerospike.NewUDFReader(bh.infoClient, bh.logger) udfReadWorker := pipeline.NewReadWorker[*models.Token](reader) - udfWriter := pipeline.DataWriter[*models.Token](newTokenWriter(bh.encoder, writer, bh.logger, bh.state.RecordsChan)) + udfWriter := pipeline.DataWriter[*models.Token](newTokenWriter(bh.encoder, writer, bh.logger, nil)) + + if bh.state != nil { + udfWriter = pipeline.DataWriter[*models.Token](newTokenWriter(bh.encoder, writer, bh.logger, bh.state.RecordsChan)) + } + udfWriter = newWriterWithTokenStats(udfWriter, &bh.stats, bh.logger) udfWriteWorker := pipeline.NewWriteWorker(udfWriter, bh.limiter) diff --git a/io/aerospike/record_reader.go b/io/aerospike/record_reader.go index 36084780..06ea837a 100644 --- a/io/aerospike/record_reader.go +++ b/io/aerospike/record_reader.go @@ -142,12 +142,17 @@ func (r *RecordReader) Read() (*models.Token, error) { Record: res.Record, } - pfs, err := models.NewPartitionFilterSerialized(r.config.partitionFilter) - if err != nil { - return nil, fmt.Errorf("failed to serialize partition filter: %w", err) - } + recToken := models.NewRecordToken(&rec, 0, nil) - recToken := models.NewRecordToken(&rec, 0, pfs) + // For indexes and udf, partition filter will be nil. 
+ if r.config.partitionFilter != nil { + pfs, err := models.NewPartitionFilterSerialized(r.config.partitionFilter) + if err != nil { + return nil, fmt.Errorf("failed to serialize partition filter: %w", err) + } + + recToken = models.NewRecordToken(&rec, 0, pfs) + } return recToken, nil } diff --git a/io/aws/s3/options.go b/io/aws/s3/options.go index bd956914..153e3146 100644 --- a/io/aws/s3/options.go +++ b/io/aws/s3/options.go @@ -30,6 +30,8 @@ type options struct { // startAfter is where you want Amazon S3 to start listing from. Amazon S3 starts // listing after this specified key. StartAfter can be any key in the bucket. startAfter string + // skipDirCheck if true, backup directory won't be checked. + skipDirCheck bool } type Opt func(*options) @@ -80,3 +82,11 @@ func WithStartAfter(v string) Opt { r.startAfter = v } } + +// WithSkipDirCheck adds skip dir check flags. +// Which means that backup directory won't be checked for emptiness. +func WithSkipDirCheck() Opt { + return func(r *options) { + r.skipDirCheck = true + } +} diff --git a/io/aws/s3/writer.go b/io/aws/s3/writer.go index f14069fd..855b5240 100644 --- a/io/aws/s3/writer.go +++ b/io/aws/s3/writer.go @@ -90,7 +90,7 @@ func NewWriter( return nil, fmt.Errorf("bucket does not exist or you don't have access: %w", err) } - if w.isDir { + if w.isDir && !w.skipDirCheck { // Check if backup dir is empty. isEmpty, err := isEmptyDirectory(ctx, client, bucketName, w.prefix) if err != nil { diff --git a/io/azure/blob/options.go b/io/azure/blob/options.go index ae7bcca3..67fa8cc6 100644 --- a/io/azure/blob/options.go +++ b/io/azure/blob/options.go @@ -38,6 +38,8 @@ type options struct { // as the value for the marker parameter in a subsequent call to request the next // page of list items. The marker value is opaque to the client. marker string + // skipDirCheck if true, backup directory won't be checked. + skipDirCheck bool } type Opt func(*options) @@ -98,3 +100,11 @@ func WithMarker(v string) Opt { r.marker = v } } + +// WithSkipDirCheck adds skip dir check flags. +// Which means that backup directory won't be checked for emptiness. +func WithSkipDirCheck() Opt { + return func(r *options) { + r.skipDirCheck = true + } +} diff --git a/io/azure/blob/writer.go b/io/azure/blob/writer.go index 15f6421d..3976464a 100644 --- a/io/azure/blob/writer.go +++ b/io/azure/blob/writer.go @@ -80,7 +80,7 @@ func NewWriter( return nil, fmt.Errorf("unable to get container properties: %w", err) } - if w.isDir { + if w.isDir && !w.skipDirCheck { // Check if backup dir is empty. isEmpty, err := isEmptyDirectory(ctx, client, containerName, prefix) if err != nil { diff --git a/io/gcp/storage/options.go b/io/gcp/storage/options.go index f55d71e2..b6b54c87 100644 --- a/io/gcp/storage/options.go +++ b/io/gcp/storage/options.go @@ -30,6 +30,8 @@ type options struct { // startOffset is used to filter results to objects whose names are // lexicographically equal to or after startOffset. startOffset string + // skipDirCheck if true, backup directory won't be checked. + skipDirCheck bool } type Opt func(*options) @@ -82,3 +84,11 @@ func WithStartOffset(v string) Opt { r.startOffset = v } } + +// WithSkipDirCheck adds skip dir check flags. +// Which means that backup directory won't be checked for emptiness. 
+func WithSkipDirCheck() Opt { + return func(r *options) { + r.skipDirCheck = true + } +} diff --git a/io/gcp/storage/writer.go b/io/gcp/storage/writer.go index 776d1168..27cc19d2 100644 --- a/io/gcp/storage/writer.go +++ b/io/gcp/storage/writer.go @@ -80,7 +80,7 @@ func NewWriter( return nil, fmt.Errorf("failed to get bucketHandler %s attr: %w", bucketName, err) } - if w.isDir { + if w.isDir && !w.skipDirCheck { // Check if backup dir is empty. isEmpty, err := isEmptyDirectory(ctx, bucketHandler, prefix) if err != nil { diff --git a/writers.go b/writers.go index c8fd7f89..cec668ff 100644 --- a/writers.go +++ b/writers.go @@ -117,7 +117,9 @@ func (w *tokenWriter) Write(v *models.Token) (int, error) { return 0, fmt.Errorf("error encoding token: %w", err) } - w.stateChan <- v.Filter + if w.stateChan != nil { + w.stateChan <- v.Filter + } return w.output.Write(data) } From 81d47743337fc6acd62107deb264fb0d80522b64 Mon Sep 17 00:00:00 2001 From: Dmitrii Neeman Date: Sun, 13 Oct 2024 18:14:33 +0300 Subject: [PATCH 07/25] FMWK-570-backup-restore-state - tersting --- config_backup.go | 2 +- handler_backup.go | 9 ++-- handler_backup_records.go | 13 +++-- handler_restore.go | 14 ++++-- io/aerospike/record_reader.go | 36 ++++++++------ io/aerospike/result_sets.go | 48 ++++++++++++++++++- io/encoding/asb/decode.go | 2 +- models/data_models.go | 4 +- models/partition_filter_serialized.go | 10 ++-- pipeline/pipeline.go | 68 ++++++++++++++++++++------- state.go | 12 ++--- writers.go | 4 +- 12 files changed, 162 insertions(+), 60 deletions(-) diff --git a/config_backup.go b/config_backup.go index 861fb0cc..2678770d 100644 --- a/config_backup.go +++ b/config_backup.go @@ -149,7 +149,7 @@ func (c *BackupConfig) isStateFirstRun() bool { // isStateContinueRun checks if we continue backup from a state file. func (c *BackupConfig) isStateContinue() bool { - return c.StateFile != "" && !c.Continue + return c.StateFile != "" && c.Continue } func (c *BackupConfig) isFullBackup() bool { diff --git a/handler_backup.go b/handler_backup.go index fc321f12..796100c0 100644 --- a/handler_backup.go +++ b/handler_backup.go @@ -185,9 +185,9 @@ func (bh *BackupHandler) getEstimateSamples(ctx context.Context, recordsNumber i scanPolicy.RawCDT = true nodes := bh.aerospikeClient.GetNodes() - handler := newBackupRecordsHandler(bh.config, bh.aerospikeClient, bh.logger, bh.scanLimiter) + handler := newBackupRecordsHandler(bh.config, bh.aerospikeClient, bh.logger, bh.scanLimiter, bh.state) readerConfig := handler.recordReaderConfigForNode(nodes, &scanPolicy) - recordReader := aerospike.NewRecordReader(ctx, bh.aerospikeClient, readerConfig, bh.logger) + recordReader := aerospike.NewRecordReader(ctx, bh.aerospikeClient, readerConfig, bh.logger, bh.state.RecordsChan) // Timestamp processor. 
tsProcessor := processors.NewVoidTimeSetter(bh.logger) @@ -245,7 +245,7 @@ func (bh *BackupHandler) backupSync(ctx context.Context) error { writeWorkers := bh.makeWriteWorkers(backupWriters) - handler := newBackupRecordsHandler(bh.config, bh.aerospikeClient, bh.logger, bh.scanLimiter) + handler := newBackupRecordsHandler(bh.config, bh.aerospikeClient, bh.logger, bh.scanLimiter, bh.state) bh.stats.TotalRecords, err = handler.countRecords(ctx, bh.infoClient) if err != nil { @@ -321,6 +321,7 @@ func (bh *BackupHandler) newConfiguredWriter(ctx context.Context) (io.WriteClose if bh.state != nil { suffix = bh.state.getFileSuffix() } + filename := bh.encoder.GenerateFilename(suffix) storageWriter, err := bh.writer.NewWriter(ctx, filename) @@ -454,6 +455,7 @@ func (bh *BackupHandler) backupSIndexes( sindexWriteWorker := pipeline.NewWriteWorker(sindexWriter, bh.limiter) sindexPipeline := pipeline.NewPipeline[*models.Token]( + true, []pipeline.Worker[*models.Token]{sindexReadWorker}, []pipeline.Worker[*models.Token]{sindexWriteWorker}, ) @@ -478,6 +480,7 @@ func (bh *BackupHandler) backupUDFs( udfWriteWorker := pipeline.NewWriteWorker(udfWriter, bh.limiter) udfPipeline := pipeline.NewPipeline[*models.Token]( + true, []pipeline.Worker[*models.Token]{udfReadWorker}, []pipeline.Worker[*models.Token]{udfWriteWorker}, ) diff --git a/handler_backup_records.go b/handler_backup_records.go index f8523181..73a29ee1 100644 --- a/handler_backup_records.go +++ b/handler_backup_records.go @@ -37,6 +37,7 @@ type backupRecordsHandler struct { aerospikeClient AerospikeClient logger *slog.Logger scanLimiter *semaphore.Weighted + state *State } func newBackupRecordsHandler( @@ -44,6 +45,7 @@ func newBackupRecordsHandler( ac AerospikeClient, logger *slog.Logger, scanLimiter *semaphore.Weighted, + state *State, ) *backupRecordsHandler { logger.Debug("created new backup records handler") @@ -52,6 +54,7 @@ func newBackupRecordsHandler( aerospikeClient: ac, logger: logger, scanLimiter: scanLimiter, + state: state, } return h @@ -72,9 +75,9 @@ func (bh *backupRecordsHandler) run( processors.NewVoidTimeSetter(bh.logger), processors.NewTPSLimiter[*models.Token]( ctx, bh.config.RecordsPerSecond), - )) + ), bh.config.ParallelRead) - return pipeline.NewPipeline(readWorkers, composeProcessor, writers).Run(ctx) + return pipeline.NewPipeline(true, readWorkers, composeProcessor, writers).Run(ctx) } func (bh *backupRecordsHandler) countRecords(ctx context.Context, infoClient *asinfo.InfoClient) (uint64, error) { @@ -120,7 +123,7 @@ func (bh *backupRecordsHandler) countRecordsUsingScanByPartitions(ctx context.Co // with this filter. 
pf := *bh.config.PartitionFilters[j] readerConfig := bh.recordReaderConfigForPartitions(&pf, scanPolicy) - recordReader := aerospike.NewRecordReader(ctx, bh.aerospikeClient, readerConfig, bh.logger) + recordReader := aerospike.NewRecordReader(ctx, bh.aerospikeClient, readerConfig, bh.logger, nil) for { if _, err := recordReader.Read(); err != nil { @@ -162,7 +165,7 @@ func (bh *backupRecordsHandler) countRecordsUsingScanByNodes(ctx context.Context var count uint64 readerConfig := bh.recordReaderConfigForNode(nodes, scanPolicy) - recordReader := aerospike.NewRecordReader(ctx, bh.aerospikeClient, readerConfig, bh.logger) + recordReader := aerospike.NewRecordReader(ctx, bh.aerospikeClient, readerConfig, bh.logger, nil) for { if _, err := recordReader.Read(); err != nil { @@ -223,6 +226,7 @@ func (bh *backupRecordsHandler) makeAerospikeReadWorkersForPartition( bh.aerospikeClient, recordReaderConfig, bh.logger, + bh.state.RecordsChan, ) readWorkers[i] = pipeline.NewReadWorker[*models.Token](recordReader) @@ -263,6 +267,7 @@ func (bh *backupRecordsHandler) makeAerospikeReadWorkersForNodes( bh.aerospikeClient, recordReaderConfig, bh.logger, + bh.state.RecordsChan, ) readWorkers[i] = pipeline.NewReadWorker[*models.Token](recordReader) diff --git a/handler_restore.go b/handler_restore.go index c2febe84..601cdc1a 100644 --- a/handler_restore.go +++ b/handler_restore.go @@ -229,9 +229,9 @@ func (rh *RestoreHandler) runRestorePipeline(ctx context.Context, readers []pipe processors.NewChangeNamespace(nsSource, nsDest), processors.NewExpirationSetter(&rh.stats.RecordsExpired, rh.config.ExtraTTL, rh.logger), processors.NewTPSLimiter[*models.Token](ctx, rh.config.RecordsPerSecond), - )) + ), rh.config.Parallel) - return pipeline.NewPipeline(readers, composeProcessor, writeWorkers).Run(ctx) + return pipeline.NewPipeline(true, readers, composeProcessor, writeWorkers).Run(ctx) } func (rh *RestoreHandler) useBatchWrites() (bool, error) { @@ -244,7 +244,15 @@ func (rh *RestoreHandler) useBatchWrites() (bool, error) { return infoClient.SupportsBatchWrite() } -func newTokenWorker(processor processors.TokenProcessor) []pipeline.Worker[*models.Token] { +func newTokenWorker(processor processors.TokenProcessor, parallel int) []pipeline.Worker[*models.Token] { + if parallel > 0 { + workers := make([]pipeline.Worker[*models.Token], 0, parallel) + for i := 0; i < parallel; i++ { + workers = append(workers, pipeline.NewProcessorWorker(processor)) + } + return workers + } + return []pipeline.Worker[*models.Token]{ pipeline.NewProcessorWorker(processor), } diff --git a/io/aerospike/record_reader.go b/io/aerospike/record_reader.go index 06ea837a..c28d0895 100644 --- a/io/aerospike/record_reader.go +++ b/io/aerospike/record_reader.go @@ -95,6 +95,7 @@ type RecordReader struct { logger *slog.Logger config *RecordReaderConfig scanResult *recordSets // initialized on first Read() call + stateChan chan<- models.PartitionFilterSerialized } // NewRecordReader creates a new RecordReader. 
@@ -103,16 +104,18 @@ func NewRecordReader(
 	client scanner,
 	cfg *RecordReaderConfig,
 	logger *slog.Logger,
+	stateChan chan<- models.PartitionFilterSerialized,
 ) *RecordReader {
 	id := uuid.NewString()
 	logger = logging.WithReader(logger, id, logging.ReaderTypeRecord)
 	logger.Debug("created new aerospike record reader")

 	return &RecordReader{
-		ctx:    ctx,
-		config: cfg,
-		client: client,
-		logger: logger,
+		ctx:       ctx,
+		config:    cfg,
+		client:    client,
+		logger:    logger,
+		stateChan: stateChan,
 	}
 }

@@ -127,6 +130,19 @@ func (r *RecordReader) Read() (*models.Token, error) {
 		r.scanResult = scan
 	}

+	var (
+		pfs models.PartitionFilterSerialized
+		err error
+	)
+	// For indexes and udf, partition filter will be nil.
+	if r.config.partitionFilter != nil && r.stateChan != nil {
+		pfs, err = models.NewPartitionFilterSerialized(r.config.partitionFilter)
+		if err != nil {
+			return nil, fmt.Errorf("failed to serialize partition filter: %w", err)
+		}
+		// r.stateChan <- pfs
+	}
+
 	res, active := <-r.scanResult.Results()
 	if !active {
 		r.logger.Debug("scan finished")
@@ -142,17 +158,7 @@ func (r *RecordReader) Read() (*models.Token, error) {
 	rec := models.Record{
 		Record: res.Record,
 	}

-	recToken := models.NewRecordToken(&rec, 0, nil)
-
-	// For indexes and udf, partition filter will be nil.
-	if r.config.partitionFilter != nil {
-		pfs, err := models.NewPartitionFilterSerialized(r.config.partitionFilter)
-		if err != nil {
-			return nil, fmt.Errorf("failed to serialize partition filter: %w", err)
-		}
-
-		recToken = models.NewRecordToken(&rec, 0, pfs)
-	}
+	recToken := models.NewRecordToken(&rec, 0, pfs)

 	return recToken, nil
 }
diff --git a/io/aerospike/result_sets.go b/io/aerospike/result_sets.go
index 69a5ccc7..ac42d68b 100644
--- a/io/aerospike/result_sets.go
+++ b/io/aerospike/result_sets.go
@@ -16,18 +16,29 @@ package aerospike

 import (
 	"log/slog"
+	"sync"

 	a "github.com/aerospike/aerospike-client-go/v7"
 	"github.com/aerospike/backup-go/internal/util"
+	"github.com/aerospike/backup-go/models"
 )

 // recordSets contains multiple Aerospike Recordset objects.
 type recordSets struct {
-	resultsChannel <-chan *a.Result
+	resultsChannel <-chan *customRecord
 	logger         *slog.Logger
 	data           []*a.Recordset
 }

+// customRecord pairs a scan result with the serialized state of the partition
+// filter that produced it.
+type customRecord struct {
+	result *a.Result
+	filter models.PartitionFilterSerialized
+}
+
+// newCustomRecord creates a customRecord from a scan result and an already
+// serialized partition filter.
+func newCustomRecord(result *a.Result, filter models.PartitionFilterSerialized) *customRecord {
+	return &customRecord{
+		result: result,
+		filter: filter,
+	}
+}
+
 func newRecordSets(data []*a.Recordset, logger *slog.Logger) *recordSets {
 	resultChannels := make([]<-chan *a.Result, 0, len(data))
 	for _, recSet := range data {
@@ -52,6 +63,39 @@ func (r *recordSets) Close() {
 }

 // Results returns the results channel of the recordSets.
-func (r *recordSets) Results() <-chan *a.Result {
+func (r *recordSets) Results() <-chan *customRecord {
 	return r.resultsChannel
 }
+
+// MergeResultSets merges several scan result channels into one output channel.
+func MergeResultSets(channels []<-chan *a.Result) <-chan *a.Result {
+	out := make(chan *a.Result)
+
+	if len(channels) == 0 {
+		close(out)
+		return out
+	}
+
+	var wg sync.WaitGroup
+	// Run an output goroutine for each input channel.
+	output := func(c <-chan *a.Result) {
+		for n := range c {
+			out <- n
+		}
+
+		wg.Done()
+	}
+
+	wg.Add(len(channels))
+
+	for _, c := range channels {
+		go output(c)
+	}
+
+	// Run a goroutine to close out once all the output goroutines are done.
+ go func() { + wg.Wait() + close(out) + }() + + return out +} diff --git a/io/encoding/asb/decode.go b/io/encoding/asb/decode.go index af8a2c5e..e557bf80 100644 --- a/io/encoding/asb/decode.go +++ b/io/encoding/asb/decode.go @@ -161,7 +161,7 @@ func (r *Decoder) NextToken() (*models.Token, error) { case *models.UDF: return models.NewUDFToken(v, size), nil case *models.Record: - return models.NewRecordToken(v, size, nil), nil + return models.NewRecordToken(v, size, models.PartitionFilterSerialized{}), nil default: return nil, fmt.Errorf("unsupported token type %T", v) } diff --git a/models/data_models.go b/models/data_models.go index 11f2959f..a2100e8d 100644 --- a/models/data_models.go +++ b/models/data_models.go @@ -102,11 +102,11 @@ type Token struct { Type TokenType Size uint64 // Current filter state. - Filter *PartitionFilterSerialized + Filter PartitionFilterSerialized } // NewRecordToken creates a new token with the given record. -func NewRecordToken(r *Record, size uint64, filter *PartitionFilterSerialized) *Token { +func NewRecordToken(r *Record, size uint64, filter PartitionFilterSerialized) *Token { return &Token{ Record: r, Type: TokenTypeRecord, diff --git a/models/partition_filter_serialized.go b/models/partition_filter_serialized.go index 5a511a92..a9d67a3a 100644 --- a/models/partition_filter_serialized.go +++ b/models/partition_filter_serialized.go @@ -30,13 +30,17 @@ type PartitionFilterSerialized struct { } // NewPartitionFilterSerialized serialize *a.PartitionFilter and returns new PartitionFilterSerialized instance. -func NewPartitionFilterSerialized(pf *a.PartitionFilter) (*PartitionFilterSerialized, error) { +func NewPartitionFilterSerialized(pf *a.PartitionFilter) (PartitionFilterSerialized, error) { + if pf == nil || pf.IsDone() { + return PartitionFilterSerialized{}, nil + } + c, err := pf.EncodeCursor() if err != nil { - return nil, fmt.Errorf("failed to encode cursor: %w", err) + return PartitionFilterSerialized{}, fmt.Errorf("failed to encode cursor: %w", err) } - return &PartitionFilterSerialized{ + return PartitionFilterSerialized{ Begin: pf.Begin, Count: pf.Count, Digest: pf.Digest, diff --git a/pipeline/pipeline.go b/pipeline/pipeline.go index eb1ecbe4..2214ce57 100644 --- a/pipeline/pipeline.go +++ b/pipeline/pipeline.go @@ -42,14 +42,17 @@ type Pipeline[T any] struct { receive <-chan T send chan<- T stages []*stage[T] + // For synced pipeline we must create same number of workers for each stage. + // Then we will initialize communication channels strait from worker to worker through stages. + isSynced bool } var _ Worker[any] = (*Pipeline[any])(nil) -const channelSize = 256 +// const channelSize = 1 // NewPipeline creates a new DataPipeline. 
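// Editorial sketch, not part of the patch: the fan-in used by MergeResultSets
// above, written generically so the element type is the same on both ends.
// mergeChannels is an illustrative name, not a library function.
package sketch

import "sync"

func mergeChannels[T any](channels []<-chan T) <-chan T {
	out := make(chan T)

	if len(channels) == 0 {
		close(out)
		return out
	}

	var wg sync.WaitGroup
	wg.Add(len(channels))

	for _, c := range channels {
		// Copy every value from one input channel to the shared output.
		go func(c <-chan T) {
			defer wg.Done()
			for v := range c {
				out <- v
			}
		}(c)
	}

	// Close the output once every input channel has been drained.
	go func() {
		wg.Wait()
		close(out)
	}()

	return out
}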
-func NewPipeline[T any](workGroups ...[]Worker[T]) *Pipeline[T] { +func NewPipeline[T any](isSynced bool, workGroups ...[]Worker[T]) *Pipeline[T] { stages := make([]*stage[T], len(workGroups)) for i, workers := range workGroups { @@ -57,7 +60,8 @@ func NewPipeline[T any](workGroups ...[]Worker[T]) *Pipeline[T] { } return &Pipeline[T]{ - stages: stages, + stages: stages, + isSynced: isSynced, } } @@ -90,21 +94,46 @@ func (dp *Pipeline[T]) Run(ctx context.Context) error { errors := make(chan error, len(dp.stages)) - var lastSend chan T + var ( + lastSend []<-chan T + emptySendChans []chan<- T + emptyReceiveChans []<-chan T + ) for _, s := range dp.stages { - send := make(chan T, channelSize) - s.SetSendChan(send) + sendChans := make([]chan<- T, 0, len(s.workers)) + receiveChans := make([]<-chan T, 0, len(s.workers)) + + emptySendChans = make([]chan<- T, 0, len(s.workers)) + emptyReceiveChans = make([]<-chan T, 0, len(s.workers)) + + if dp.isSynced { + for i := 0; i < len(s.workers); i++ { + send := make(chan T) + sendChans = append(sendChans, send) + receiveChans = append(receiveChans, send) + + empty := make(chan T) + emptySendChans = append(emptySendChans, empty) + emptyReceiveChans = append(emptyReceiveChans, empty) + } + } else { + send := make(chan T) + sendChans = append(sendChans, send) + receiveChans = append(receiveChans, send) + } + + s.SetSendChan(sendChans) s.SetReceiveChan(lastSend) - lastSend = send + lastSend = receiveChans } // set the receive and send channels for first // and last stages to the pipeline's receive and send channels - dp.stages[0].SetReceiveChan(dp.receive) - dp.stages[len(dp.stages)-1].SetSendChan(dp.send) + dp.stages[0].SetReceiveChan(emptyReceiveChans) + dp.stages[len(dp.stages)-1].SetSendChan(emptySendChans) wg := &sync.WaitGroup{} for _, s := range dp.stages { @@ -133,16 +162,16 @@ func (dp *Pipeline[T]) Run(ctx context.Context) error { } type stage[T any] struct { - receive <-chan T - send chan<- T + receive []<-chan T + send []chan<- T workers []Worker[T] } -func (s *stage[T]) SetReceiveChan(c <-chan T) { +func (s *stage[T]) SetReceiveChan(c []<-chan T) { s.receive = c } -func (s *stage[T]) SetSendChan(c chan<- T) { +func (s *stage[T]) SetSendChan(c []chan<- T) { s.send = c } @@ -159,9 +188,9 @@ func (s *stage[T]) Run(ctx context.Context) error { return nil } - for _, w := range s.workers { - w.SetReceiveChan(s.receive) - w.SetSendChan(s.send) + for i, w := range s.workers { + w.SetReceiveChan(s.receive[i]) + w.SetSendChan(s.send[i]) } ctx, cancel := context.WithCancel(ctx) @@ -187,8 +216,11 @@ func (s *stage[T]) Run(ctx context.Context) error { wg.Wait() - if s.send != nil { - close(s.send) + for i := range s.send { + if s.send[i] != nil { + close(s.send[i]) + } + } close(errors) diff --git a/state.go b/state.go index 19aa16d2..99388f59 100644 --- a/state.go +++ b/state.go @@ -36,9 +36,9 @@ type State struct { // Is used to create prefix for backup files. Counter int // RecordsChan communication channel to save current filter state. - RecordsChan chan *models.PartitionFilterSerialized + RecordsChan chan models.PartitionFilterSerialized // RecordStates store states of all filters. - RecordStates map[string]*models.PartitionFilterSerialized + RecordStates map[string]models.PartitionFilterSerialized // Mutex for RecordStates operations. // Ordinary mutex is used, because we must not allow any writings when we read state. 
mu sync.Mutex @@ -93,8 +93,8 @@ func newState( s := &State{ ctx: ctx, // RecordsChan must not be buffered, so we can stop all operations. - RecordsChan: make(chan *models.PartitionFilterSerialized), - RecordStates: make(map[string]*models.PartitionFilterSerialized), + RecordsChan: make(chan models.PartitionFilterSerialized), + RecordStates: make(map[string]models.PartitionFilterSerialized), FileName: config.StateFile, DumpDuration: config.StateFileDumpDuration, writer: writer, @@ -130,7 +130,7 @@ func newStateFromFile( s.ctx = ctx s.writer = writer s.logger = logger - s.RecordsChan = make(chan *models.PartitionFilterSerialized) + s.RecordsChan = make(chan models.PartitionFilterSerialized) s.Counter++ logger.Debug("loaded state file successfully") @@ -223,7 +223,7 @@ func (s *State) serveRecords() { case <-s.ctx.Done(): return case state := <-s.RecordsChan: - if state == nil { + if state.Begin == 0 && state.Count == 0 && state.Digest == nil { continue } diff --git a/writers.go b/writers.go index cec668ff..d3df952a 100644 --- a/writers.go +++ b/writers.go @@ -88,7 +88,7 @@ type tokenWriter struct { encoder Encoder output io.Writer logger *slog.Logger - stateChan chan<- *models.PartitionFilterSerialized + stateChan chan<- models.PartitionFilterSerialized } // newTokenWriter creates a new tokenWriter. @@ -96,7 +96,7 @@ func newTokenWriter( encoder Encoder, output io.Writer, logger *slog.Logger, - stateChan chan<- *models.PartitionFilterSerialized, + stateChan chan<- models.PartitionFilterSerialized, ) *tokenWriter { id := uuid.NewString() logger = logging.WithWriter(logger, id, logging.WriterTypeToken) From 8dcf706c78294350c42279ff65b1cbabbf934951 Mon Sep 17 00:00:00 2001 From: Dmitrii Neeman Date: Mon, 14 Oct 2024 14:34:39 +0300 Subject: [PATCH 08/25] not working --- handler_backup.go | 2 +- handler_backup_records.go | 8 +- io/aerospike/custom_record_reader.go | 149 ++++++++++++++++++ io/aerospike/custom_record_set.go | 59 +++++++ io/aerospike/record_reader.go | 45 +++--- .../{result_sets.go => record_sets.go} | 48 +----- io/aerospike/sindex_reader.go | 4 + io/aerospike/udf_reader.go | 4 + io/encoding/asb/decode.go | 2 +- models/data_models.go | 6 +- pipeline/read_worker.go | 3 +- token_reader.go | 4 + writers.go | 2 +- 13 files changed, 254 insertions(+), 82 deletions(-) create mode 100644 io/aerospike/custom_record_reader.go create mode 100644 io/aerospike/custom_record_set.go rename io/aerospike/{result_sets.go => record_sets.go} (65%) diff --git a/handler_backup.go b/handler_backup.go index 796100c0..06a547c4 100644 --- a/handler_backup.go +++ b/handler_backup.go @@ -187,7 +187,7 @@ func (bh *BackupHandler) getEstimateSamples(ctx context.Context, recordsNumber i nodes := bh.aerospikeClient.GetNodes() handler := newBackupRecordsHandler(bh.config, bh.aerospikeClient, bh.logger, bh.scanLimiter, bh.state) readerConfig := handler.recordReaderConfigForNode(nodes, &scanPolicy) - recordReader := aerospike.NewRecordReader(ctx, bh.aerospikeClient, readerConfig, bh.logger, bh.state.RecordsChan) + recordReader := aerospike.NewRecordReader(ctx, bh.aerospikeClient, readerConfig, bh.logger) // Timestamp processor. tsProcessor := processors.NewVoidTimeSetter(bh.logger) diff --git a/handler_backup_records.go b/handler_backup_records.go index 73a29ee1..5efb3f6a 100644 --- a/handler_backup_records.go +++ b/handler_backup_records.go @@ -123,7 +123,7 @@ func (bh *backupRecordsHandler) countRecordsUsingScanByPartitions(ctx context.Co // with this filter. 
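// Editorial sketch, not part of the patch: the consumer side of State.RecordsChan
// shown above, reduced to its core — remember the most recent non-empty filter
// per producer, guarded by a mutex so reading the state never races with updates.
// filterTracker and keying by an id string are illustrative choices; the real
// State's bookkeeping and dump logic are richer than this.
package sketch

import (
	"sync"

	"github.com/aerospike/backup-go/models"
)

type filterTracker struct {
	mu      sync.Mutex
	filters map[string]models.PartitionFilterSerialized
}

func newFilterTracker() *filterTracker {
	return &filterTracker{
		filters: make(map[string]models.PartitionFilterSerialized),
	}
}

// serve drains ch until it is closed, skipping zero-value filters the same way
// serveRecords above skips states with empty Begin, Count and Digest.
func (t *filterTracker) serve(id string, ch <-chan models.PartitionFilterSerialized) {
	for f := range ch {
		if f.Begin == 0 && f.Count == 0 && f.Digest == nil {
			continue // nothing useful to record yet
		}

		t.mu.Lock()
		t.filters[id] = f
		t.mu.Unlock()
	}
}

// snapshot copies the tracked filters, e.g. before dumping them to a state file.
func (t *filterTracker) snapshot() map[string]models.PartitionFilterSerialized {
	t.mu.Lock()
	defer t.mu.Unlock()

	out := make(map[string]models.PartitionFilterSerialized, len(t.filters))
	for k, v := range t.filters {
		out[k] = v
	}

	return out
}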
pf := *bh.config.PartitionFilters[j] readerConfig := bh.recordReaderConfigForPartitions(&pf, scanPolicy) - recordReader := aerospike.NewRecordReader(ctx, bh.aerospikeClient, readerConfig, bh.logger, nil) + recordReader := aerospike.NewRecordReader(ctx, bh.aerospikeClient, readerConfig, bh.logger) for { if _, err := recordReader.Read(); err != nil { @@ -165,7 +165,7 @@ func (bh *backupRecordsHandler) countRecordsUsingScanByNodes(ctx context.Context var count uint64 readerConfig := bh.recordReaderConfigForNode(nodes, scanPolicy) - recordReader := aerospike.NewRecordReader(ctx, bh.aerospikeClient, readerConfig, bh.logger, nil) + recordReader := aerospike.NewRecordReader(ctx, bh.aerospikeClient, readerConfig, bh.logger) for { if _, err := recordReader.Read(); err != nil { @@ -226,7 +226,6 @@ func (bh *backupRecordsHandler) makeAerospikeReadWorkersForPartition( bh.aerospikeClient, recordReaderConfig, bh.logger, - bh.state.RecordsChan, ) readWorkers[i] = pipeline.NewReadWorker[*models.Token](recordReader) @@ -267,7 +266,6 @@ func (bh *backupRecordsHandler) makeAerospikeReadWorkersForNodes( bh.aerospikeClient, recordReaderConfig, bh.logger, - bh.state.RecordsChan, ) readWorkers[i] = pipeline.NewReadWorker[*models.Token](recordReader) @@ -295,6 +293,7 @@ func (bh *backupRecordsHandler) recordReaderConfigForPartitions( }, bh.scanLimiter, bh.config.NoTTLOnly, + 1000, ) } @@ -315,5 +314,6 @@ func (bh *backupRecordsHandler) recordReaderConfigForNode( }, bh.scanLimiter, bh.config.NoTTLOnly, + 1000, ) } diff --git a/io/aerospike/custom_record_reader.go b/io/aerospike/custom_record_reader.go new file mode 100644 index 00000000..b2abdfb5 --- /dev/null +++ b/io/aerospike/custom_record_reader.go @@ -0,0 +1,149 @@ +package aerospike + +import ( + "fmt" + "io" + + a "github.com/aerospike/aerospike-client-go/v7" + "github.com/aerospike/backup-go/models" +) + +type scanResult struct { + records []*a.Result + Filter models.PartitionFilterSerialized +} + +func newScanResult(bufferSize int64) *scanResult { + return &scanResult{ + records: make([]*a.Result, bufferSize), + } +} + +// CustomRead reads the next record from the Aerospike database. +func (r *RecordReader) CustomRead() (*models.Token, error) { + if !r.isCustomScanStarted() { + scan, err := r.startCustomScan() + if err != nil { + return nil, fmt.Errorf("failed to start scan: %w", err) + } + + r.customScanResults = scan + } + + res, active := <-r.customScanResults.Results() + if !active { + r.logger.Debug("scan finished") + return nil, io.EOF + } + if res.Result == nil { + return nil, io.EOF + } + if res.Result.Err != nil { + r.logger.Error("error reading record", "error", res.Result.Err) + return nil, res.Result.Err + } + + rec := models.Record{ + Record: res.Result.Record, + } + + recToken := models.NewRecordToken(&rec, 0, res.Filter) + + return recToken, nil +} + +// startCustomScan starts the scan for the RecordReader only for state save! 
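// Editorial sketch, not part of the patch: newScanResult above allocates the
// page buffer with make([]*a.Result, bufferSize), which creates bufferSize nil
// placeholders that later appends grow past; a following commit in this series
// switches to a zero-length slice with capacity only. A minimal illustration of
// the difference:
package main

import "fmt"

func main() {
	v := 42

	withLen := make([]*int, 3) // len 3: three nil placeholders up front
	withLen = append(withLen, &v)
	fmt.Println(len(withLen), withLen[0] == nil) // 4 true

	withCap := make([]*int, 0, 3) // len 0, cap 3: no placeholders
	withCap = append(withCap, &v)
	fmt.Println(len(withCap), withCap[0] == nil) // 1 false
}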
+func (r *RecordReader) startCustomScan() (*customRecordSets, error) { + fmt.Println("START CUSTOM SCAN") + + scanPolicy := *r.config.scanPolicy + scanPolicy.FilterExpression = getScanExpression(r.config.timeBounds, r.config.noTTLOnly) + + setsToScan := r.config.setList + if len(setsToScan) == 0 { + setsToScan = []string{""} + } + + if r.config.scanLimiter != nil { + err := r.config.scanLimiter.Acquire(r.ctx, int64(len(setsToScan))) + if err != nil { + return nil, err + } + } + + scans := make([]*scanResult, 0, len(setsToScan)) + + for _, set := range setsToScan { + switch { + case r.config.pageSize > 0: + recSets, err := r.scanPartitions( + &scanPolicy, + r.config.partitionFilter, + set, + ) + if err != nil { + return nil, err + } + + scans = append(scans, recSets...) + default: + return nil, fmt.Errorf("invalid scan parameters") + } + } + + return newCustomRecordSets(scans, r.logger), nil +} + +func (r *RecordReader) scanPartitions(scanPolicy *a.ScanPolicy, + partitionFilter *a.PartitionFilter, + set string, +) ([]*scanResult, error) { + results := make([]*scanResult, 0) + scanPolicy.MaxRecords = r.config.pageSize + pf := *partitionFilter + + for { + curFilter, err := models.NewPartitionFilterSerialized(&pf) + if err != nil { + return nil, fmt.Errorf("failed to serialize partition filter: %w", err) + } + + recSet, aErr := r.client.ScanPartitions( + scanPolicy, + &pf, + r.config.namespace, + set, + r.config.binList..., + ) + if aErr != nil { + return nil, fmt.Errorf("failed to scan sets: %w", aErr) + } + + // result contains []*a.Result and serialized filter models.PartitionFilterSerialized + result := newScanResult(r.config.pageSize) + + var counter int64 + for res := range recSet.Results() { + counter++ + if res.Err != nil { + fmt.Println("ERROR:", res.Err) + continue + } else { + result.records = append(result.records, res) + } + } + + if aErr = recSet.Close(); aErr != nil { + return nil, fmt.Errorf("failed to close record set: %w", aErr) + } + + result.Filter = curFilter + + results = append(results, result) + if counter == 0 { + break + } + } + fmt.Println("end scan") + return results, nil +} diff --git a/io/aerospike/custom_record_set.go b/io/aerospike/custom_record_set.go new file mode 100644 index 00000000..8a284984 --- /dev/null +++ b/io/aerospike/custom_record_set.go @@ -0,0 +1,59 @@ +package aerospike + +import ( + "fmt" + "log/slog" + + a "github.com/aerospike/aerospike-client-go/v7" + "github.com/aerospike/backup-go/models" +) + +type customResult struct { + Result *a.Result + Filter *models.PartitionFilterSerialized +} + +func newCustomResult(result *a.Result, filter *models.PartitionFilterSerialized) *customResult { + return &customResult{ + Result: result, + Filter: filter, + } +} + +// recordSets contains multiple Aerospike Recordset objects. +type customRecordSets struct { + resultsChannel <-chan *customResult + logger *slog.Logger + data []*scanResult +} + +func newCustomRecordSets(data []*scanResult, logger *slog.Logger) *customRecordSets { + out := make(chan *customResult) + go streamData(data, out) + + return &customRecordSets{ + resultsChannel: out, + data: data, + logger: logger, + } +} + +// Results returns the results channel of the recordSets. 
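// Editorial sketch, not part of the patch: the control flow of scanPartitions
// above, distilled. The idea is to serialize the partition filter *before* each
// page is read, attach that checkpoint to every record of the page, and stop
// once a page comes back empty. pageScanner, checkpoint and record are
// illustrative names only; the real code works with *a.Recordset and
// models.PartitionFilterSerialized.
package sketch

type checkpoint struct {
	cursor []byte // opaque resume point, like an encoded partition filter
}

type record struct {
	key string
	cp  checkpoint // the filter state that produced this record's page
}

// pageScanner abstracts "give me the current cursor" and "give me one page".
type pageScanner interface {
	Checkpoint() (checkpoint, error)
	NextPage(pageSize int64) ([]string, error)
}

func scanAllPages(s pageScanner, pageSize int64) ([]record, error) {
	var out []record

	for {
		cp, err := s.Checkpoint() // checkpoint first, so the page can be replayed
		if err != nil {
			return nil, err
		}

		keys, err := s.NextPage(pageSize)
		if err != nil {
			return nil, err
		}

		if len(keys) == 0 {
			return out, nil // an empty page means the scan is finished
		}

		for _, k := range keys {
			out = append(out, record{key: k, cp: cp})
		}
	}
}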
+func (r *customRecordSets) Results() <-chan *customResult { + return r.resultsChannel +} + +func streamData(data []*scanResult, out chan *customResult) { + if len(data) == 0 { + close(out) + } + + for _, d := range data { + for _, n := range d.records { + fmt.Println(n) + out <- newCustomResult(n, &d.Filter) + } + } + fmt.Println("closing") + close(out) +} diff --git a/io/aerospike/record_reader.go b/io/aerospike/record_reader.go index c28d0895..509e4e34 100644 --- a/io/aerospike/record_reader.go +++ b/io/aerospike/record_reader.go @@ -39,6 +39,7 @@ type RecordReaderConfig struct { setList []string binList []string noTTLOnly bool + pageSize int64 } // NewRecordReaderConfig creates a new RecordReaderConfig. @@ -51,6 +52,7 @@ func NewRecordReaderConfig(namespace string, timeBounds models.TimeBounds, scanLimiter *semaphore.Weighted, noTTLOnly bool, + pageSize int64, ) *RecordReaderConfig { return &RecordReaderConfig{ namespace: namespace, @@ -62,6 +64,7 @@ func NewRecordReaderConfig(namespace string, timeBounds: timeBounds, scanLimiter: scanLimiter, noTTLOnly: noTTLOnly, + pageSize: pageSize, } } @@ -90,12 +93,12 @@ type scanner interface { // It reads records from an Aerospike database and returns them as // *models.Record. type RecordReader struct { - ctx context.Context - client scanner - logger *slog.Logger - config *RecordReaderConfig - scanResult *recordSets // initialized on first Read() call - stateChan chan<- models.PartitionFilterSerialized + ctx context.Context + client scanner + logger *slog.Logger + config *RecordReaderConfig + scanResult *recordSets // initialized on first Read() call + customScanResults *customRecordSets } // NewRecordReader creates a new RecordReader. @@ -104,18 +107,17 @@ func NewRecordReader( client scanner, cfg *RecordReaderConfig, logger *slog.Logger, - stateChan chan<- models.PartitionFilterSerialized, + ) *RecordReader { id := uuid.NewString() logger = logging.WithReader(logger, id, logging.ReaderTypeRecord) logger.Debug("created new aerospike record reader") return &RecordReader{ - ctx: ctx, - config: cfg, - client: client, - logger: logger, - stateChan: stateChan, + ctx: ctx, + config: cfg, + client: client, + logger: logger, } } @@ -130,19 +132,6 @@ func (r *RecordReader) Read() (*models.Token, error) { r.scanResult = scan } - var ( - pfs models.PartitionFilterSerialized - err error - ) - // For indexes and udf, partition filter will be nil. 
- if r.config.partitionFilter != nil && r.stateChan != nil { - pfs, err = models.NewPartitionFilterSerialized(r.config.partitionFilter) - if err != nil { - return nil, fmt.Errorf("failed to serialize partition filter: %w", err) - } - // r.stateChan <- pfs - } - res, active := <-r.scanResult.Results() if !active { r.logger.Debug("scan finished") @@ -158,7 +147,7 @@ func (r *RecordReader) Read() (*models.Token, error) { Record: res.Record, } - recToken := models.NewRecordToken(&rec, 0, pfs) + recToken := models.NewRecordToken(&rec, 0, nil) return recToken, nil } @@ -264,6 +253,10 @@ func (r *RecordReader) isScanStarted() bool { return r.scanResult != nil } +func (r *RecordReader) isCustomScanStarted() bool { + return r.customScanResults != nil +} + func getScanExpression(bounds models.TimeBounds, noTTLOnly bool) *a.Expression { expressions := make([]*a.Expression, 0) diff --git a/io/aerospike/result_sets.go b/io/aerospike/record_sets.go similarity index 65% rename from io/aerospike/result_sets.go rename to io/aerospike/record_sets.go index ac42d68b..69a5ccc7 100644 --- a/io/aerospike/result_sets.go +++ b/io/aerospike/record_sets.go @@ -16,29 +16,18 @@ package aerospike import ( "log/slog" - "sync" a "github.com/aerospike/aerospike-client-go/v7" "github.com/aerospike/backup-go/internal/util" - "github.com/aerospike/backup-go/models" ) // recordSets contains multiple Aerospike Recordset objects. type recordSets struct { - resultsChannel <-chan *customRecord + resultsChannel <-chan *a.Result logger *slog.Logger data []*a.Recordset } -type customRecord struct { - result *a.Result - filter models.PartitionFilterSerialized -} - -func newCustomRecord(result *a.Result, filter *a.) *customRecord { - -} - func newRecordSets(data []*a.Recordset, logger *slog.Logger) *recordSets { resultChannels := make([]<-chan *a.Result, 0, len(data)) for _, recSet := range data { @@ -63,39 +52,6 @@ func (r *recordSets) Close() { } // Results returns the results channel of the recordSets. -func (r *recordSets) Results() <-chan *customRecord { +func (r *recordSets) Results() <-chan *a.Result { return r.resultsChannel } - -func MergeResultSets(channels []<-chan *a.Result) <-chan *a.PartitionFilter { - out := make(chan *a.PartitionFilter) - - if len(channels) == 0 { - close(out) - return out - } - - var wg sync.WaitGroup - // Run an output goroutine for each input channel. - output := func(c <-chan *a.Result) { - for n := range c { - out <- n - } - - wg.Done() - } - - wg.Add(len(channels)) - - for _, c := range channels { - go output(c) - } - - // Run a goroutine to close out once all the output goroutines are done. - go func() { - wg.Wait() - close(out) - }() - - return out -} diff --git a/io/aerospike/sindex_reader.go b/io/aerospike/sindex_reader.go index f5567b2f..c8abaeb7 100644 --- a/io/aerospike/sindex_reader.go +++ b/io/aerospike/sindex_reader.go @@ -77,6 +77,10 @@ func (r *SindexReader) Read() (*models.Token, error) { return nil, io.EOF } +func (r *SindexReader) CustomRead() (*models.Token, error) { + return r.Read() +} + // Close satisfies the DataReader interface // but is a no-op for the SIndexReader. 
func (r *SindexReader) Close() {} diff --git a/io/aerospike/udf_reader.go b/io/aerospike/udf_reader.go index 837a4522..8adf6088 100644 --- a/io/aerospike/udf_reader.go +++ b/io/aerospike/udf_reader.go @@ -76,6 +76,10 @@ func (r *UdfReader) Read() (*models.Token, error) { return nil, io.EOF } +func (r *UdfReader) CustomRead() (*models.Token, error) { + return r.Read() +} + // Close satisfies the DataReader interface // but is a no-op for the UDFReader. func (r *UdfReader) Close() {} diff --git a/io/encoding/asb/decode.go b/io/encoding/asb/decode.go index e557bf80..af8a2c5e 100644 --- a/io/encoding/asb/decode.go +++ b/io/encoding/asb/decode.go @@ -161,7 +161,7 @@ func (r *Decoder) NextToken() (*models.Token, error) { case *models.UDF: return models.NewUDFToken(v, size), nil case *models.Record: - return models.NewRecordToken(v, size, models.PartitionFilterSerialized{}), nil + return models.NewRecordToken(v, size, nil), nil default: return nil, fmt.Errorf("unsupported token type %T", v) } diff --git a/models/data_models.go b/models/data_models.go index a2100e8d..f21efd55 100644 --- a/models/data_models.go +++ b/models/data_models.go @@ -36,6 +36,8 @@ type Record struct { // VoidTime is the time in seconds since the citrusleaf epoch when the // record will expire. VoidTime int64 + + // TODO: put filter here } type SIPathBinType byte @@ -102,11 +104,11 @@ type Token struct { Type TokenType Size uint64 // Current filter state. - Filter PartitionFilterSerialized + Filter *PartitionFilterSerialized } // NewRecordToken creates a new token with the given record. -func NewRecordToken(r *Record, size uint64, filter PartitionFilterSerialized) *Token { +func NewRecordToken(r *Record, size uint64, filter *PartitionFilterSerialized) *Token { return &Token{ Record: r, Type: TokenTypeRecord, diff --git a/pipeline/read_worker.go b/pipeline/read_worker.go index dfe50666..007372ac 100644 --- a/pipeline/read_worker.go +++ b/pipeline/read_worker.go @@ -25,6 +25,7 @@ import ( //go:generate mockery --name dataReader type dataReader[T any] interface { Read() (T, error) + CustomRead() (T, error) Close() } @@ -58,7 +59,7 @@ func (w *readWorker[T]) Run(ctx context.Context) error { defer w.reader.Close() for { - data, err := w.reader.Read() + data, err := w.reader.CustomRead() if err != nil { if errors.Is(err, io.EOF) { return nil diff --git a/token_reader.go b/token_reader.go index 39cf7d22..a16bdc97 100644 --- a/token_reader.go +++ b/token_reader.go @@ -79,6 +79,10 @@ func (tr *tokenReader) Read() (*models.Token, error) { } } +func (tr *tokenReader) CustomRead() (*models.Token, error) { + return tr.Read() +} + // Close satisfies the DataReader interface // but is a no-op for the tokenReader. 
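// Editorial sketch, not part of the patch: with Token.Filter back to a pointer,
// tokens that do not come from a paginated record scan (secondary indexes, UDFs,
// and records read back by the ASB decoder above) carry a nil Filter. Anything
// forwarding filter state to a channel should therefore check both the channel
// and the pointer; forwardFilterState is an illustrative helper only.
package sketch

import "github.com/aerospike/backup-go/models"

func forwardFilterState(stateChan chan<- models.PartitionFilterSerialized, t *models.Token) {
	if stateChan == nil || t == nil || t.Filter == nil {
		return // nothing to report for this token
	}

	stateChan <- *t.Filter
}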
func (tr *tokenReader) Close() { diff --git a/writers.go b/writers.go index d3df952a..e4ed1b3e 100644 --- a/writers.go +++ b/writers.go @@ -118,7 +118,7 @@ func (w *tokenWriter) Write(v *models.Token) (int, error) { } if w.stateChan != nil { - w.stateChan <- v.Filter + w.stateChan <- *v.Filter } return w.output.Write(data) From 8df3cf9ad44ee3340bce6d4fcdceb7437f9126f7 Mon Sep 17 00:00:00 2001 From: Dmitrii Neeman Date: Mon, 14 Oct 2024 17:34:34 +0300 Subject: [PATCH 09/25] working prorotype --- handler_backup_records.go | 2 +- io/aerospike/custom_record_reader.go | 12 ++++-------- io/aerospike/custom_record_set.go | 6 +++--- 3 files changed, 8 insertions(+), 12 deletions(-) diff --git a/handler_backup_records.go b/handler_backup_records.go index 5efb3f6a..7b3dfe3a 100644 --- a/handler_backup_records.go +++ b/handler_backup_records.go @@ -314,6 +314,6 @@ func (bh *backupRecordsHandler) recordReaderConfigForNode( }, bh.scanLimiter, bh.config.NoTTLOnly, - 1000, + 100, ) } diff --git a/io/aerospike/custom_record_reader.go b/io/aerospike/custom_record_reader.go index b2abdfb5..ad1d25b7 100644 --- a/io/aerospike/custom_record_reader.go +++ b/io/aerospike/custom_record_reader.go @@ -15,7 +15,7 @@ type scanResult struct { func newScanResult(bufferSize int64) *scanResult { return &scanResult{ - records: make([]*a.Result, bufferSize), + records: make([]*a.Result, 0, bufferSize), } } @@ -54,8 +54,6 @@ func (r *RecordReader) CustomRead() (*models.Token, error) { // startCustomScan starts the scan for the RecordReader only for state save! func (r *RecordReader) startCustomScan() (*customRecordSets, error) { - fmt.Println("START CUSTOM SCAN") - scanPolicy := *r.config.scanPolicy scanPolicy.FilterExpression = getScanExpression(r.config.timeBounds, r.config.noTTLOnly) @@ -100,17 +98,16 @@ func (r *RecordReader) scanPartitions(scanPolicy *a.ScanPolicy, ) ([]*scanResult, error) { results := make([]*scanResult, 0) scanPolicy.MaxRecords = r.config.pageSize - pf := *partitionFilter for { - curFilter, err := models.NewPartitionFilterSerialized(&pf) + curFilter, err := models.NewPartitionFilterSerialized(partitionFilter) if err != nil { return nil, fmt.Errorf("failed to serialize partition filter: %w", err) } recSet, aErr := r.client.ScanPartitions( scanPolicy, - &pf, + partitionFilter, r.config.namespace, set, r.config.binList..., @@ -126,7 +123,6 @@ func (r *RecordReader) scanPartitions(scanPolicy *a.ScanPolicy, for res := range recSet.Results() { counter++ if res.Err != nil { - fmt.Println("ERROR:", res.Err) continue } else { result.records = append(result.records, res) @@ -144,6 +140,6 @@ func (r *RecordReader) scanPartitions(scanPolicy *a.ScanPolicy, break } } - fmt.Println("end scan") + return results, nil } diff --git a/io/aerospike/custom_record_set.go b/io/aerospike/custom_record_set.go index 8a284984..b85580b0 100644 --- a/io/aerospike/custom_record_set.go +++ b/io/aerospike/custom_record_set.go @@ -1,7 +1,6 @@ package aerospike import ( - "fmt" "log/slog" a "github.com/aerospike/aerospike-client-go/v7" @@ -49,11 +48,12 @@ func streamData(data []*scanResult, out chan *customResult) { } for _, d := range data { + for _, n := range d.records { - fmt.Println(n) + out <- newCustomResult(n, &d.Filter) } } - fmt.Println("closing") + close(out) } From c4750c05fb556b437c4549c614a123ad4f1bee10 Mon Sep 17 00:00:00 2001 From: Dmitrii Neeman Date: Tue, 15 Oct 2024 12:22:25 +0300 Subject: [PATCH 10/25] FMWK-570-backup-restore-state - paginated reading --- io/aerospike/custom_record_reader.go | 145 
------------------- io/aerospike/custom_record_set.go | 59 -------- io/aerospike/record_reader.go | 35 +++-- io/aerospike/record_reader_paginated.go | 178 ++++++++++++++++++++++++ io/aerospike/sindex_reader.go | 4 - io/aerospike/udf_reader.go | 4 - models/data_models.go | 5 +- pipeline/read_worker.go | 3 +- token_reader.go | 4 - 9 files changed, 203 insertions(+), 234 deletions(-) delete mode 100644 io/aerospike/custom_record_reader.go delete mode 100644 io/aerospike/custom_record_set.go create mode 100644 io/aerospike/record_reader_paginated.go diff --git a/io/aerospike/custom_record_reader.go b/io/aerospike/custom_record_reader.go deleted file mode 100644 index ad1d25b7..00000000 --- a/io/aerospike/custom_record_reader.go +++ /dev/null @@ -1,145 +0,0 @@ -package aerospike - -import ( - "fmt" - "io" - - a "github.com/aerospike/aerospike-client-go/v7" - "github.com/aerospike/backup-go/models" -) - -type scanResult struct { - records []*a.Result - Filter models.PartitionFilterSerialized -} - -func newScanResult(bufferSize int64) *scanResult { - return &scanResult{ - records: make([]*a.Result, 0, bufferSize), - } -} - -// CustomRead reads the next record from the Aerospike database. -func (r *RecordReader) CustomRead() (*models.Token, error) { - if !r.isCustomScanStarted() { - scan, err := r.startCustomScan() - if err != nil { - return nil, fmt.Errorf("failed to start scan: %w", err) - } - - r.customScanResults = scan - } - - res, active := <-r.customScanResults.Results() - if !active { - r.logger.Debug("scan finished") - return nil, io.EOF - } - if res.Result == nil { - return nil, io.EOF - } - if res.Result.Err != nil { - r.logger.Error("error reading record", "error", res.Result.Err) - return nil, res.Result.Err - } - - rec := models.Record{ - Record: res.Result.Record, - } - - recToken := models.NewRecordToken(&rec, 0, res.Filter) - - return recToken, nil -} - -// startCustomScan starts the scan for the RecordReader only for state save! -func (r *RecordReader) startCustomScan() (*customRecordSets, error) { - scanPolicy := *r.config.scanPolicy - scanPolicy.FilterExpression = getScanExpression(r.config.timeBounds, r.config.noTTLOnly) - - setsToScan := r.config.setList - if len(setsToScan) == 0 { - setsToScan = []string{""} - } - - if r.config.scanLimiter != nil { - err := r.config.scanLimiter.Acquire(r.ctx, int64(len(setsToScan))) - if err != nil { - return nil, err - } - } - - scans := make([]*scanResult, 0, len(setsToScan)) - - for _, set := range setsToScan { - switch { - case r.config.pageSize > 0: - recSets, err := r.scanPartitions( - &scanPolicy, - r.config.partitionFilter, - set, - ) - if err != nil { - return nil, err - } - - scans = append(scans, recSets...) 
- default: - return nil, fmt.Errorf("invalid scan parameters") - } - } - - return newCustomRecordSets(scans, r.logger), nil -} - -func (r *RecordReader) scanPartitions(scanPolicy *a.ScanPolicy, - partitionFilter *a.PartitionFilter, - set string, -) ([]*scanResult, error) { - results := make([]*scanResult, 0) - scanPolicy.MaxRecords = r.config.pageSize - - for { - curFilter, err := models.NewPartitionFilterSerialized(partitionFilter) - if err != nil { - return nil, fmt.Errorf("failed to serialize partition filter: %w", err) - } - - recSet, aErr := r.client.ScanPartitions( - scanPolicy, - partitionFilter, - r.config.namespace, - set, - r.config.binList..., - ) - if aErr != nil { - return nil, fmt.Errorf("failed to scan sets: %w", aErr) - } - - // result contains []*a.Result and serialized filter models.PartitionFilterSerialized - result := newScanResult(r.config.pageSize) - - var counter int64 - for res := range recSet.Results() { - counter++ - if res.Err != nil { - continue - } else { - result.records = append(result.records, res) - } - } - - if aErr = recSet.Close(); aErr != nil { - return nil, fmt.Errorf("failed to close record set: %w", aErr) - } - - result.Filter = curFilter - - results = append(results, result) - if counter == 0 { - break - } - } - - return results, nil -} diff --git a/io/aerospike/custom_record_set.go b/io/aerospike/custom_record_set.go deleted file mode 100644 index b85580b0..00000000 --- a/io/aerospike/custom_record_set.go +++ /dev/null @@ -1,59 +0,0 @@ -package aerospike - -import ( - "log/slog" - - a "github.com/aerospike/aerospike-client-go/v7" - "github.com/aerospike/backup-go/models" -) - -type customResult struct { - Result *a.Result - Filter *models.PartitionFilterSerialized -} - -func newCustomResult(result *a.Result, filter *models.PartitionFilterSerialized) *customResult { - return &customResult{ - Result: result, - Filter: filter, - } -} - -// recordSets contains multiple Aerospike Recordset objects. -type customRecordSets struct { - resultsChannel <-chan *customResult - logger *slog.Logger - data []*scanResult -} - -func newCustomRecordSets(data []*scanResult, logger *slog.Logger) *customRecordSets { - out := make(chan *customResult) - go streamData(data, out) - - return &customRecordSets{ - resultsChannel: out, - data: data, - logger: logger, - } -} - -// Results returns the results channel of the recordSets. -func (r *customRecordSets) Results() <-chan *customResult { - return r.resultsChannel -} - -func streamData(data []*scanResult, out chan *customResult) { - if len(data) == 0 { - close(out) - } - - for _, d := range data { - - for _, n := range d.records { - - out <- newCustomResult(n, &d.Filter) - } - } - - close(out) -} diff --git a/io/aerospike/record_reader.go b/io/aerospike/record_reader.go index 509e4e34..20c2a9ee 100644 --- a/io/aerospike/record_reader.go +++ b/io/aerospike/record_reader.go @@ -39,7 +39,10 @@ type RecordReaderConfig struct { setList []string binList []string noTTLOnly bool - pageSize int64 + + // pageSize used for paginated scan for saving reading state. + // If pageSize = 0, we think that we use normal scan. + pageSize int64 } // NewRecordReaderConfig creates a new RecordReaderConfig. @@ -93,12 +96,13 @@ type scanner interface { // It reads records from an Aerospike database and returns them as // *models.Record. 
type RecordReader struct { - ctx context.Context - client scanner - logger *slog.Logger - config *RecordReaderConfig - scanResult *recordSets // initialized on first Read() call - customScanResults *customRecordSets + ctx context.Context + client scanner + logger *slog.Logger + config *RecordReaderConfig + scanResult *recordSets // initialized on first Read() call + // pageRecordsChan chan is initialized only if pageSize > 0. + pageRecordsChan chan *pageRecord } // NewRecordReader creates a new RecordReader. @@ -107,22 +111,31 @@ func NewRecordReader( client scanner, cfg *RecordReaderConfig, logger *slog.Logger, - ) *RecordReader { id := uuid.NewString() logger = logging.WithReader(logger, id, logging.ReaderTypeRecord) logger.Debug("created new aerospike record reader") - return &RecordReader{ + r := &RecordReader{ ctx: ctx, config: cfg, client: client, logger: logger, } + + return r } // Read reads the next record from the Aerospike database. func (r *RecordReader) Read() (*models.Token, error) { + // If pageSize is set, we use paginated read. + if r.config.pageSize > 0 { + return r.readPage() + } + return r.Read() +} + +func (r *RecordReader) read() (*models.Token, error) { if !r.isScanStarted() { scan, err := r.startScan() if err != nil { @@ -253,10 +266,6 @@ func (r *RecordReader) isScanStarted() bool { return r.scanResult != nil } -func (r *RecordReader) isCustomScanStarted() bool { - return r.customScanResults != nil -} - func getScanExpression(bounds models.TimeBounds, noTTLOnly bool) *a.Expression { expressions := make([]*a.Expression, 0) diff --git a/io/aerospike/record_reader_paginated.go b/io/aerospike/record_reader_paginated.go new file mode 100644 index 00000000..e7ecef8c --- /dev/null +++ b/io/aerospike/record_reader_paginated.go @@ -0,0 +1,178 @@ +package aerospike + +import ( + "fmt" + "io" + + a "github.com/aerospike/aerospike-client-go/v7" + "github.com/aerospike/backup-go/models" +) + +// pageRecord contains records and serialized filter. +type pageRecord struct { + result *a.Result + filter *models.PartitionFilterSerialized +} + +func newPageRecord(result *a.Result, filter *models.PartitionFilterSerialized) *pageRecord { + return &pageRecord{ + result: result, + filter: filter, + } +} + +// readPage reads the next record from pageRecord from the Aerospike database. +func (r *RecordReader) readPage() (*models.Token, error) { + errChan := make(chan error) + if r.pageRecordsChan == nil { + r.pageRecordsChan = make(chan *pageRecord) + go r.startScanPaginated(errChan) + } + + select { + case err := <-errChan: + if err != nil { + return nil, err + } + case res, active := <-r.pageRecordsChan: + if !active { + r.logger.Debug("scan finished") + return nil, io.EOF + } + + if res.result == nil { + return nil, io.EOF + } + + if res.result.Err != nil { + r.logger.Error("error reading record", "error", res.result.Err) + return nil, res.result.Err + } + + rec := models.Record{ + Record: res.result.Record, + } + + recToken := models.NewRecordToken(&rec, 0, res.filter) + + return recToken, nil + } + + return nil, nil +} + +// startScanPaginated starts the scan for the RecordReader only for state save! 
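// Editorial sketch, not part of the patch: the "start the scan lazily on the
// first Read, then keep receiving" shape used by readPage above, written with
// sync.Once so the background producer and its error channel are created
// exactly once. lazyPageReader and item are illustrative names only.
package sketch

import (
	"io"
	"sync"
)

type item struct {
	value string
}

type lazyPageReader struct {
	once    sync.Once
	results chan item
	errs    chan error
	// start launches the producer goroutine; it must close results when done.
	start func(results chan<- item, errs chan<- error)
}

func (r *lazyPageReader) Read() (item, error) {
	r.once.Do(func() {
		r.results = make(chan item)
		r.errs = make(chan error, 1)
		go r.start(r.results, r.errs)
	})

	select {
	case err := <-r.errs:
		return item{}, err
	case v, ok := <-r.results:
		if !ok {
			return item{}, io.EOF // producer closed the channel: scan finished
		}
		return v, nil
	}
}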
+func (r *RecordReader) startScanPaginated(localErrChan chan error) { + scanPolicy := *r.config.scanPolicy + scanPolicy.FilterExpression = getScanExpression(r.config.timeBounds, r.config.noTTLOnly) + + setsToScan := r.config.setList + if len(setsToScan) == 0 { + setsToScan = []string{""} + } + + if r.config.scanLimiter != nil { + err := r.config.scanLimiter.Acquire(r.ctx, int64(len(setsToScan))) + if err != nil { + localErrChan <- err + return + } + } + + for _, set := range setsToScan { + resultChan, errChan := r.streamPartitionPages( + &scanPolicy, + set, + ) + + for { + select { + case err, ok := <-errChan: + if !ok { + break + } + + if err != nil { + localErrChan <- err + return + } + case result, ok := <-resultChan: + if !ok { + // After we finish all the readings, we close pageRecord chan. + close(r.pageRecordsChan) + close(localErrChan) + return + } + + for i := range result { + r.pageRecordsChan <- result[i] + } + + } + } + } +} + +// streamPartitionPages reads the whole pageRecord and send it to resultChan. +func (r *RecordReader) streamPartitionPages( + scanPolicy *a.ScanPolicy, + set string, +) (resultChan chan []*pageRecord, errChan chan error) { + scanPolicy.MaxRecords = r.config.pageSize + // resultChan must not be buffered, we send the whole pageRecord to resultChan. + // So if we make it buffered, we will consume a lot of RAM. + resultChan = make(chan []*pageRecord) + errChan = make(chan error) + + go func() { + // For one iteration, we scan 1 pageRecord. + for { + curFilter, err := models.NewPartitionFilterSerialized(r.config.partitionFilter) + if err != nil { + errChan <- fmt.Errorf("failed to serialize partition filter: %w", err) + } + + recSet, aErr := r.client.ScanPartitions( + scanPolicy, + r.config.partitionFilter, + r.config.namespace, + set, + r.config.binList..., + ) + if aErr != nil { + errChan <- fmt.Errorf("failed to scan sets: %w", aErr.Unwrap()) + } + + // result contains []*a.Result and serialized filter models.PartitionFilterSerialized + result := make([]*pageRecord, 0, r.config.pageSize) + + // to count records on pageRecord. + var counter int64 + for res := range recSet.Results() { + counter++ + + if res.Err != nil { + continue + } else { + // Save to pageRecord filter that returns current pageRecord. + result = append(result, newPageRecord(res, &curFilter)) + } + } + + if aErr = recSet.Close(); aErr != nil { + errChan <- fmt.Errorf("failed to close record set: %w", aErr.Unwrap()) + } + + resultChan <- result + // If there were no records on the pageRecord, we think that it was last pageRecord and exit. + if counter == 0 { + close(resultChan) + close(errChan) + + return + } + } + }() + + return resultChan, errChan +} diff --git a/io/aerospike/sindex_reader.go b/io/aerospike/sindex_reader.go index c8abaeb7..f5567b2f 100644 --- a/io/aerospike/sindex_reader.go +++ b/io/aerospike/sindex_reader.go @@ -77,10 +77,6 @@ func (r *SindexReader) Read() (*models.Token, error) { return nil, io.EOF } -func (r *SindexReader) CustomRead() (*models.Token, error) { - return r.Read() -} - // Close satisfies the DataReader interface // but is a no-op for the SIndexReader. 
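// Editorial sketch, not part of the patch: the completion protocol used by
// startScanPaginated and streamPartitionPages above, reduced to illustrative
// types. The producer closes its channels once a page comes back empty; the
// consumer treats a closed results channel as end-of-data and disables the
// error branch once that channel is closed.
package sketch

func producePages(next func() ([]int, error)) (<-chan []int, <-chan error) {
	results := make(chan []int)
	errs := make(chan error, 1)

	go func() {
		defer close(errs)
		defer close(results)

		for {
			page, err := next()
			if err != nil {
				errs <- err
				return
			}
			if len(page) == 0 {
				return // empty page: the scan is complete
			}
			results <- page
		}
	}()

	return results, errs
}

func consumeAll(results <-chan []int, errs <-chan error) ([]int, error) {
	var all []int

	for {
		select {
		case err, ok := <-errs:
			if !ok {
				errs = nil // closed: disable this branch and wait for results
				continue
			}
			return nil, err
		case page, ok := <-results:
			if !ok {
				return all, nil // producer finished cleanly
			}
			all = append(all, page...)
		}
	}
}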
func (r *SindexReader) Close() {} diff --git a/io/aerospike/udf_reader.go b/io/aerospike/udf_reader.go index 8adf6088..837a4522 100644 --- a/io/aerospike/udf_reader.go +++ b/io/aerospike/udf_reader.go @@ -76,10 +76,6 @@ func (r *UdfReader) Read() (*models.Token, error) { return nil, io.EOF } -func (r *UdfReader) CustomRead() (*models.Token, error) { - return r.Read() -} - // Close satisfies the DataReader interface // but is a no-op for the UDFReader. func (r *UdfReader) Close() {} diff --git a/models/data_models.go b/models/data_models.go index f21efd55..9863d664 100644 --- a/models/data_models.go +++ b/models/data_models.go @@ -36,8 +36,6 @@ type Record struct { // VoidTime is the time in seconds since the citrusleaf epoch when the // record will expire. VoidTime int64 - - // TODO: put filter here } type SIPathBinType byte @@ -103,7 +101,8 @@ type Token struct { Record *Record Type TokenType Size uint64 - // Current filter state. + // Filter represents serialized partition filter for page, that record belongs to. + // Is used only on pagination read, to save reading states. Filter *PartitionFilterSerialized } diff --git a/pipeline/read_worker.go b/pipeline/read_worker.go index 007372ac..dfe50666 100644 --- a/pipeline/read_worker.go +++ b/pipeline/read_worker.go @@ -25,7 +25,6 @@ import ( //go:generate mockery --name dataReader type dataReader[T any] interface { Read() (T, error) - CustomRead() (T, error) Close() } @@ -59,7 +58,7 @@ func (w *readWorker[T]) Run(ctx context.Context) error { defer w.reader.Close() for { - data, err := w.reader.CustomRead() + data, err := w.reader.Read() if err != nil { if errors.Is(err, io.EOF) { return nil diff --git a/token_reader.go b/token_reader.go index a16bdc97..39cf7d22 100644 --- a/token_reader.go +++ b/token_reader.go @@ -79,10 +79,6 @@ func (tr *tokenReader) Read() (*models.Token, error) { } } -func (tr *tokenReader) CustomRead() (*models.Token, error) { - return tr.Read() -} - // Close satisfies the DataReader interface // but is a no-op for the tokenReader. func (tr *tokenReader) Close() { From d6cc1e6ad55bb32cf1dc47bf29fba712b86a597c Mon Sep 17 00:00:00 2001 From: Dmitrii Neeman Date: Tue, 15 Oct 2024 14:33:02 +0300 Subject: [PATCH 11/25] FMWK-570-backup-restore-state - library refactroring --- config_backup.go | 21 +++++++++++++++++++++ handler_backup.go | 14 ++++++++++---- handler_backup_records.go | 11 ++++++++--- handler_restore.go | 8 +++++++- io/aerospike/record_reader.go | 3 ++- io/aerospike/record_reader_paginated.go | 8 ++++---- pipeline/pipeline.go | 23 +++++++++++++++++++---- pipeline/pipline_test.go | 18 +++++++++++------- state.go | 2 +- 9 files changed, 83 insertions(+), 25 deletions(-) diff --git a/config_backup.go b/config_backup.go index 2678770d..5094b8a5 100644 --- a/config_backup.go +++ b/config_backup.go @@ -116,6 +116,14 @@ type BackupConfig struct { // that was generated from the interrupted/failed run. // Works only for default and/or partition backup. Not work with ParallelNodes or NodeList. Continue bool + // PageSize how many records will be read on one iteration for continuation backup. + // Affects size if overlap on resuming backup after an error. + // By default, it must be zero. If any value is set, reading from Aerospike will be paginated. + // Which affects the performance and RAM usage. + PageSize int64 + // SyncPipelines if set to true, the same number of workers will be created for each stage of the pipeline. + // Each worker will be connected to the next stage worker with a separate channel. 
+ SyncPipelines bool } // NewDefaultBackupConfig returns a new BackupConfig with default values. @@ -157,6 +165,7 @@ func (c *BackupConfig) isFullBackup() bool { return c.ModAfter == nil && c.isDefaultPartitionFilter() } +//nolint:gocyclo // validate func is long func with a lot of checks. func (c *BackupConfig) validate() error { if c.ParallelRead < MinParallel || c.ParallelRead > MaxParallel { return fmt.Errorf("parallel read must be between 1 and 1024, got %d", c.ParallelRead) @@ -192,6 +201,18 @@ func (c *BackupConfig) validate() error { return fmt.Errorf("filelimit value must not be negative, got %d", c.FileLimit) } + if c.StateFile != "" && c.PageSize == 0 { + return fmt.Errorf("page size must be set if saving state to state file is enabled") + } + + if c.Continue && c.StateFile == "" { + return fmt.Errorf("state file must be set if continue is enabled") + } + + if c.StateFile != "" && !c.SyncPipelines { + return fmt.Errorf("sync pipelines must be enabled if stage file is set") + } + if err := c.CompressionPolicy.validate(); err != nil { return fmt.Errorf("compression policy invalid: %w", err) } diff --git a/handler_backup.go b/handler_backup.go index 06a547c4..d1841b84 100644 --- a/handler_backup.go +++ b/handler_backup.go @@ -454,11 +454,14 @@ func (bh *BackupHandler) backupSIndexes( sindexWriter = newWriterWithTokenStats(sindexWriter, &bh.stats, bh.logger) sindexWriteWorker := pipeline.NewWriteWorker(sindexWriter, bh.limiter) - sindexPipeline := pipeline.NewPipeline[*models.Token]( - true, + sindexPipeline, err := pipeline.NewPipeline[*models.Token]( + bh.config.SyncPipelines, []pipeline.Worker[*models.Token]{sindexReadWorker}, []pipeline.Worker[*models.Token]{sindexWriteWorker}, ) + if err != nil { + return err + } return sindexPipeline.Run(ctx) } @@ -479,11 +482,14 @@ func (bh *BackupHandler) backupUDFs( udfWriter = newWriterWithTokenStats(udfWriter, &bh.stats, bh.logger) udfWriteWorker := pipeline.NewWriteWorker(udfWriter, bh.limiter) - udfPipeline := pipeline.NewPipeline[*models.Token]( - true, + udfPipeline, err := pipeline.NewPipeline[*models.Token]( + bh.config.SyncPipelines, []pipeline.Worker[*models.Token]{udfReadWorker}, []pipeline.Worker[*models.Token]{udfWriteWorker}, ) + if err != nil { + return err + } return udfPipeline.Run(ctx) } diff --git a/handler_backup_records.go b/handler_backup_records.go index 7b3dfe3a..42477069 100644 --- a/handler_backup_records.go +++ b/handler_backup_records.go @@ -77,7 +77,12 @@ func (bh *backupRecordsHandler) run( ctx, bh.config.RecordsPerSecond), ), bh.config.ParallelRead) - return pipeline.NewPipeline(true, readWorkers, composeProcessor, writers).Run(ctx) + pl, err := pipeline.NewPipeline(bh.config.SyncPipelines, readWorkers, composeProcessor, writers) + if err != nil { + return err + } + + return pl.Run(ctx) } func (bh *backupRecordsHandler) countRecords(ctx context.Context, infoClient *asinfo.InfoClient) (uint64, error) { @@ -293,7 +298,7 @@ func (bh *backupRecordsHandler) recordReaderConfigForPartitions( }, bh.scanLimiter, bh.config.NoTTLOnly, - 1000, + bh.config.PageSize, ) } @@ -314,6 +319,6 @@ func (bh *backupRecordsHandler) recordReaderConfigForNode( }, bh.scanLimiter, bh.config.NoTTLOnly, - 100, + bh.config.PageSize, ) } diff --git a/handler_restore.go b/handler_restore.go index 601cdc1a..b5ca0375 100644 --- a/handler_restore.go +++ b/handler_restore.go @@ -231,7 +231,12 @@ func (rh *RestoreHandler) runRestorePipeline(ctx context.Context, readers []pipe processors.NewTPSLimiter[*models.Token](ctx, 
rh.config.RecordsPerSecond), ), rh.config.Parallel) - return pipeline.NewPipeline(true, readers, composeProcessor, writeWorkers).Run(ctx) + pl, err := pipeline.NewPipeline(false, readers, composeProcessor, writeWorkers) + if err != nil { + return err + } + + return pl.Run(ctx) } func (rh *RestoreHandler) useBatchWrites() (bool, error) { @@ -250,6 +255,7 @@ func newTokenWorker(processor processors.TokenProcessor, parallel int) []pipelin for i := 0; i < parallel; i++ { workers = append(workers, pipeline.NewProcessorWorker(processor)) } + return workers } diff --git a/io/aerospike/record_reader.go b/io/aerospike/record_reader.go index 20c2a9ee..72c41d74 100644 --- a/io/aerospike/record_reader.go +++ b/io/aerospike/record_reader.go @@ -132,7 +132,8 @@ func (r *RecordReader) Read() (*models.Token, error) { if r.config.pageSize > 0 { return r.readPage() } - return r.Read() + + return r.read() } func (r *RecordReader) read() (*models.Token, error) { diff --git a/io/aerospike/record_reader_paginated.go b/io/aerospike/record_reader_paginated.go index e7ecef8c..3e365c8a 100644 --- a/io/aerospike/record_reader_paginated.go +++ b/io/aerospike/record_reader_paginated.go @@ -24,6 +24,7 @@ func newPageRecord(result *a.Result, filter *models.PartitionFilterSerialized) * // readPage reads the next record from pageRecord from the Aerospike database. func (r *RecordReader) readPage() (*models.Token, error) { errChan := make(chan error) + if r.pageRecordsChan == nil { r.pageRecordsChan = make(chan *pageRecord) go r.startScanPaginated(errChan) @@ -101,13 +102,13 @@ func (r *RecordReader) startScanPaginated(localErrChan chan error) { // After we finish all the readings, we close pageRecord chan. close(r.pageRecordsChan) close(localErrChan) + return } for i := range result { r.pageRecordsChan <- result[i] } - } } } @@ -153,10 +154,9 @@ func (r *RecordReader) streamPartitionPages( if res.Err != nil { continue - } else { - // Save to pageRecord filter that returns current pageRecord. - result = append(result, newPageRecord(res, &curFilter)) } + // Save to pageRecord filter that returns current pageRecord. + result = append(result, newPageRecord(res, &curFilter)) } if aErr = recSet.Close(); aErr != nil { diff --git a/pipeline/pipeline.go b/pipeline/pipeline.go index 2214ce57..c516c2f2 100644 --- a/pipeline/pipeline.go +++ b/pipeline/pipeline.go @@ -16,6 +16,7 @@ package pipeline import ( "context" + "fmt" "sync" ) @@ -52,9 +53,23 @@ var _ Worker[any] = (*Pipeline[any])(nil) // const channelSize = 1 // NewPipeline creates a new DataPipeline. -func NewPipeline[T any](isSynced bool, workGroups ...[]Worker[T]) *Pipeline[T] { +func NewPipeline[T any](isSynced bool, workGroups ...[]Worker[T]) (*Pipeline[T], error) { + if len(workGroups) == 0 { + return nil, fmt.Errorf("workGroups is empty") + } + stages := make([]*stage[T], len(workGroups)) + // Check that all working groups have same number of workers. + if isSynced { + firstLen := len(workGroups[0]) + for i := range workGroups { + if len(workGroups[i]) != firstLen { + return nil, fmt.Errorf("all workers groups must be same length in sync mode") + } + } + } + for i, workers := range workGroups { stages[i] = newStage(workers...) } @@ -62,7 +77,7 @@ func NewPipeline[T any](isSynced bool, workGroups ...[]Worker[T]) *Pipeline[T] { return &Pipeline[T]{ stages: stages, isSynced: isSynced, - } + }, nil } // SetReceiveChan sets the receive channel for the pipeline. 
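// Editorial sketch, not part of the patch: how a caller is expected to size its
// work groups for the synced mode validated above. Worker construction is
// elided (the library builds them with NewReadWorker, NewProcessorWorker and
// NewWriteWorker); runSynced is an illustrative wrapper only.
package sketch

import (
	"context"
	"fmt"

	"github.com/aerospike/backup-go/pipeline"
)

func runSynced[T any](ctx context.Context, read, process, write []pipeline.Worker[T]) error {
	// In sync mode every stage must have the same number of workers so each
	// worker can be wired 1:1 to its peer in the next stage.
	if len(read) != len(process) || len(process) != len(write) {
		return fmt.Errorf("work groups must be the same length in sync mode")
	}

	pl, err := pipeline.NewPipeline(true, read, process, write)
	if err != nil {
		return err
	}

	return pl.Run(ctx)
}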
@@ -95,7 +110,8 @@ func (dp *Pipeline[T]) Run(ctx context.Context) error { errors := make(chan error, len(dp.stages)) var ( - lastSend []<-chan T + lastSend []<-chan T + // To initialize pipeline workers correctly, we need to create empty channels for first and last stages. emptySendChans []chan<- T emptyReceiveChans []<-chan T ) @@ -220,7 +236,6 @@ func (s *stage[T]) Run(ctx context.Context) error { if s.send[i] != nil { close(s.send[i]) } - } close(errors) diff --git a/pipeline/pipline_test.go b/pipeline/pipline_test.go index 857cb685..12838ea7 100644 --- a/pipeline/pipline_test.go +++ b/pipeline/pipline_test.go @@ -35,7 +35,8 @@ func (suite *pipelineTestSuite) TestNewDataPipeline() { workers := [][]Worker[string]{{w1, w2}, {w3}} - pipeline := NewPipeline(workers...) + pipeline, err := NewPipeline(false, workers...) + suite.Require().Nil(err) suite.NotNil(pipeline) } @@ -57,11 +58,12 @@ func (suite *pipelineTestSuite) TestDataPipelineRun() { workers := [][]Worker[string]{{w1, w2}, {w3}} - pipeline := NewPipeline(workers...) + pipeline, err := NewPipeline(false, workers...) + suite.Require().Nil(err) suite.NotNil(pipeline) ctx := context.Background() - err := pipeline.Run(ctx) + err = pipeline.Run(ctx) suite.Nil(err) } @@ -126,7 +128,8 @@ func (suite *pipelineTestSuite) TestDataPipelineRunWithChannels() { w4.EXPECT().Run(ctx) workers := [][]Worker[string]{{w1, w2}, {w3}, {w4}} - pipeline := NewPipeline(workers...) + pipeline, err := NewPipeline(false, workers...) + suite.Require().Nil(err) suite.NotNil(pipeline) receive := make(chan string, 2) @@ -139,7 +142,7 @@ func (suite *pipelineTestSuite) TestDataPipelineRunWithChannels() { receive <- "1" close(receive) - err := pipeline.Run(ctx) + err = pipeline.Run(ctx) suite.Nil(err) suite.Equal(2, len(send)) @@ -174,11 +177,12 @@ func (suite *pipelineTestSuite) TestDataPipelineRunWorkerFails() { workers := [][]Worker[string]{{w1, w2}, {w3}, {w4}} - pipeline := NewPipeline(workers...) + pipeline, err := NewPipeline(false, workers...) + suite.Require().Nil(err) suite.NotNil(pipeline) ctx := context.Background() - err := pipeline.Run(ctx) + err = pipeline.Run(ctx) suite.NotNil(err) } diff --git a/state.go b/state.go index 99388f59..ff185dfe 100644 --- a/state.go +++ b/state.go @@ -33,7 +33,7 @@ type State struct { ctx context.Context // Counter to count how many times State instance was initialized. - // Is used to create prefix for backup files. + // Is used to create suffix for backup files. Counter int // RecordsChan communication channel to save current filter state. RecordsChan chan models.PartitionFilterSerialized From 3981aa86de326b369b68f53766f60f60625e7488 Mon Sep 17 00:00:00 2001 From: Dmitrii Neeman Date: Tue, 15 Oct 2024 15:18:38 +0300 Subject: [PATCH 12/25] FMWK-570-backup-restore-state - fix pipelines --- io/aerospike/record_reader_paginated.go | 14 ++++++++++++ pipeline/pipeline.go | 30 +++++++++++++++++++------ 2 files changed, 37 insertions(+), 7 deletions(-) diff --git a/io/aerospike/record_reader_paginated.go b/io/aerospike/record_reader_paginated.go index 3e365c8a..a4d60963 100644 --- a/io/aerospike/record_reader_paginated.go +++ b/io/aerospike/record_reader_paginated.go @@ -1,3 +1,17 @@ +// Copyright 2024 Aerospike, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + package aerospike import ( diff --git a/pipeline/pipeline.go b/pipeline/pipeline.go index c516c2f2..b51b72cf 100644 --- a/pipeline/pipeline.go +++ b/pipeline/pipeline.go @@ -50,7 +50,7 @@ type Pipeline[T any] struct { var _ Worker[any] = (*Pipeline[any])(nil) -// const channelSize = 1 +const channelSize = 256 // NewPipeline creates a new DataPipeline. func NewPipeline[T any](isSynced bool, workGroups ...[]Worker[T]) (*Pipeline[T], error) { @@ -71,7 +71,7 @@ func NewPipeline[T any](isSynced bool, workGroups ...[]Worker[T]) (*Pipeline[T], } for i, workers := range workGroups { - stages[i] = newStage(workers...) + stages[i] = newStage(isSynced, workers...) } return &Pipeline[T]{ @@ -125,6 +125,7 @@ func (dp *Pipeline[T]) Run(ctx context.Context) error { if dp.isSynced { for i := 0; i < len(s.workers); i++ { + // For synced mode, we don't add buffer to channels, not to lose any data. send := make(chan T) sendChans = append(sendChans, send) receiveChans = append(receiveChans, send) @@ -134,9 +135,13 @@ func (dp *Pipeline[T]) Run(ctx context.Context) error { emptyReceiveChans = append(emptyReceiveChans, empty) } } else { - send := make(chan T) + send := make(chan T, channelSize) sendChans = append(sendChans, send) receiveChans = append(receiveChans, send) + + empty := make(chan T) + emptySendChans = append(emptySendChans, empty) + emptyReceiveChans = append(emptyReceiveChans, empty) } s.SetSendChan(sendChans) @@ -181,6 +186,8 @@ type stage[T any] struct { receive []<-chan T send []chan<- T workers []Worker[T] + // if synced, we distribute communication channels through workers. + isSynced bool } func (s *stage[T]) SetReceiveChan(c []<-chan T) { @@ -191,9 +198,10 @@ func (s *stage[T]) SetSendChan(c []chan<- T) { s.send = c } -func newStage[T any](workers ...Worker[T]) *stage[T] { +func newStage[T any](isSynced bool, workers ...Worker[T]) *stage[T] { s := stage[T]{ - workers: workers, + workers: workers, + isSynced: isSynced, } return &s @@ -205,8 +213,16 @@ func (s *stage[T]) Run(ctx context.Context) error { } for i, w := range s.workers { - w.SetReceiveChan(s.receive[i]) - w.SetSendChan(s.send[i]) + if s.isSynced { + // If it is not sync mode, there will be 1 channel in each slice. + w.SetReceiveChan(s.receive[i]) + w.SetSendChan(s.send[i]) + + continue + } + // Else we distribute all channels to workers. + w.SetReceiveChan(s.receive[0]) + w.SetSendChan(s.send[0]) } ctx, cancel := context.WithCancel(ctx) From 547b0451b9c1458cd3b54c2285cd821e48f5140c Mon Sep 17 00:00:00 2001 From: Dmitrii Neeman Date: Tue, 15 Oct 2024 16:06:34 +0300 Subject: [PATCH 13/25] FMWK-570-backup-restore-state - tests fix --- config_backup.go | 2 +- pipeline/pipeline.go | 5 ++--- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/config_backup.go b/config_backup.go index 5094b8a5..23c9bf6c 100644 --- a/config_backup.go +++ b/config_backup.go @@ -122,7 +122,7 @@ type BackupConfig struct { // Which affects the performance and RAM usage. PageSize int64 // SyncPipelines if set to true, the same number of workers will be created for each stage of the pipeline. 
- // Each worker will be connected to the next stage worker with a separate channel. + // Each worker will be connected to the next stage worker with a separate unbuffered channel. SyncPipelines bool } diff --git a/pipeline/pipeline.go b/pipeline/pipeline.go index b51b72cf..0516ca55 100644 --- a/pipeline/pipeline.go +++ b/pipeline/pipeline.go @@ -139,9 +139,8 @@ func (dp *Pipeline[T]) Run(ctx context.Context) error { sendChans = append(sendChans, send) receiveChans = append(receiveChans, send) - empty := make(chan T) - emptySendChans = append(emptySendChans, empty) - emptyReceiveChans = append(emptyReceiveChans, empty) + emptySendChans = append(emptySendChans, dp.send) + emptyReceiveChans = append(emptyReceiveChans, dp.receive) } s.SetSendChan(sendChans) From 9adb40c1a73bcc91914e3d146bf857fda86e3a10 Mon Sep 17 00:00:00 2001 From: Dmitrii Neeman Date: Tue, 15 Oct 2024 16:15:17 +0300 Subject: [PATCH 14/25] FMWK-570-backup-restore-state - added output files prefix paramter --- config_backup.go | 6 ++++-- handler_backup.go | 2 +- io/encoding/asb/encode.go | 4 ++-- io_encoding.go | 2 +- mocks/Encoder_mock.go | 21 +++++++++++---------- 5 files changed, 19 insertions(+), 16 deletions(-) diff --git a/config_backup.go b/config_backup.go index 23c9bf6c..f0be9e50 100644 --- a/config_backup.go +++ b/config_backup.go @@ -116,14 +116,16 @@ type BackupConfig struct { // that was generated from the interrupted/failed run. // Works only for default and/or partition backup. Not work with ParallelNodes or NodeList. Continue bool - // PageSize how many records will be read on one iteration for continuation backup. + // How many records will be read on one iteration for continuation backup. // Affects size if overlap on resuming backup after an error. // By default, it must be zero. If any value is set, reading from Aerospike will be paginated. // Which affects the performance and RAM usage. PageSize int64 - // SyncPipelines if set to true, the same number of workers will be created for each stage of the pipeline. + // If set to true, the same number of workers will be created for each stage of the pipeline. // Each worker will be connected to the next stage worker with a separate unbuffered channel. SyncPipelines bool + // When using directory parameter, prepend a prefix to the names of the generated files. + OutputFilePrefix string } // NewDefaultBackupConfig returns a new BackupConfig with default values. diff --git a/handler_backup.go b/handler_backup.go index d1841b84..4d1f074a 100644 --- a/handler_backup.go +++ b/handler_backup.go @@ -322,7 +322,7 @@ func (bh *BackupHandler) newConfiguredWriter(ctx context.Context) (io.WriteClose suffix = bh.state.getFileSuffix() } - filename := bh.encoder.GenerateFilename(suffix) + filename := bh.encoder.GenerateFilename(bh.config.OutputFilePrefix, suffix) storageWriter, err := bh.writer.NewWriter(ctx, filename) if err != nil { diff --git a/io/encoding/asb/encode.go b/io/encoding/asb/encode.go index 001def2c..f4dbcab0 100644 --- a/io/encoding/asb/encode.go +++ b/io/encoding/asb/encode.go @@ -48,8 +48,8 @@ func NewEncoder(namespace string, compact bool) *Encoder { } // GenerateFilename generates a file name for the given namespace. -func (e *Encoder) GenerateFilename(suffix string) string { - return fmt.Sprintf("%s_%d%s.asb", e.namespace, e.id.Add(1), suffix) +func (e *Encoder) GenerateFilename(prefix, suffix string) string { + return fmt.Sprintf("%s%s_%d%s.asb", prefix, e.namespace, e.id.Add(1), suffix) } // EncodeToken encodes a token to the ASB format. 
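The new two-argument `GenerateFilename(prefix, suffix)` composes names as `<prefix><namespace>_<id><suffix>.asb`, mirroring the format string in the hunk above. A small sketch of that composition follows; the concrete suffix produced by the state's `getFileSuffix()` is not shown in this patch, so the `"(1)"` value below is only a placeholder.

```go
package main

import (
	"fmt"
	"sync/atomic"
)

// generateFilename mirrors the format string from the hunk above:
// <prefix><namespace>_<id><suffix>.asb
func generateFilename(id *atomic.Int64, prefix, namespace, suffix string) string {
	return fmt.Sprintf("%s%s_%d%s.asb", prefix, namespace, id.Add(1), suffix)
}

func main() {
	var id atomic.Int64
	// No prefix, no suffix: test_1.asb
	fmt.Println(generateFilename(&id, "", "test", ""))
	// With OutputFilePrefix "backup_" and a placeholder state suffix: backup_test_2(1).asb
	fmt.Println(generateFilename(&id, "backup_", "test", "(1)"))
}
```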
diff --git a/io_encoding.go b/io_encoding.go index d9920d68..fff19872 100644 --- a/io_encoding.go +++ b/io_encoding.go @@ -36,7 +36,7 @@ const ( type Encoder interface { EncodeToken(*models.Token) ([]byte, error) GetHeader() []byte - GenerateFilename(prefix string) string + GenerateFilename(prefix, suffix string) string } // NewEncoder returns a new Encoder according to `EncoderType`. diff --git a/mocks/Encoder_mock.go b/mocks/Encoder_mock.go index 98b6d1c6..6c4fa22d 100644 --- a/mocks/Encoder_mock.go +++ b/mocks/Encoder_mock.go @@ -78,17 +78,17 @@ func (_c *MockEncoder_EncodeToken_Call) RunAndReturn(run func(*models.Token) ([] return _c } -// GenerateFilename provides a mock function with given fields: prefix -func (_m *MockEncoder) GenerateFilename(prefix string) string { - ret := _m.Called(prefix) +// GenerateFilename provides a mock function with given fields: prefix, suffix +func (_m *MockEncoder) GenerateFilename(prefix string, suffix string) string { + ret := _m.Called(prefix, suffix) if len(ret) == 0 { panic("no return value specified for GenerateFilename") } var r0 string - if rf, ok := ret.Get(0).(func(string) string); ok { - r0 = rf(prefix) + if rf, ok := ret.Get(0).(func(string, string) string); ok { + r0 = rf(prefix, suffix) } else { r0 = ret.Get(0).(string) } @@ -103,13 +103,14 @@ type MockEncoder_GenerateFilename_Call struct { // GenerateFilename is a helper method to define mock.On call // - prefix string -func (_e *MockEncoder_Expecter) GenerateFilename(prefix interface{}) *MockEncoder_GenerateFilename_Call { - return &MockEncoder_GenerateFilename_Call{Call: _e.mock.On("GenerateFilename", prefix)} +// - suffix string +func (_e *MockEncoder_Expecter) GenerateFilename(prefix interface{}, suffix interface{}) *MockEncoder_GenerateFilename_Call { + return &MockEncoder_GenerateFilename_Call{Call: _e.mock.On("GenerateFilename", prefix, suffix)} } -func (_c *MockEncoder_GenerateFilename_Call) Run(run func(prefix string)) *MockEncoder_GenerateFilename_Call { +func (_c *MockEncoder_GenerateFilename_Call) Run(run func(prefix string, suffix string)) *MockEncoder_GenerateFilename_Call { _c.Call.Run(func(args mock.Arguments) { - run(args[0].(string)) + run(args[0].(string), args[1].(string)) }) return _c } @@ -119,7 +120,7 @@ func (_c *MockEncoder_GenerateFilename_Call) Return(_a0 string) *MockEncoder_Gen return _c } -func (_c *MockEncoder_GenerateFilename_Call) RunAndReturn(run func(string) string) *MockEncoder_GenerateFilename_Call { +func (_c *MockEncoder_GenerateFilename_Call) RunAndReturn(run func(string, string) string) *MockEncoder_GenerateFilename_Call { _c.Call.Return(run) return _c } From 423a71c936d893a143ff13ebbfd087cb78c6c9a8 Mon Sep 17 00:00:00 2001 From: Dmitrii Neeman Date: Tue, 15 Oct 2024 17:00:56 +0300 Subject: [PATCH 15/25] FMWK-570-backup-restore-state - added tests --- handler_backup.go | 20 ++++- state.go | 22 ++--- state_test.go | 90 +++++++++++++++++++ tests/integration/integration_test.go | 125 ++++++++++++++++++++++++++ writers.go | 22 ++--- 5 files changed, 254 insertions(+), 25 deletions(-) diff --git a/handler_backup.go b/handler_backup.go index 4d1f074a..677ce96f 100644 --- a/handler_backup.go +++ b/handler_backup.go @@ -271,7 +271,7 @@ func (bh *BackupHandler) makeWriteWorkers( for i, w := range backupWriters { var dataWriter pipeline.DataWriter[*models.Token] = newTokenWriter(bh.encoder, w, bh.logger, nil) if bh.state != nil { - dataWriter = newTokenWriter(bh.encoder, w, bh.logger, bh.state.RecordsChan) + dataWriter = newTokenWriter(bh.encoder, w, 
bh.logger, bh.state.RecordsStateChan) } dataWriter = newWriterWithTokenStats(dataWriter, &bh.stats, bh.logger) @@ -448,7 +448,14 @@ func (bh *BackupHandler) backupSIndexes( sindexWriter := pipeline.DataWriter[*models.Token](newTokenWriter(bh.encoder, writer, bh.logger, nil)) if bh.state != nil { - sindexWriter = pipeline.DataWriter[*models.Token](newTokenWriter(bh.encoder, writer, bh.logger, bh.state.RecordsChan)) + sindexWriter = pipeline.DataWriter[*models.Token]( + newTokenWriter( + bh.encoder, + writer, + bh.logger, + bh.state.RecordsStateChan, + ), + ) } sindexWriter = newWriterWithTokenStats(sindexWriter, &bh.stats, bh.logger) @@ -476,7 +483,14 @@ func (bh *BackupHandler) backupUDFs( udfWriter := pipeline.DataWriter[*models.Token](newTokenWriter(bh.encoder, writer, bh.logger, nil)) if bh.state != nil { - udfWriter = pipeline.DataWriter[*models.Token](newTokenWriter(bh.encoder, writer, bh.logger, bh.state.RecordsChan)) + udfWriter = pipeline.DataWriter[*models.Token]( + newTokenWriter( + bh.encoder, + writer, + bh.logger, + bh.state.RecordsStateChan, + ), + ) } udfWriter = newWriterWithTokenStats(udfWriter, &bh.stats, bh.logger) diff --git a/state.go b/state.go index ff185dfe..66bf4bda 100644 --- a/state.go +++ b/state.go @@ -35,8 +35,8 @@ type State struct { // Counter to count how many times State instance was initialized. // Is used to create suffix for backup files. Counter int - // RecordsChan communication channel to save current filter state. - RecordsChan chan models.PartitionFilterSerialized + // RecordsStateChan communication channel to save current filter state. + RecordsStateChan chan models.PartitionFilterSerialized // RecordStates store states of all filters. RecordStates map[string]models.PartitionFilterSerialized // Mutex for RecordStates operations. @@ -92,13 +92,13 @@ func newState( ) *State { s := &State{ ctx: ctx, - // RecordsChan must not be buffered, so we can stop all operations. - RecordsChan: make(chan models.PartitionFilterSerialized), - RecordStates: make(map[string]models.PartitionFilterSerialized), - FileName: config.StateFile, - DumpDuration: config.StateFileDumpDuration, - writer: writer, - logger: logger, + // RecordsStateChan must not be buffered, so we can stop all operations. + RecordsStateChan: make(chan models.PartitionFilterSerialized), + RecordStates: make(map[string]models.PartitionFilterSerialized), + FileName: config.StateFile, + DumpDuration: config.StateFileDumpDuration, + writer: writer, + logger: logger, } // Run watcher on initialization. go s.serve() @@ -130,7 +130,7 @@ func newStateFromFile( s.ctx = ctx s.writer = writer s.logger = logger - s.RecordsChan = make(chan models.PartitionFilterSerialized) + s.RecordsStateChan = make(chan models.PartitionFilterSerialized) s.Counter++ logger.Debug("loaded state file successfully") @@ -222,7 +222,7 @@ func (s *State) serveRecords() { select { case <-s.ctx.Done(): return - case state := <-s.RecordsChan: + case state := <-s.RecordsStateChan: if state.Begin == 0 && state.Count == 0 && state.Digest == nil { continue } diff --git a/state_test.go b/state_test.go index 51ee55a2..59fcbae8 100644 --- a/state_test.go +++ b/state_test.go @@ -13,3 +13,93 @@ // limitations under the License. 
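On the `RecordsStateChan` rename above: the channel is deliberately unbuffered, so a token writer blocks until the state keeper has taken its partition-filter update, and zero-value updates are skipped. A minimal sketch of that producer/consumer handshake, with `partitionState` standing in for `models.PartitionFilterSerialized` (only the fields checked in `serveRecords` are modeled):

```go
package main

import (
	"context"
	"fmt"
)

// partitionState stands in for models.PartitionFilterSerialized.
type partitionState struct {
	Begin  int
	Count  int
	Digest []byte
}

func main() {
	ctx, cancel := context.WithCancel(context.Background())
	// Unbuffered on purpose: a send blocks until the state keeper has
	// consumed it, so no in-flight state is lost when work stops.
	states := make(chan partitionState)

	go func() {
		defer cancel()
		states <- partitionState{}                   // zero value: ignored by the consumer
		states <- partitionState{Begin: 1, Count: 1} // real progress update
	}()

	for {
		select {
		case <-ctx.Done():
			fmt.Println("state keeper stopped")
			return
		case s := <-states:
			if s.Begin == 0 && s.Count == 0 && s.Digest == nil {
				continue // nothing to record yet
			}
			fmt.Printf("recorded filter state: begin=%d count=%d\n", s.Begin, s.Count)
		}
	}
}
```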
package backup + +import ( + "context" + "log/slog" + "os" + "path/filepath" + "testing" + "time" + + a "github.com/aerospike/aerospike-client-go/v7" + "github.com/aerospike/backup-go/io/encoding/asb" + "github.com/aerospike/backup-go/io/local" + "github.com/aerospike/backup-go/models" + "github.com/stretchr/testify/require" +) + +const ( + testDuration = 1 * time.Second + testStateFile = "test_state_file" +) + +func TestState(t *testing.T) { + t.Parallel() + + testDir := t.TempDir() + tempFile := filepath.Join(testDir, testStateFile) + + testFilters := []*a.PartitionFilter{ + NewPartitionFilterByID(1), + NewPartitionFilterByID(2), + } + + ctx, cancel := context.WithCancel(context.Background()) + + cfg := NewDefaultBackupConfig() + cfg.StateFile = testStateFile + cfg.StateFileDumpDuration = testDuration + cfg.PageSize = 100000 + cfg.SyncPipelines = true + cfg.PartitionFilters = testFilters + + reader, err := local.NewReader( + local.WithDir(testDir), + ) + require.NoError(t, err) + + writer, err := local.NewWriter( + ctx, + local.WithValidator(asb.NewValidator()), + local.WithSkipDirCheck(), + local.WithDir(testDir), + local.WithUnbufferedWrite(), + ) + require.NoError(t, err) + + logger := slog.New(slog.NewTextHandler(os.Stdout, nil)) + + // Check init. + state, err := NewState(ctx, cfg, reader, writer, logger) + require.NotNil(t, state) + require.NoError(t, err) + + for i := range testFilters { + pfs, err := models.NewPartitionFilterSerialized(testFilters[i]) + require.NoError(t, err) + state.RecordsStateChan <- pfs + } + + time.Sleep(testDuration * 3) + cancel() + + // Check that file exists. + _, err = os.Stat(tempFile) + require.NoError(t, err) + + // Nullify the link. + result := []*a.PartitionFilter{ + NewPartitionFilterByID(1), + NewPartitionFilterByID(2), + } + + // Check restore. 
+ newCtx := context.Background() + cfg.Continue = true + newState, err := NewState(newCtx, cfg, reader, writer, logger) + require.NoError(t, err) + newPf, err := newState.loadPartitionFilters() + require.NoError(t, err) + require.Equal(t, newPf, result) +} diff --git a/tests/integration/integration_test.go b/tests/integration/integration_test.go index 5934e391..2ba258fd 100644 --- a/tests/integration/integration_test.go +++ b/tests/integration/integration_test.go @@ -21,6 +21,7 @@ import ( "fmt" "io" "log/slog" + "math/rand" "os" "testing" "time" @@ -1111,6 +1112,130 @@ func (suite *backupRestoreTestSuite) TestBackupEstimateOk() { suite.TearDownTest() } +func (suite *backupRestoreTestSuite) TestBackupContinuation() { + const totalRecords = 900 + batch := genRecords(suite.namespace, suite.set, totalRecords, testBins) + suite.SetupTest(batch) + + testFolder := suite.T().TempDir() + testFile := "test_state_file" + + for i := 0; i < 5; i++ { + randomNumber := rand.Intn(7-3+1) + 3 + ctx, cancel := context.WithCancel(context.Background()) + go func() { + time.Sleep(time.Duration(randomNumber) * time.Second) + cancel() + }() + + first := suite.runFirstBackup(ctx, testFolder, testFile, i) + + ctx = context.Background() + second := suite.runContinueBackup(ctx, testFolder, testFile, i) + + suite.T().Log("first:", first, "second:", second) + result := (first + second) >= totalRecords + suite.T().Log(first + second) + suite.Equal(true, result) + } + + suite.TearDownTest() +} + +func (suite *backupRestoreTestSuite) runFirstBackup(ctx context.Context, testFolder, testStateFile string, i int, +) uint64 { + bFolder := fmt.Sprintf("%s_%d", testFolder, i) + + writers, err := local.NewWriter( + ctx, + local.WithValidator(asb.NewValidator()), + local.WithSkipDirCheck(), + local.WithDir(bFolder), + local.WithUnbufferedWrite(), + ) + if err != nil { + panic(err) + } + + readers, err := local.NewReader( + local.WithDir(bFolder), + ) + if err != nil { + panic(err) + } + + backupCfg := backup.NewDefaultBackupConfig() + backupCfg.Namespace = suite.namespace + backupCfg.ParallelRead = 10 + backupCfg.ParallelWrite = 10 + + backupCfg.StateFile = testStateFile + backupCfg.StateFileDumpDuration = 10 * time.Millisecond + backupCfg.Bandwidth = 1000000 + backupCfg.PageSize = 100 + backupCfg.SyncPipelines = true + + backupHandler, err := suite.backupClient.Backup(ctx, backupCfg, writers, readers) + if err != nil { + panic(err) + } + + // use backupHandler.Wait() to wait for the job to finish or fail + err = backupHandler.Wait(ctx) + if err != nil { + suite.T().Logf("Backup failed: %v", err) + } + + return backupHandler.GetStats().GetReadRecords() +} + +func (suite *backupRestoreTestSuite) runContinueBackup(ctx context.Context, testFolder, testStateFile string, i int, +) uint64 { + bFolder := fmt.Sprintf("%s_%d", testFolder, i) + + writers, err := local.NewWriter( + ctx, + local.WithValidator(asb.NewValidator()), + local.WithSkipDirCheck(), + local.WithDir(bFolder), + local.WithUnbufferedWrite(), + ) + if err != nil { + panic(err) + } + + readers, err := local.NewReader( + local.WithDir(bFolder), + ) + if err != nil { + panic(err) + } + + backupCfg := backup.NewDefaultBackupConfig() + backupCfg.Namespace = suite.namespace + backupCfg.ParallelRead = 10 + backupCfg.ParallelWrite = 10 + + backupCfg.StateFile = testStateFile + backupCfg.Continue = true + backupCfg.StateFileDumpDuration = 10 * time.Millisecond + backupCfg.PageSize = 100 + backupCfg.SyncPipelines = true + + backupHandler, err := suite.backupClient.Backup(ctx, 
backupCfg, writers, readers) + if err != nil { + panic(err) + } + + // use backupHandler.Wait() to wait for the job to finish or fail + err = backupHandler.Wait(ctx) + if err != nil { + suite.T().Logf("Backup failed: %v", err) + } + + return backupHandler.GetStats().GetReadRecords() +} + type byteReadWriterFactory struct { buffer *bytes.Buffer } diff --git a/writers.go b/writers.go index e4ed1b3e..49b81299 100644 --- a/writers.go +++ b/writers.go @@ -85,10 +85,10 @@ func (tw *tokenStatsWriter) Close() error { // It writes the types from the models package as encoded data // to an io.Writer. It uses an Encoder to encode the data. type tokenWriter struct { - encoder Encoder - output io.Writer - logger *slog.Logger - stateChan chan<- models.PartitionFilterSerialized + encoder Encoder + output io.Writer + logger *slog.Logger + recordsStateChan chan<- models.PartitionFilterSerialized } // newTokenWriter creates a new tokenWriter. @@ -96,17 +96,17 @@ func newTokenWriter( encoder Encoder, output io.Writer, logger *slog.Logger, - stateChan chan<- models.PartitionFilterSerialized, + recordsStateChan chan<- models.PartitionFilterSerialized, ) *tokenWriter { id := uuid.NewString() logger = logging.WithWriter(logger, id, logging.WriterTypeToken) logger.Debug("created new token writer") return &tokenWriter{ - encoder: encoder, - output: output, - logger: logger, - stateChan: stateChan, + encoder: encoder, + output: output, + logger: logger, + recordsStateChan: recordsStateChan, } } @@ -117,8 +117,8 @@ func (w *tokenWriter) Write(v *models.Token) (int, error) { return 0, fmt.Errorf("error encoding token: %w", err) } - if w.stateChan != nil { - w.stateChan <- *v.Filter + if w.recordsStateChan != nil && v.Filter != nil { + w.recordsStateChan <- *v.Filter } return w.output.Write(data) From f1d65dd56a32e4627b6d1b45867b06be15d477c0 Mon Sep 17 00:00:00 2001 From: Dmitrii Neeman Date: Tue, 15 Oct 2024 17:07:40 +0300 Subject: [PATCH 16/25] FMWK-570-backup-restore-state - compare values for test, not pointers --- state_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/state_test.go b/state_test.go index 59fcbae8..56b71899 100644 --- a/state_test.go +++ b/state_test.go @@ -101,5 +101,5 @@ func TestState(t *testing.T) { require.NoError(t, err) newPf, err := newState.loadPartitionFilters() require.NoError(t, err) - require.Equal(t, newPf, result) + require.EqualValues(t, newPf, result) } From 2b916446adb2394a9848bcf68bc1fa167cef8d83 Mon Sep 17 00:00:00 2001 From: Dmitrii Neeman Date: Wed, 16 Oct 2024 09:42:41 +0300 Subject: [PATCH 17/25] FMWK-570-backup-restore-state - Added flags to CLI tool --- cmd/asbackup/readme.md | 140 +++++++++++++++++----------------- cmd/internal/app/configs.go | 10 ++- cmd/internal/flags/backup.go | 8 ++ cmd/internal/models/backup.go | 2 + 4 files changed, 91 insertions(+), 69 deletions(-) diff --git a/cmd/asbackup/readme.md b/cmd/asbackup/readme.md index 4f76fbba..9c730984 100644 --- a/cmd/asbackup/readme.md +++ b/cmd/asbackup/readme.md @@ -24,6 +24,9 @@ More info here https://goreleaser.com/quick-start/ ## Supported flags ``` +Welcome to the Aerospike backup CLI tool! +----------------------------------------- + Usage: asbackup [flags] @@ -70,63 +73,75 @@ Backup Flags: --socket-timeout int Socket timeout in milliseconds. If this value is 0, its set to total-timeout. If both are 0, there is no socket idle time limit (default 10000) -N, --nice int The limits for read/write storage bandwidth in MiB/s - -o, --output-file string Backup to a single backup file. 
Use - for stdout. Required, unless -d or -e is used. - -r, --remove-files Remove existing backup file (-o) or files (-d). - -F, --file-limit int Rotate backup files, when their size crosses the given - value (in bytes) Only used when backing up to a Directory. - -D, --after-digest string Backup records after record digest in record's partition plus all succeeding - partitions. Used to resume backup with last record received from previous - incomplete backup. - This argument is mutually exclusive to partition-list. - Format: base64 encoded string - Example: EjRWeJq83vEjRRI0VniavN7xI0U= - - -a, --modified-before string - Perform an incremental backup; only include records - that changed after the given date and time. The system's - local timezone applies. If only HH:MM:SS is specified, then - today's date is assumed as the date. If only YYYY-MM-DD is - specified, then 00:00:00 (midnight) is assumed as the time. - - -b, --modified-after string - Only include records that last changed before the given - date and time. May combined with --modified-after to specify a range. - -M, --max-records int The number of records approximately to back up. 0 - all records - -x, --no-bins Do not include bin data in the backup. - --sleep-between-retries int The amount of milliseconds to sleep between retries. (default 5) - -f, --filter-exp string Base64 encoded expression. Use the encoded filter expression in each scan call, - which can be used to do a partial backup. The expression to be used can be base64 - encoded through any client. This argument is mutually exclusive with multi-set backup. - - --parallel-nodes Specifies how to perform scan. If set to true, we launch parallel workers for nodes; - otherwise workers run in parallel for partitions. - --remove-artifacts Remove existing backup file (-o) or files (-d) without performing a backup. - -C, --compact Do not apply base-64 encoding to BLOBs; results in smaller backup files. - -l, --node-list string :[,:[,...]] - ::[,::[,...]] - Backup the given cluster nodes only. - The job is parallelized by number of nodes unless --parallel is set less than nodes number. - This argument is mutually exclusive to partition-list/after-digest arguments. - Default: backup all nodes in the cluster - --no-ttl-only Only include records that have no ttl set (persistent records). - --prefer-racks string [,[,...]] - A list of Aerospike Server rack IDs to prefer when reading records for a backup. - -X, --partition-list string List of partitions [...]]> to back up. Partition filters can be ranges, - individual partitions, or records after a specific digest within a single partition. - This argument is mutually exclusive to after-digest. - Filter: [-]| - begin partition: 0-4095 - partition count: 1-4096 Default: 1 - digest: base64 encoded string - Examples: 0-1000, 1000-1000, 2222, EjRWeJq83vEjRRI0VniavN7xI0U= - Default: 0-4096 (all partitions) - - -e, --estimate Estimate the backed-up record size from a random sample of - 10,000 (default) records at 99.9999%% confidence. - It ignores any filter: filter-exp, node-list, modified-after, modified-before, no-ttl-only, - after-digest, partition-list. - It calculates estimate size of full backup. - --estimate-samples int The number of samples to take when running a backup estimate. (default 10000) + -o, --output-file string Backup to a single backup file. Use - for stdout. Required, unless -d or -e is used. + -q, --output-file-prefix string When using directory parameter, prepend a prefix to the names of the generated files. 
+ -r, --remove-files Remove existing backup file (-o) or files (-d). + -F, --file-limit int Rotate backup files, when their size crosses the given + value (in bytes) Only used when backing up to a Directory. + -D, --after-digest string Backup records after record digest in record's partition plus all succeeding + partitions. Used to resume backup with last record received from previous + incomplete backup. + This argument is mutually exclusive to partition-list. + Format: base64 encoded string + Example: EjRWeJq83vEjRRI0VniavN7xI0U= + + -a, --modified-before string + Perform an incremental backup; only include records + that changed after the given date and time. The system's + local timezone applies. If only HH:MM:SS is specified, then + today's date is assumed as the date. If only YYYY-MM-DD is + specified, then 00:00:00 (midnight) is assumed as the time. + + -b, --modified-after string + Only include records that last changed before the given + date and time. May combined with --modified-after to specify a range. + -M, --max-records int The number of records approximately to back up. 0 - all records + -x, --no-bins Do not include bin data in the backup. + --sleep-between-retries int The amount of milliseconds to sleep between retries. (default 5) + -f, --filter-exp string Base64 encoded expression. Use the encoded filter expression in each scan call, + which can be used to do a partial backup. The expression to be used can be base64 + encoded through any client. This argument is mutually exclusive with multi-set backup. + + --parallel-nodes Specifies how to perform scan. If set to true, we launch parallel workers for nodes; + otherwise workers run in parallel for partitions. + --remove-artifacts Remove existing backup file (-o) or files (-d) without performing a backup. + -C, --compact Do not apply base-64 encoding to BLOBs; results in smaller backup files. + -l, --node-list string :[,:[,...]] + ::[,::[,...]] + Backup the given cluster nodes only. + The job is parallelized by number of nodes unless --parallel is set less than nodes number. + This argument is mutually exclusive to partition-list/after-digest arguments. + Default: backup all nodes in the cluster + --no-ttl-only Only include records that have no ttl set (persistent records). + --prefer-racks string [,[,...]] + A list of Aerospike Server rack IDs to prefer when reading records for a backup. + -X, --partition-list string List of partitions [...]]> to back up. Partition filters can be ranges, + individual partitions, or records after a specific digest within a single partition. + This argument is mutually exclusive to after-digest. + Filter: [-]| + begin partition: 0-4095 + partition count: 1-4096 Default: 1 + digest: base64 encoded string + Examples: 0-1000, 1000-1000, 2222, EjRWeJq83vEjRRI0VniavN7xI0U= + Default: 0-4096 (all partitions) + + -e, --estimate Estimate the backed-up record size from a random sample of + 10,000 (default) records at 99.9999%% confidence. + It ignores any filter: filter-exp, node-list, modified-after, modified-before, no-ttl-only, + after-digest, partition-list. + It calculates estimate size of full backup. + --estimate-samples int The number of samples to take when running a backup estimate. (default 10000) + -c, --continue string Resumes an interrupted/failed backup from where it was left off, given the .state file + that was generated from the interrupted/failed run. 
+ --state-file-dst .asb.state Either a path with a file name or a directory in which the backup state file will be + placed if the backup is interrupted/fails. If a path with a file name is used, that + exact path is where the backup file will be placed. If a directory is given, the backup + state will be placed in the directory with name .asb.state, or + `.asb.state` if `--output-file-prefix` is given. + --state-file-dump-duration int Intervals in milliseconds, how often dump state file to disk. (default 10000) + --scan-page-size int How many records will be read on one iteration for continuation backup. + Affects size if overlap on resuming backup after an error. + Is used only with --state-file-dst or --continue. (default 10000) Compression Flags: -z, --compress string Enables compressing of backup files using the specified compression algorithm. @@ -173,17 +188,6 @@ Azure Flags: ## Unsupported flags ``` ---continue Resumes an interrupted/failed backup from where it was left off, given the .state file - that was generated from the interrupted/failed run. - ---state-file-dst Either a path with a file name or a directory in which the backup state file will be - placed if the backup is interrupted/fails. If a path with a file name is used, that - exact path is where the backup file will be placed. If a directory is given, the backup - state will be placed in the directory with name `.asb.state`, or - `.asb.state` if `--output-file-prefix` is given. - --q, --output-file-prefix When using directory parameter, prepend a prefix to the names of the generated files. - --machine Output machine-readable status updates to the given path, typically a FIFO. --no-config-file Do not read any config file. Default: disabled diff --git a/cmd/internal/app/configs.go b/cmd/internal/app/configs.go index c2b52fdd..e49fc849 100644 --- a/cmd/internal/app/configs.go +++ b/cmd/internal/app/configs.go @@ -63,11 +63,19 @@ func mapBackupConfig( c.Compact = backupParams.Compact c.NoTTLOnly = backupParams.NoTTLOnly c.StateFileDumpDuration = time.Duration(backupParams.StateFileDumpDuration) * time.Millisecond - c.StateFile = backupParams.StateFileDst + c.OutputFilePrefix = backupParams.OutputFilePrefix if backupParams.Continue != "" { c.StateFile = backupParams.Continue c.Continue = true + c.SyncPipelines = true + c.PageSize = backupParams.ScanPageSize + } + + if backupParams.StateFileDst != "" { + c.StateFile = backupParams.StateFileDst + c.SyncPipelines = true + c.PageSize = backupParams.ScanPageSize } // Overwrite partitions if we use nodes. diff --git a/cmd/internal/flags/backup.go b/cmd/internal/flags/backup.go index 0fedfc21..55a96ddb 100644 --- a/cmd/internal/flags/backup.go +++ b/cmd/internal/flags/backup.go @@ -33,6 +33,9 @@ func (f *Backup) NewFlagSet() *pflag.FlagSet { flagSet.StringVarP(&f.OutputFile, "output-file", "o", "", "Backup to a single backup file. Use - for stdout. 
Required, unless -d or -e is used.") + flagSet.StringVarP(&f.OutputFilePrefix, "output-file-prefix", "q", + "", + "When using directory parameter, prepend a prefix to the names of the generated files.") flagSet.BoolVarP(&f.RemoveFiles, "remove-files", "r", false, "Remove existing backup file (-o) or files (-d).") @@ -137,6 +140,11 @@ func (f *Backup) NewFlagSet() *pflag.FlagSet { flagSet.Int64Var(&f.StateFileDumpDuration, "state-file-dump-duration", 10000, "Intervals in milliseconds, how often dump state file to disk.") + flagSet.Int64Var(&f.ScanPageSize, "scan-page-size", + 10000, + "How many records will be read on one iteration for continuation backup.\n"+ + "Affects size if overlap on resuming backup after an error.\n"+ + "Is used only with --state-file-dst or --continue.") return flagSet } diff --git a/cmd/internal/models/backup.go b/cmd/internal/models/backup.go index 33f48326..1a1abf58 100644 --- a/cmd/internal/models/backup.go +++ b/cmd/internal/models/backup.go @@ -37,6 +37,8 @@ type Backup struct { StateFileDst string StateFileDumpDuration int64 Continue string + ScanPageSize int64 + OutputFilePrefix string } // ShouldClearTarget check if we should clean target directory. From dba8c86fcd3eaf7e7dacf21ccd500a39669aad56 Mon Sep 17 00:00:00 2001 From: Dmitrii Neeman Date: Wed, 16 Oct 2024 10:56:11 +0300 Subject: [PATCH 18/25] FMWK-570-backup-restore-state - WIP on s3 writer --- cmd/internal/app/asbackup.go | 14 +++++++--- cmd/internal/app/asrestore.go | 11 ++++---- cmd/internal/app/readers.go | 33 +++++++++++++++++++---- cmd/internal/app/writers.go | 32 ++++++++++++++++++++++ cmd/internal/models/backup.go | 6 ++++- io/aws/s3/options.go | 10 +++++++ io/aws/s3/writer.go | 50 +++++++++++++++++++++++++++++++++-- io/azure/blob/options.go | 10 +++++++ io/gcp/storage/options.go | 10 +++++++ io/local/options.go | 4 +-- 10 files changed, 161 insertions(+), 19 deletions(-) diff --git a/cmd/internal/app/asbackup.go b/cmd/internal/app/asbackup.go index 31a6650f..1df6707c 100644 --- a/cmd/internal/app/asbackup.go +++ b/cmd/internal/app/asbackup.go @@ -67,7 +67,14 @@ func NewASBackup( ) // We initialize a writer only if output is configured. 
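From the library side, the state-file flow added in these patches is driven purely by configuration. Below is a sketch of how a first run and a continuation run might be configured; the field names are taken from the hunks and tests above, while the namespace, file name and page size are illustrative, and the actual backup call with client, writer and reader is omitted.

```go
package main

import (
	"fmt"

	backup "github.com/aerospike/backup-go"
)

func main() {
	// First run: write progress to a state file while backing up.
	cfg := backup.NewDefaultBackupConfig()
	cfg.Namespace = "test"
	cfg.StateFile = "backup.asb.state"
	cfg.PageSize = 100       // paginated scans are required for state saving
	cfg.SyncPipelines = true // one unbuffered channel per worker pair

	// Continuation run: reuse the same state file and set Continue.
	resumeCfg := backup.NewDefaultBackupConfig()
	resumeCfg.Namespace = "test"
	resumeCfg.StateFile = "backup.asb.state"
	resumeCfg.Continue = true
	resumeCfg.PageSize = 100
	resumeCfg.SyncPipelines = true

	fmt.Println(cfg.StateFile, resumeCfg.Continue)
}
```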
if backupParams.OutputFile != "" || commonParams.Directory != "" { - writer, err = getWriter(ctx, backupParams, commonParams, awsS3, gcpStorage, azureBlob) + writer, err = getWriter( + ctx, + backupParams, + commonParams, + awsS3, + gcpStorage, + azureBlob, + ) if err != nil { return nil, fmt.Errorf("failed to create backup writer: %w", err) } @@ -78,10 +85,9 @@ func NewASBackup( } } - if backupParams.StateFileDst != "" || backupParams.Continue != "" { + if backupParams.ShouldSaveState() { r := &models.Restore{InputFile: backupParams.OutputFile} - - reader, err = getReader(ctx, r, commonParams, awsS3, gcpStorage, azureBlob) + reader, err = getReader(ctx, r, commonParams, awsS3, gcpStorage, azureBlob, backupParams) if err != nil { return nil, fmt.Errorf("failed to create reader: %w", err) } diff --git a/cmd/internal/app/asrestore.go b/cmd/internal/app/asrestore.go index 2bf1e1ef..57b86c20 100644 --- a/cmd/internal/app/asrestore.go +++ b/cmd/internal/app/asrestore.go @@ -49,7 +49,7 @@ func NewASRestore( return nil, err } - reader, err := getReader(ctx, restoreParams, commonParams, awsS3, gcpStorage, azureBlob) + reader, err := getReader(ctx, restoreParams, commonParams, awsS3, gcpStorage, azureBlob, false) if err != nil { return nil, fmt.Errorf("failed to create backup reader: %w", err) } @@ -108,15 +108,16 @@ func getReader( awsS3 *models.AwsS3, gcpStorage *models.GcpStorage, azureBlob *models.AzureBlob, + backupParams *models.Backup, ) (backup.StreamingReader, error) { switch { case awsS3.Region != "": - return newS3Reader(ctx, awsS3, restoreParams, commonParams) + return newS3Reader(ctx, awsS3, restoreParams, commonParams, backupParams) case gcpStorage.BucketName != "": - return newGcpReader(ctx, gcpStorage, restoreParams, commonParams) + return newGcpReader(ctx, gcpStorage, restoreParams, commonParams, backupParams) case azureBlob.ContainerName != "": - return newAzureReader(ctx, azureBlob, restoreParams, commonParams) + return newAzureReader(ctx, azureBlob, restoreParams, commonParams, backupParams) default: - return newLocalReader(restoreParams, commonParams) + return newLocalReader(restoreParams, commonParams, backupParams) } } diff --git a/cmd/internal/app/readers.go b/cmd/internal/app/readers.go index 5e53e96e..b65ca5a7 100644 --- a/cmd/internal/app/readers.go +++ b/cmd/internal/app/readers.go @@ -26,11 +26,16 @@ import ( "github.com/aerospike/backup-go/io/local" ) -func newLocalReader(r *models.Restore, c *models.Common) (backup.StreamingReader, error) { +func newLocalReader(r *models.Restore, c *models.Common, b *models.Backup) (backup.StreamingReader, error) { var opts []local.Opt if c.Directory != "" && r.InputFile == "" { - opts = append(opts, local.WithDir(c.Directory), local.WithValidator(asb.NewValidator())) + opts = append(opts, local.WithDir(c.Directory)) + // Append Validator only if backup params are not set. + // That means we don't need to check that we are saving a state file. 
+ if b == nil { + opts = append(opts, local.WithValidator(asb.NewValidator())) + } } if r.InputFile != "" && c.Directory == "" { @@ -45,6 +50,7 @@ func newS3Reader( a *models.AwsS3, r *models.Restore, c *models.Common, + b *models.Backup, ) (backup.StreamingReader, error) { client, err := newS3Client(ctx, a) if err != nil { @@ -57,7 +63,12 @@ func newS3Reader( if c.Directory != "" && r.InputFile == "" { bucketName, path = getBucketFromPath(c.Directory) - opts = append(opts, s3.WithDir(path), s3.WithValidator(asb.NewValidator())) + opts = append(opts, s3.WithDir(path)) + // Append Validator only if backup params are not set. + // That means we don't need to check that we are saving a state file. + if b == nil { + opts = append(opts, s3.WithValidator(asb.NewValidator())) + } } if r.InputFile != "" && c.Directory == "" { @@ -73,6 +84,7 @@ func newGcpReader( g *models.GcpStorage, r *models.Restore, c *models.Common, + b *models.Backup, ) (backup.StreamingReader, error) { client, err := newGcpClient(ctx, g) if err != nil { @@ -82,7 +94,12 @@ func newGcpReader( opts := make([]storage.Opt, 0) if c.Directory != "" && r.InputFile == "" { - opts = append(opts, storage.WithDir(c.Directory), storage.WithValidator(asb.NewValidator())) + opts = append(opts, storage.WithDir(c.Directory)) + // Append Validator only if backup params are not set. + // That means we don't need to check that we are saving a state file. + if b == nil { + opts = append(opts, storage.WithValidator(asb.NewValidator())) + } } if r.InputFile != "" && c.Directory == "" { @@ -97,6 +114,7 @@ func newAzureReader( a *models.AzureBlob, r *models.Restore, c *models.Common, + b *models.Backup, ) (backup.StreamingReader, error) { client, err := newAzureClient(a) if err != nil { @@ -106,7 +124,12 @@ func newAzureReader( opts := make([]blob.Opt, 0) if c.Directory != "" && r.InputFile == "" { - opts = append(opts, blob.WithDir(c.Directory), blob.WithValidator(asb.NewValidator())) + opts = append(opts, blob.WithDir(c.Directory)) + // Append Validator only if backup params are not set. + // That means we don't need to check that we are saving a state file. + if b == nil { + opts = append(opts, blob.WithValidator(asb.NewValidator())) + } } if r.InputFile != "" && c.Directory == "" { diff --git a/cmd/internal/app/writers.go b/cmd/internal/app/writers.go index 92051dad..222daece 100644 --- a/cmd/internal/app/writers.go +++ b/cmd/internal/app/writers.go @@ -42,6 +42,14 @@ func newLocalWriter(ctx context.Context, b *models.Backup, c *models.Common) (ba opts = append(opts, local.WithRemoveFiles()) } + if b.Continue != "" { + opts = append(opts, local.WithSkipDirCheck()) + } + + if b.ShouldSaveState() { + opts = append(opts, local.WithUnbufferedWrite()) + } + opts = append(opts, local.WithValidator(asb.NewValidator())) return local.NewWriter(ctx, opts...) @@ -76,6 +84,14 @@ func newS3Writer( opts = append(opts, s3.WithRemoveFiles()) } + if b.Continue != "" { + opts = append(opts, s3.WithSkipDirCheck()) + } + + if b.ShouldSaveState() { + opts = append(opts, s3.WithUnbufferedWrite()) + } + opts = append(opts, s3.WithValidator(asb.NewValidator())) return s3.NewWriter(ctx, client, bucketName, opts...) 
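The writer construction above gates options on whether a run is being resumed. For readers unfamiliar with the functional-options style used throughout the `io` packages, here is a stripped-down toy version of that pattern; the lowercase names are local re-implementations for illustration, not the library's API.

```go
package main

import "fmt"

// options is a toy stand-in for the per-backend writer options.
type options struct {
	dir          string
	validate     bool
	skipDirCheck bool
}

type opt func(*options)

func withDir(d string) opt    { return func(o *options) { o.dir = d } }
func withValidator() opt      { return func(o *options) { o.validate = true } }
func withSkipDirCheck() opt   { return func(o *options) { o.skipDirCheck = true } }

func newWriterOpts(continuing bool, dir string) []opt {
	opts := []opt{withDir(dir), withValidator()}
	// When resuming, the target directory already holds files from the
	// interrupted run, so the emptiness check has to be skipped.
	if continuing {
		opts = append(opts, withSkipDirCheck())
	}
	return opts
}

func main() {
	var o options
	for _, apply := range newWriterOpts(true, "backup_dir") {
		apply(&o)
	}
	fmt.Printf("%+v\n", o)
}
```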
@@ -106,6 +122,14 @@ func newGcpWriter( opts = append(opts, storage.WithRemoveFiles()) } + if b.Continue != "" { + opts = append(opts, storage.WithSkipDirCheck()) + } + + if b.ShouldSaveState() { + opts = append(opts, storage.WithUnbufferedWrite()) + } + opts = append(opts, storage.WithValidator(asb.NewValidator())) return storage.NewWriter(ctx, client, g.BucketName, opts...) @@ -136,6 +160,14 @@ func newAzureWriter( opts = append(opts, blob.WithRemoveFiles()) } + if b.Continue != "" { + opts = append(opts, blob.WithSkipDirCheck()) + } + + if b.ShouldSaveState() { + opts = append(opts, blob.WithUnbufferedWrite()) + } + opts = append(opts, blob.WithValidator(asb.NewValidator())) return blob.NewWriter(ctx, client, a.ContainerName, opts...) diff --git a/cmd/internal/models/backup.go b/cmd/internal/models/backup.go index 1a1abf58..54b49813 100644 --- a/cmd/internal/models/backup.go +++ b/cmd/internal/models/backup.go @@ -43,5 +43,9 @@ type Backup struct { // ShouldClearTarget check if we should clean target directory. func (b *Backup) ShouldClearTarget() bool { - return b.RemoveFiles || b.RemoveArtifacts + return (b.RemoveFiles || b.RemoveArtifacts) && b.Continue == "" +} + +func (b *Backup) ShouldSaveState() bool { + return b.StateFileDst != "" || b.Continue != "" } diff --git a/io/aws/s3/options.go b/io/aws/s3/options.go index 153e3146..0ea821a9 100644 --- a/io/aws/s3/options.go +++ b/io/aws/s3/options.go @@ -32,6 +32,8 @@ type options struct { startAfter string // skipDirCheck if true, backup directory won't be checked. skipDirCheck bool + // unbuffered means that writings to the cloud will be unbuffered. + unbuffered bool } type Opt func(*options) @@ -90,3 +92,11 @@ func WithSkipDirCheck() Opt { r.skipDirCheck = true } } + +// WithUnbufferedWrite adds an unbuffered flag to the writer. +// Which means that writings to the cloud will be unbuffered. +func WithUnbufferedWrite() Opt { + return func(r *options) { + r.unbuffered = true + } +} diff --git a/io/aws/s3/writer.go b/io/aws/s3/writer.go index 855b5240..00ae3ae1 100644 --- a/io/aws/s3/writer.go +++ b/io/aws/s3/writer.go @@ -148,6 +148,7 @@ func (w *Writer) NewWriter(ctx context.Context, filename string) (io.WriteCloser buffer: new(bytes.Buffer), partNumber: 1, chunkSize: s3DefaultChunkSize, + unbuffered: w.unbuffered, }, nil } @@ -169,6 +170,7 @@ type s3Writer struct { chunkSize int partNumber int32 closed bool + unbuffered bool } var _ io.WriteCloser = (*s3Writer)(nil) @@ -178,6 +180,13 @@ func (w *s3Writer) Write(p []byte) (int, error) { return 0, os.ErrClosed } + if w.unbuffered { + if err := w.uploadDirect(p); err != nil { + return 0, fmt.Errorf("failed to upload direct: %w", err) + } + return len(p), nil + } + if w.buffer.Len() >= w.chunkSize { err := w.uploadPart() if err != nil { @@ -213,19 +222,56 @@ func (w *s3Writer) uploadPart() error { return nil } +// uploadDirect is used for unbuffered upload. 
+func (w *s3Writer) uploadDirect(p []byte) error { + response, err := w.client.UploadPart(context.Background(), &s3.UploadPartInput{ + Body: bytes.NewReader(p), + Bucket: &w.bucket, + Key: &w.key, + PartNumber: &w.partNumber, + UploadId: w.uploadID, + }) + + if err != nil { + return fmt.Errorf("failed to upload part: %w", err) + } + + pn := w.partNumber + w.completedParts = append(w.completedParts, types.CompletedPart{ + PartNumber: &pn, + ETag: response.ETag, + }) + + w.partNumber++ + + if w.ctx.Err() != nil { + if err = w.Close(); err != nil { + return fmt.Errorf("failed to close writer: %w", err) + } + } + + return nil +} + func (w *s3Writer) Close() error { if w.closed { return os.ErrClosed } - if w.buffer.Len() > 0 { + ctx := w.ctx + if w.unbuffered { + ctx = context.Background() + } + + // Upload from buffer only if unbuffered = false. + if !w.unbuffered && w.buffer.Len() > 0 { err := w.uploadPart() if err != nil { return fmt.Errorf("failed to upload part: %w", err) } } - _, err := w.client.CompleteMultipartUpload(w.ctx, + _, err := w.client.CompleteMultipartUpload(ctx, &s3.CompleteMultipartUploadInput{ Bucket: &w.bucket, UploadId: w.uploadID, diff --git a/io/azure/blob/options.go b/io/azure/blob/options.go index 67fa8cc6..00d7bba3 100644 --- a/io/azure/blob/options.go +++ b/io/azure/blob/options.go @@ -40,6 +40,8 @@ type options struct { marker string // skipDirCheck if true, backup directory won't be checked. skipDirCheck bool + // unbuffered means that writings to the cloud will be unbuffered. + unbuffered bool } type Opt func(*options) @@ -108,3 +110,11 @@ func WithSkipDirCheck() Opt { r.skipDirCheck = true } } + +// WithUnbufferedWrite adds an unbuffered flag to the writer. +// Which means that writings to the cloud will be unbuffered. +func WithUnbufferedWrite() Opt { + return func(r *options) { + r.unbuffered = true + } +} diff --git a/io/gcp/storage/options.go b/io/gcp/storage/options.go index b6b54c87..d68d3597 100644 --- a/io/gcp/storage/options.go +++ b/io/gcp/storage/options.go @@ -32,6 +32,8 @@ type options struct { startOffset string // skipDirCheck if true, backup directory won't be checked. skipDirCheck bool + // unbuffered means that writings to the cloud will be unbuffered. + unbuffered bool } type Opt func(*options) @@ -92,3 +94,11 @@ func WithSkipDirCheck() Opt { r.skipDirCheck = true } } + +// WithUnbufferedWrite adds an unbuffered flag to the writer. +// Which means that writings to the cloud will be unbuffered. +func WithUnbufferedWrite() Opt { + return func(r *options) { + r.unbuffered = true + } +} diff --git a/io/local/options.go b/io/local/options.go index 2fd31e86..bad7ecf3 100644 --- a/io/local/options.go +++ b/io/local/options.go @@ -27,7 +27,7 @@ type options struct { // When we stream files or delete files in folder, we skip directories. This flag will avoid skipping. // Default: false withNestedDir bool - // unbuffered means that writings toi disk will be unbuffered. + // unbuffered means that writings to the disk will be unbuffered. unbuffered bool // skipDirCheck if true, backup directory won't be checked. skipDirCheck bool @@ -75,7 +75,7 @@ func WithRemoveFiles() Opt { } // WithUnbufferedWrite adds an unbuffered flag to the writer. -// Which means that writings to disk will be unbuffered. +// Which means that writings to the disk will be unbuffered. 
func WithUnbufferedWrite() Opt { return func(r *options) { r.unbuffered = true From 6be5db5f6d34a00201b2409fab7f34af63c64a8a Mon Sep 17 00:00:00 2001 From: Dmitrii Neeman Date: Sun, 20 Oct 2024 11:03:20 +0300 Subject: [PATCH 19/25] FMWK-570-backup-restore-state - cloud readers stream file fix --- cmd/asbackup/readme.md | 2 +- cmd/internal/app/asrestore.go | 2 +- io/aws/s3/reader.go | 2 +- io/aws/s3/writer.go | 26 ++++---------------------- io/azure/blob/reader.go | 5 +++++ io/gcp/storage/reader.go | 5 +++++ state.go | 2 +- 7 files changed, 18 insertions(+), 26 deletions(-) diff --git a/cmd/asbackup/readme.md b/cmd/asbackup/readme.md index 9c730984..32824a71 100644 --- a/cmd/asbackup/readme.md +++ b/cmd/asbackup/readme.md @@ -133,7 +133,7 @@ Backup Flags: --estimate-samples int The number of samples to take when running a backup estimate. (default 10000) -c, --continue string Resumes an interrupted/failed backup from where it was left off, given the .state file that was generated from the interrupted/failed run. - --state-file-dst .asb.state Either a path with a file name or a directory in which the backup state file will be + --state-file-dst Either a path with a file name or a directory in which the backup state file will be placed if the backup is interrupted/fails. If a path with a file name is used, that exact path is where the backup file will be placed. If a directory is given, the backup state will be placed in the directory with name .asb.state, or diff --git a/cmd/internal/app/asrestore.go b/cmd/internal/app/asrestore.go index 57b86c20..3e7c36e5 100644 --- a/cmd/internal/app/asrestore.go +++ b/cmd/internal/app/asrestore.go @@ -49,7 +49,7 @@ func NewASRestore( return nil, err } - reader, err := getReader(ctx, restoreParams, commonParams, awsS3, gcpStorage, azureBlob, false) + reader, err := getReader(ctx, restoreParams, commonParams, awsS3, gcpStorage, azureBlob, nil) if err != nil { return nil, fmt.Errorf("failed to create backup reader: %w", err) } diff --git a/io/aws/s3/reader.go b/io/aws/s3/reader.go index a5d9b160..26482a59 100644 --- a/io/aws/s3/reader.go +++ b/io/aws/s3/reader.go @@ -76,7 +76,7 @@ func NewReader( if _, err := client.HeadBucket(ctx, &s3.HeadBucketInput{ Bucket: aws.String(bucketName), }); err != nil { - return nil, fmt.Errorf("bucket does not exist or you don't have access: %w", err) + return nil, fmt.Errorf("bucket %s does not exist or you don't have access: %w", bucketName, err) } // S3 storage can read/write to "/" prefix, so we should replace it with "". 
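The "cloud readers stream file fix" in this patch boils down to prefixing the requested file name with the reader's directory before asking the storage backend for it, so a state file stored under a directory prefix can be found again on resume. A minimal sketch of that idea; the function and variable names below are illustrative only.

```go
package main

import (
	"fmt"
	"path/filepath"
)

// resolveKey shows the idea behind the stream-file fix: when the reader was
// opened on a directory, the requested file name is joined with that
// directory before it is fetched from cloud storage.
func resolveKey(isDir bool, dir, filename string) string {
	if isDir {
		return filepath.Join(dir, filename)
	}
	return filename
}

func main() {
	fmt.Println(resolveKey(true, "backups/ns1", "test_state_file")) // backups/ns1/test_state_file
	fmt.Println(resolveKey(false, "", "backups/ns1/test_1.asb"))    // backups/ns1/test_1.asb
}
```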
diff --git a/io/aws/s3/writer.go b/io/aws/s3/writer.go index 00ae3ae1..fd6bc261 100644 --- a/io/aws/s3/writer.go +++ b/io/aws/s3/writer.go @@ -87,7 +87,7 @@ func NewWriter( Bucket: aws.String(bucketName), }) if err != nil { - return nil, fmt.Errorf("bucket does not exist or you don't have access: %w", err) + return nil, fmt.Errorf("bucket %s does not exist or you don't have access: %w", bucketName, err) } if w.isDir && !w.skipDirCheck { @@ -180,13 +180,6 @@ func (w *s3Writer) Write(p []byte) (int, error) { return 0, os.ErrClosed } - if w.unbuffered { - if err := w.uploadDirect(p); err != nil { - return 0, fmt.Errorf("failed to upload direct: %w", err) - } - return len(p), nil - } - if w.buffer.Len() >= w.chunkSize { err := w.uploadPart() if err != nil { @@ -233,7 +226,7 @@ func (w *s3Writer) uploadDirect(p []byte) error { }) if err != nil { - return fmt.Errorf("failed to upload part: %w", err) + return fmt.Errorf("failed to upload part %d: %w", w.partNumber, err) } pn := w.partNumber @@ -244,12 +237,6 @@ func (w *s3Writer) uploadDirect(p []byte) error { w.partNumber++ - if w.ctx.Err() != nil { - if err = w.Close(); err != nil { - return fmt.Errorf("failed to close writer: %w", err) - } - } - return nil } @@ -258,20 +245,15 @@ func (w *s3Writer) Close() error { return os.ErrClosed } - ctx := w.ctx - if w.unbuffered { - ctx = context.Background() - } - // Upload from buffer only if unbuffered = false. - if !w.unbuffered && w.buffer.Len() > 0 { + if w.buffer.Len() > 0 { err := w.uploadPart() if err != nil { return fmt.Errorf("failed to upload part: %w", err) } } - _, err := w.client.CompleteMultipartUpload(ctx, + _, err := w.client.CompleteMultipartUpload(w.ctx, &s3.CompleteMultipartUploadInput{ Bucket: &w.bucket, UploadId: w.uploadID, diff --git a/io/azure/blob/reader.go b/io/azure/blob/reader.go index 280bfe4c..92177cd1 100644 --- a/io/azure/blob/reader.go +++ b/io/azure/blob/reader.go @@ -18,6 +18,7 @@ import ( "context" "fmt" "io" + "path/filepath" "strings" "github.com/Azure/azure-sdk-for-go/sdk/storage/azblob" @@ -152,6 +153,10 @@ func (r *Reader) StreamFile( ctx context.Context, filename string, readersCh chan<- io.ReadCloser, errorsCh chan<- error) { defer close(readersCh) + if r.isDir { + filename = filepath.Join(r.path, filename) + } + resp, err := r.client.DownloadStream(ctx, r.containerName, filename, nil) if err != nil { errorsCh <- fmt.Errorf("failed to create reader from file %s: %w", filename, err) diff --git a/io/gcp/storage/reader.go b/io/gcp/storage/reader.go index 9a11740a..1e9d4573 100644 --- a/io/gcp/storage/reader.go +++ b/io/gcp/storage/reader.go @@ -19,6 +19,7 @@ import ( "errors" "fmt" "io" + "path/filepath" "strings" "cloud.google.com/go/storage" @@ -165,6 +166,10 @@ func (r *Reader) StreamFile( ctx context.Context, filename string, readersCh chan<- io.ReadCloser, errorsCh chan<- error) { defer close(readersCh) + if r.isDir { + filename = filepath.Join(r.path, filename) + } + reader, err := r.bucketHandle.Object(filename).NewReader(ctx) if err != nil { errorsCh <- fmt.Errorf("failed to open %s: %w", filename, err) diff --git a/state.go b/state.go index 66bf4bda..46aa318f 100644 --- a/state.go +++ b/state.go @@ -188,7 +188,7 @@ func (s *State) dump() error { if err = enc.Encode(s); err != nil { return fmt.Errorf("failed to encode state data: %w", err) } - // file.Close() + file.Close() s.mu.Unlock() s.logger.Debug("state file dumped", slog.Time("saved at", time.Now())) From ac8f6d3aa382a37d8f68c82bc8a93c642ed0b18e Mon Sep 17 00:00:00 2001 From: Dmitrii Neeman Date: 
Sun, 20 Oct 2024 14:23:12 +0300 Subject: [PATCH 20/25] FMWK-570-backup-restore-state - save state on file close --- cmd/internal/app/configs.go | 1 - cmd/internal/app/writers.go | 16 ----- cmd/internal/flags/backup.go | 11 +--- cmd/internal/models/backup.go | 47 ++++++++------- config_backup.go | 6 +- handler_backup.go | 16 ++--- handler_backup_records.go | 4 ++ io/aws/s3/options.go | 10 ---- io/aws/s3/reader.go | 5 ++ io/aws/s3/writer.go | 27 --------- io/azure/blob/options.go | 10 ---- io/gcp/storage/options.go | 10 ---- io/local/options.go | 10 ---- io/local/writer.go | 5 -- io/sized/writer.go | 18 ++++-- io/sized/writer_test.go | 4 +- models/partition_filter_serialized.go | 2 + state.go | 85 ++++++++++++++------------- writers.go | 6 ++ writers_test.go | 2 +- 20 files changed, 116 insertions(+), 179 deletions(-) diff --git a/cmd/internal/app/configs.go b/cmd/internal/app/configs.go index e49fc849..6bdc9fe5 100644 --- a/cmd/internal/app/configs.go +++ b/cmd/internal/app/configs.go @@ -62,7 +62,6 @@ func mapBackupConfig( c.Bandwidth = commonParams.Nice * 1024 * 1024 c.Compact = backupParams.Compact c.NoTTLOnly = backupParams.NoTTLOnly - c.StateFileDumpDuration = time.Duration(backupParams.StateFileDumpDuration) * time.Millisecond c.OutputFilePrefix = backupParams.OutputFilePrefix if backupParams.Continue != "" { diff --git a/cmd/internal/app/writers.go b/cmd/internal/app/writers.go index 222daece..6f355cb3 100644 --- a/cmd/internal/app/writers.go +++ b/cmd/internal/app/writers.go @@ -46,10 +46,6 @@ func newLocalWriter(ctx context.Context, b *models.Backup, c *models.Common) (ba opts = append(opts, local.WithSkipDirCheck()) } - if b.ShouldSaveState() { - opts = append(opts, local.WithUnbufferedWrite()) - } - opts = append(opts, local.WithValidator(asb.NewValidator())) return local.NewWriter(ctx, opts...) @@ -88,10 +84,6 @@ func newS3Writer( opts = append(opts, s3.WithSkipDirCheck()) } - if b.ShouldSaveState() { - opts = append(opts, s3.WithUnbufferedWrite()) - } - opts = append(opts, s3.WithValidator(asb.NewValidator())) return s3.NewWriter(ctx, client, bucketName, opts...) @@ -126,10 +118,6 @@ func newGcpWriter( opts = append(opts, storage.WithSkipDirCheck()) } - if b.ShouldSaveState() { - opts = append(opts, storage.WithUnbufferedWrite()) - } - opts = append(opts, storage.WithValidator(asb.NewValidator())) return storage.NewWriter(ctx, client, g.BucketName, opts...) @@ -164,10 +152,6 @@ func newAzureWriter( opts = append(opts, blob.WithSkipDirCheck()) } - if b.ShouldSaveState() { - opts = append(opts, blob.WithUnbufferedWrite()) - } - opts = append(opts, blob.WithValidator(asb.NewValidator())) return blob.NewWriter(ctx, client, a.ContainerName, opts...) diff --git a/cmd/internal/flags/backup.go b/cmd/internal/flags/backup.go index 55a96ddb..74670c76 100644 --- a/cmd/internal/flags/backup.go +++ b/cmd/internal/flags/backup.go @@ -132,14 +132,9 @@ func (f *Backup) NewFlagSet() *pflag.FlagSet { "that was generated from the interrupted/failed run.") flagSet.StringVar(&f.StateFileDst, "state-file-dst", "", - "Either a path with a file name or a directory in which the backup state file will be\n"+ - "placed if the backup is interrupted/fails. If a path with a file name is used, that\n"+ - "exact path is where the backup file will be placed. 
If a directory is given, the backup\n"+ - "state will be placed in the directory with name `.asb.state`, or\n"+ - "`.asb.state` if `--output-file-prefix` is given.") - flagSet.Int64Var(&f.StateFileDumpDuration, "state-file-dump-duration", - 10000, - "Intervals in milliseconds, how often dump state file to disk.") + "Name or path relatively to --directory where state file will be saved.\n"+ + "Works only with --file-limit parameter. As we reach file-limit and save file to storage\n"+ + "current state will be saved.") flagSet.Int64Var(&f.ScanPageSize, "scan-page-size", 10000, "How many records will be read on one iteration for continuation backup.\n"+ diff --git a/cmd/internal/models/backup.go b/cmd/internal/models/backup.go index 54b49813..7d8205b3 100644 --- a/cmd/internal/models/backup.go +++ b/cmd/internal/models/backup.go @@ -15,30 +15,29 @@ package models type Backup struct { - OutputFile string - RemoveFiles bool - ModifiedBefore string - ModifiedAfter string - FileLimit int64 - AfterDigest string - MaxRecords int64 - NoBins bool - SleepBetweenRetries int - FilterExpression string - ParallelNodes bool - RemoveArtifacts bool - Compact bool - NodeList string - NoTTLOnly bool - PreferRacks string - PartitionList string - Estimate bool - EstimateSamples int64 - StateFileDst string - StateFileDumpDuration int64 - Continue string - ScanPageSize int64 - OutputFilePrefix string + OutputFile string + RemoveFiles bool + ModifiedBefore string + ModifiedAfter string + FileLimit int64 + AfterDigest string + MaxRecords int64 + NoBins bool + SleepBetweenRetries int + FilterExpression string + ParallelNodes bool + RemoveArtifacts bool + Compact bool + NodeList string + NoTTLOnly bool + PreferRacks string + PartitionList string + Estimate bool + EstimateSamples int64 + StateFileDst string + Continue string + ScanPageSize int64 + OutputFilePrefix string } // ShouldClearTarget check if we should clean target directory. diff --git a/config_backup.go b/config_backup.go index f0be9e50..24849732 100644 --- a/config_backup.go +++ b/config_backup.go @@ -110,8 +110,6 @@ type BackupConfig struct { // state will be placed in the directory with name `.asb.state` // Works only for default and/or partition backup. Not work with ParallelNodes or NodeList. StateFile string - // How often we will dump a state file to disk. - StateFileDumpDuration time.Duration // Resumes an interrupted/failed backup from where it was left off, given the .state file // that was generated from the interrupted/failed run. // Works only for default and/or partition backup. Not work with ParallelNodes or NodeList. 
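With this patch the state file is tied to `--file-limit`: the state is persisted whenever a size-limited backup file is closed and handed to storage, so a saved filter position never points into a half-written file. Below is a toy sketch of that save-on-close idea; the real `sized.Writer` later in this patch carries a writer index `n` and a `saveCommandChan`, and everything in the snippet is illustrative.

```go
package main

import (
	"bytes"
	"fmt"
)

// limitedWriter is a toy version of the save-on-close idea: once limit bytes
// have been written, it "closes the file" and reports its index on saveCh so
// the caller can persist the backup state at a clean file boundary.
type limitedWriter struct {
	n      int // writer index, so the state keeper knows which worker rotated
	limit  int64
	size   int64
	buf    bytes.Buffer
	saveCh chan<- int
}

func (w *limitedWriter) Write(p []byte) (int, error) {
	n, err := w.buf.Write(p)
	w.size += int64(n)
	if w.size >= w.limit {
		// In the real code this is where the underlying file is closed,
		// a new one is opened, and a state save is requested.
		w.saveCh <- w.n
		w.size = 0
		w.buf.Reset()
	}
	return n, err
}

func main() {
	saveCh := make(chan int, 1)
	w := &limitedWriter{n: 3, limit: 8, saveCh: saveCh}

	w.Write([]byte("0123456789")) // crosses the 8-byte limit
	fmt.Println("save state requested by writer", <-saveCh)
}
```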
@@ -207,6 +205,10 @@ func (c *BackupConfig) validate() error { return fmt.Errorf("page size must be set if saving state to state file is enabled") } + if c.StateFile != "" && c.FileLimit == 0 { + return fmt.Errorf("file limit must be set if saving state to state file is enabled") + } + if c.Continue && c.StateFile == "" { return fmt.Errorf("state file must be set if continue is enabled") } diff --git a/handler_backup.go b/handler_backup.go index 677ce96f..d0802ee7 100644 --- a/handler_backup.go +++ b/handler_backup.go @@ -269,9 +269,9 @@ func (bh *BackupHandler) makeWriteWorkers( writeWorkers := make([]pipeline.Worker[*models.Token], len(backupWriters)) for i, w := range backupWriters { - var dataWriter pipeline.DataWriter[*models.Token] = newTokenWriter(bh.encoder, w, bh.logger, nil) + var dataWriter pipeline.DataWriter[*models.Token] = newTokenWriter(bh.encoder, w, bh.logger, nil, i) if bh.state != nil { - dataWriter = newTokenWriter(bh.encoder, w, bh.logger, bh.state.RecordsStateChan) + dataWriter = newTokenWriter(bh.encoder, w, bh.logger, bh.state.RecordsStateChan, i) } dataWriter = newWriterWithTokenStats(dataWriter, &bh.stats, bh.logger) @@ -285,7 +285,7 @@ func (bh *BackupHandler) makeWriters(ctx context.Context, n int) ([]io.WriteClos backupWriters := make([]io.WriteCloser, n) for i := 0; i < n; i++ { - writer, err := bh.newWriter(ctx) + writer, err := bh.newWriter(ctx, i) if err != nil { return nil, err } @@ -308,9 +308,9 @@ func closeWriters(backupWriters []io.WriteCloser, logger *slog.Logger) { // If FileLimit is set, it returns a sized writer limited to FileLimit bytes. // The returned writer may be compressed or encrypted depending on the BackupHandler's // configuration. -func (bh *BackupHandler) newWriter(ctx context.Context) (io.WriteCloser, error) { +func (bh *BackupHandler) newWriter(ctx context.Context, n int) (io.WriteCloser, error) { if bh.config.FileLimit > 0 { - return sized.NewWriter(ctx, bh.config.FileLimit, bh.newConfiguredWriter) + return sized.NewWriter(ctx, n, bh.state.SaveCommandChan, bh.config.FileLimit, bh.newConfiguredWriter) } return bh.newConfiguredWriter(ctx) @@ -446,7 +446,7 @@ func (bh *BackupHandler) backupSIndexes( reader := aerospike.NewSIndexReader(bh.infoClient, bh.config.Namespace, bh.logger) sindexReadWorker := pipeline.NewReadWorker[*models.Token](reader) - sindexWriter := pipeline.DataWriter[*models.Token](newTokenWriter(bh.encoder, writer, bh.logger, nil)) + sindexWriter := pipeline.DataWriter[*models.Token](newTokenWriter(bh.encoder, writer, bh.logger, nil, -1)) if bh.state != nil { sindexWriter = pipeline.DataWriter[*models.Token]( newTokenWriter( @@ -454,6 +454,7 @@ func (bh *BackupHandler) backupSIndexes( writer, bh.logger, bh.state.RecordsStateChan, + -1, ), ) } @@ -480,7 +481,7 @@ func (bh *BackupHandler) backupUDFs( reader := aerospike.NewUDFReader(bh.infoClient, bh.logger) udfReadWorker := pipeline.NewReadWorker[*models.Token](reader) - udfWriter := pipeline.DataWriter[*models.Token](newTokenWriter(bh.encoder, writer, bh.logger, nil)) + udfWriter := pipeline.DataWriter[*models.Token](newTokenWriter(bh.encoder, writer, bh.logger, nil, -1)) if bh.state != nil { udfWriter = pipeline.DataWriter[*models.Token]( @@ -489,6 +490,7 @@ func (bh *BackupHandler) backupUDFs( writer, bh.logger, bh.state.RecordsStateChan, + -1, ), ) } diff --git a/handler_backup_records.go b/handler_backup_records.go index 42477069..a876e391 100644 --- a/handler_backup_records.go +++ b/handler_backup_records.go @@ -218,6 +218,10 @@ func (bh *backupRecordsHandler) 
makeAerospikeReadWorkersForPartition( if err != nil { return nil, err } + // Init state. + if err = bh.state.InitState(partitionGroups); err != nil { + return nil, err + } } // If we have multiply partition filters, we shrink workers to number of filters. diff --git a/io/aws/s3/options.go b/io/aws/s3/options.go index 0ea821a9..153e3146 100644 --- a/io/aws/s3/options.go +++ b/io/aws/s3/options.go @@ -32,8 +32,6 @@ type options struct { startAfter string // skipDirCheck if true, backup directory won't be checked. skipDirCheck bool - // unbuffered means that writings to the cloud will be unbuffered. - unbuffered bool } type Opt func(*options) @@ -92,11 +90,3 @@ func WithSkipDirCheck() Opt { r.skipDirCheck = true } } - -// WithUnbufferedWrite adds an unbuffered flag to the writer. -// Which means that writings to the cloud will be unbuffered. -func WithUnbufferedWrite() Opt { - return func(r *options) { - r.unbuffered = true - } -} diff --git a/io/aws/s3/reader.go b/io/aws/s3/reader.go index 26482a59..3b0ac602 100644 --- a/io/aws/s3/reader.go +++ b/io/aws/s3/reader.go @@ -18,6 +18,7 @@ import ( "context" "fmt" "io" + "path/filepath" "strings" "github.com/aws/aws-sdk-go-v2/aws" @@ -175,6 +176,10 @@ func (r *Reader) StreamFile( ctx context.Context, filename string, readersCh chan<- io.ReadCloser, errorsCh chan<- error) { defer close(readersCh) + if r.isDir { + filename = filepath.Join(r.path, filename) + } + object, err := r.client.GetObject(ctx, &s3.GetObjectInput{ Bucket: &r.bucketName, Key: &filename, diff --git a/io/aws/s3/writer.go b/io/aws/s3/writer.go index fd6bc261..297b5564 100644 --- a/io/aws/s3/writer.go +++ b/io/aws/s3/writer.go @@ -148,7 +148,6 @@ func (w *Writer) NewWriter(ctx context.Context, filename string) (io.WriteCloser buffer: new(bytes.Buffer), partNumber: 1, chunkSize: s3DefaultChunkSize, - unbuffered: w.unbuffered, }, nil } @@ -215,37 +214,11 @@ func (w *s3Writer) uploadPart() error { return nil } -// uploadDirect is used for unbuffered upload. -func (w *s3Writer) uploadDirect(p []byte) error { - response, err := w.client.UploadPart(context.Background(), &s3.UploadPartInput{ - Body: bytes.NewReader(p), - Bucket: &w.bucket, - Key: &w.key, - PartNumber: &w.partNumber, - UploadId: w.uploadID, - }) - - if err != nil { - return fmt.Errorf("failed to upload part %d: %w", w.partNumber, err) - } - - pn := w.partNumber - w.completedParts = append(w.completedParts, types.CompletedPart{ - PartNumber: &pn, - ETag: response.ETag, - }) - - w.partNumber++ - - return nil -} - func (w *s3Writer) Close() error { if w.closed { return os.ErrClosed } - // Upload from buffer only if unbuffered = false. if w.buffer.Len() > 0 { err := w.uploadPart() if err != nil { diff --git a/io/azure/blob/options.go b/io/azure/blob/options.go index 00d7bba3..67fa8cc6 100644 --- a/io/azure/blob/options.go +++ b/io/azure/blob/options.go @@ -40,8 +40,6 @@ type options struct { marker string // skipDirCheck if true, backup directory won't be checked. skipDirCheck bool - // unbuffered means that writings to the cloud will be unbuffered. - unbuffered bool } type Opt func(*options) @@ -110,11 +108,3 @@ func WithSkipDirCheck() Opt { r.skipDirCheck = true } } - -// WithUnbufferedWrite adds an unbuffered flag to the writer. -// Which means that writings to the cloud will be unbuffered. 
-func WithUnbufferedWrite() Opt { - return func(r *options) { - r.unbuffered = true - } -} diff --git a/io/gcp/storage/options.go b/io/gcp/storage/options.go index d68d3597..b6b54c87 100644 --- a/io/gcp/storage/options.go +++ b/io/gcp/storage/options.go @@ -32,8 +32,6 @@ type options struct { startOffset string // skipDirCheck if true, backup directory won't be checked. skipDirCheck bool - // unbuffered means that writings to the cloud will be unbuffered. - unbuffered bool } type Opt func(*options) @@ -94,11 +92,3 @@ func WithSkipDirCheck() Opt { r.skipDirCheck = true } } - -// WithUnbufferedWrite adds an unbuffered flag to the writer. -// Which means that writings to the cloud will be unbuffered. -func WithUnbufferedWrite() Opt { - return func(r *options) { - r.unbuffered = true - } -} diff --git a/io/local/options.go b/io/local/options.go index bad7ecf3..834c9be6 100644 --- a/io/local/options.go +++ b/io/local/options.go @@ -27,8 +27,6 @@ type options struct { // When we stream files or delete files in folder, we skip directories. This flag will avoid skipping. // Default: false withNestedDir bool - // unbuffered means that writings to the disk will be unbuffered. - unbuffered bool // skipDirCheck if true, backup directory won't be checked. skipDirCheck bool } @@ -74,14 +72,6 @@ func WithRemoveFiles() Opt { } } -// WithUnbufferedWrite adds an unbuffered flag to the writer. -// Which means that writings to the disk will be unbuffered. -func WithUnbufferedWrite() Opt { - return func(r *options) { - r.unbuffered = true - } -} - // WithSkipDirCheck adds skip dir check flags. // Which means that backup directory won't be checked for emptiness. func WithSkipDirCheck() Opt { diff --git a/io/local/writer.go b/io/local/writer.go index 71b222ff..6c1f3250 100644 --- a/io/local/writer.go +++ b/io/local/writer.go @@ -204,11 +204,6 @@ func (w *Writer) NewWriter(ctx context.Context, fileName string) (io.WriteCloser return nil, fmt.Errorf("failed to open file %s: %w", filePath, err) } - // If unbuffered write is set, we return file directly. - if w.unbuffered { - return file, nil - } - return &bufferedFile{bufio.NewWriterSize(file, bufferSize), file}, nil } diff --git a/io/sized/writer.go b/io/sized/writer.go index be4d9bee..e00c1d3e 100644 --- a/io/sized/writer.go +++ b/io/sized/writer.go @@ -29,30 +29,40 @@ type Writer struct { open func(context.Context) (io.WriteCloser, error) size int64 limit int64 + // Number of writer, for saving state. + n int + saveCommandChan chan int } // NewWriter creates a new Writer writer with a size limit. // limit must be greater than 0. 
-func NewWriter(ctx context.Context, limit int64, +func NewWriter(ctx context.Context, n int, saveCommandChan chan int, limit int64, open func(context.Context) (io.WriteCloser, error)) (*Writer, error) { if limit <= 0 { return nil, fmt.Errorf("limit must be greater than 0, got %d", limit) } return &Writer{ - ctx: ctx, - limit: limit, - open: open, + ctx: ctx, + limit: limit, + open: open, + n: n, + saveCommandChan: saveCommandChan, }, nil } func (f *Writer) Write(p []byte) (n int, err error) { if f.size >= f.limit { + err = f.writer.Close() if err != nil { return 0, fmt.Errorf("failed to close writer: %w", err) } + if f.saveCommandChan != nil { + f.saveCommandChan <- f.n + } + f.size = 0 f.writer = nil } diff --git a/io/sized/writer_test.go b/io/sized/writer_test.go index f85c1098..e498a1bf 100644 --- a/io/sized/writer_test.go +++ b/io/sized/writer_test.go @@ -61,7 +61,7 @@ func (suite *sizedTestSuite) Test_writeCloserSized() { return writer2, nil } - wcs, err := NewWriter(context.Background(), 10, open) + wcs, err := NewWriter(context.Background(), 1, nil, 10, open) suite.NotNil(wcs) suite.Nil(err) @@ -111,6 +111,6 @@ func (suite *sizedTestSuite) Test_writeCloserSized_ErrLimit() { return writer2, nil } - _, err := NewWriter(context.Background(), -1, open) + _, err := NewWriter(context.Background(), 1, nil, -1, open) require.ErrorContains(suite.T(), err, "limit must be greater than 0") } diff --git a/models/partition_filter_serialized.go b/models/partition_filter_serialized.go index a9d67a3a..56307d3a 100644 --- a/models/partition_filter_serialized.go +++ b/models/partition_filter_serialized.go @@ -27,6 +27,8 @@ type PartitionFilterSerialized struct { Count int Digest []byte Cursor []byte + // Worker number. + N int } // NewPartitionFilterSerialized serialize *a.PartitionFilter and returns new PartitionFilterSerialized instance. diff --git a/state.go b/state.go index 46aa318f..dba63239 100644 --- a/state.go +++ b/state.go @@ -38,14 +38,16 @@ type State struct { // RecordsStateChan communication channel to save current filter state. RecordsStateChan chan models.PartitionFilterSerialized // RecordStates store states of all filters. - RecordStates map[string]models.PartitionFilterSerialized + RecordStates map[int]models.PartitionFilterSerialized + + RecordStatesSaved map[int]models.PartitionFilterSerialized + // SaveCommandChan command to save current state for worker. + SaveCommandChan chan int // Mutex for RecordStates operations. // Ordinary mutex is used, because we must not allow any writings when we read state. mu sync.Mutex // File to save state to. FileName string - // How often file will be saved to disk. - DumpDuration time.Duration // writer is used to create a state file. writer Writer @@ -93,12 +95,13 @@ func newState( s := &State{ ctx: ctx, // RecordsStateChan must not be buffered, so we can stop all operations. - RecordsStateChan: make(chan models.PartitionFilterSerialized), - RecordStates: make(map[string]models.PartitionFilterSerialized), - FileName: config.StateFile, - DumpDuration: config.StateFileDumpDuration, - writer: writer, - logger: logger, + RecordsStateChan: make(chan models.PartitionFilterSerialized), + RecordStates: make(map[int]models.PartitionFilterSerialized), + RecordStatesSaved: make(map[int]models.PartitionFilterSerialized), + SaveCommandChan: make(chan int), + FileName: config.StateFile, + writer: writer, + logger: logger, } // Run watcher on initialization. 
go s.serve() @@ -131,9 +134,10 @@ func newStateFromFile( s.writer = writer s.logger = logger s.RecordsStateChan = make(chan models.PartitionFilterSerialized) + s.SaveCommandChan = make(chan int) s.Counter++ - logger.Debug("loaded state file successfully") + logger.Debug("loaded state file successfully, filters loaded:", len(s.RecordStatesSaved)) // Run watcher on initialization. go s.serve() @@ -144,39 +148,15 @@ func newStateFromFile( // serve dumps files to disk. func (s *State) serve() { - ticker := time.NewTicker(s.DumpDuration) - defer ticker.Stop() - - // Dump a file at the very beginning. - if err := s.dump(); err != nil { - s.logger.Error("failed to dump state", slog.Any("error", err)) - return - } - - // Server ticker. - for { - select { - case <-s.ctx.Done(): - // saves state and exit - if err := s.dump(); err != nil { - s.logger.Error("failed to dump state", slog.Any("error", err)) - return - } - - s.logger.Debug("state context done") - + for msg := range s.SaveCommandChan { + if err := s.dump(msg); err != nil { + s.logger.Error("failed to dump state", slog.Any("error", err)) return - case <-ticker.C: - // save intermediate state. - if err := s.dump(); err != nil { - s.logger.Error("failed to dump state", slog.Any("error", err)) - return - } } } } -func (s *State) dump() error { +func (s *State) dump(n int) error { file, err := s.writer.NewWriter(s.ctx, s.FileName) if err != nil { return fmt.Errorf("failed to create state file %s: %w", s.FileName, err) @@ -185,10 +165,17 @@ func (s *State) dump() error { enc := gob.NewEncoder(file) s.mu.Lock() + + if n > -1 { + s.RecordStatesSaved[n] = s.RecordStates[n] + } + if err = enc.Encode(s); err != nil { return fmt.Errorf("failed to encode state data: %w", err) } + file.Close() + s.mu.Unlock() s.logger.Debug("state file dumped", slog.Time("saved at", time.Now())) @@ -196,12 +183,26 @@ func (s *State) dump() error { return nil } +func (s *State) InitState(pf []*a.PartitionFilter) error { + s.mu.Lock() + for i := range pf { + pfs, err := models.NewPartitionFilterSerialized(pf[i]) + if err != nil { + return err + } + s.RecordStates[i] = pfs + s.RecordStatesSaved[i] = pfs + } + s.mu.Unlock() + return s.dump(-1) +} + func (s *State) loadPartitionFilters() ([]*a.PartitionFilter, error) { s.mu.Lock() - result := make([]*a.PartitionFilter, 0, len(s.RecordStates)) + result := make([]*a.PartitionFilter, 0, len(s.RecordStatesSaved)) - for _, state := range s.RecordStates { + for _, state := range s.RecordStatesSaved { f, err := state.Decode() if err != nil { return nil, err @@ -230,8 +231,8 @@ func (s *State) serveRecords() { counter++ s.mu.Lock() - key := fmt.Sprintf("%d%d%s", state.Begin, state.Count, state.Digest) - s.RecordStates[key] = state + // key := fmt.Sprintf("%d%d%s", state.Begin, state.Count, state.Digest) + s.RecordStates[state.N] = state s.mu.Unlock() } } diff --git a/writers.go b/writers.go index 49b81299..7ca31036 100644 --- a/writers.go +++ b/writers.go @@ -89,6 +89,8 @@ type tokenWriter struct { output io.Writer logger *slog.Logger recordsStateChan chan<- models.PartitionFilterSerialized + // Number of writer. + n int } // newTokenWriter creates a new tokenWriter. 
@@ -97,6 +99,7 @@ func newTokenWriter( output io.Writer, logger *slog.Logger, recordsStateChan chan<- models.PartitionFilterSerialized, + n int, ) *tokenWriter { id := uuid.NewString() logger = logging.WithWriter(logger, id, logging.WriterTypeToken) @@ -107,6 +110,7 @@ func newTokenWriter( output: output, logger: logger, recordsStateChan: recordsStateChan, + n: n, } } @@ -118,6 +122,8 @@ func (w *tokenWriter) Write(v *models.Token) (int, error) { } if w.recordsStateChan != nil && v.Filter != nil { + // Set worker number. + v.Filter.N = w.n w.recordsStateChan <- *v.Filter } diff --git a/writers_test.go b/writers_test.go index 4d78ef23..5517be74 100644 --- a/writers_test.go +++ b/writers_test.go @@ -70,7 +70,7 @@ func (suite *writersTestSuite) TestTokenWriter() { mockEncoder.EXPECT().EncodeToken(invalidToken).Return(nil, errors.New("error")) dst := bytes.Buffer{} - writer := newTokenWriter(mockEncoder, &dst, slog.Default(), nil) + writer := newTokenWriter(mockEncoder, &dst, slog.Default(), nil, -1) suite.NotNil(writer) _, err := writer.Write(recToken) From 11e1371f1b1bc7a8b9e985e6da3b29d7e7139823 Mon Sep 17 00:00:00 2001 From: Dmitrii Neeman Date: Sun, 20 Oct 2024 14:47:40 +0300 Subject: [PATCH 21/25] FMWK-570-backup-restore-state - linters --- cmd/internal/app/asbackup.go | 1 + cmd/internal/app/readers_test.go | 22 +++++++++++++--------- handler_backup_records.go | 2 +- io/aws/s3/writer.go | 1 - io/sized/writer.go | 1 - state.go | 5 +++-- state_test.go | 2 -- tests/integration/integration_test.go | 6 ++---- 8 files changed, 20 insertions(+), 20 deletions(-) diff --git a/cmd/internal/app/asbackup.go b/cmd/internal/app/asbackup.go index 1df6707c..e3a6c89d 100644 --- a/cmd/internal/app/asbackup.go +++ b/cmd/internal/app/asbackup.go @@ -87,6 +87,7 @@ func NewASBackup( if backupParams.ShouldSaveState() { r := &models.Restore{InputFile: backupParams.OutputFile} + reader, err = getReader(ctx, r, commonParams, awsS3, gcpStorage, azureBlob, backupParams) if err != nil { return nil, fmt.Errorf("failed to create reader: %w", err) diff --git a/cmd/internal/app/readers_test.go b/cmd/internal/app/readers_test.go index 3435fe59..b03bcd37 100644 --- a/cmd/internal/app/readers_test.go +++ b/cmd/internal/app/readers_test.go @@ -28,8 +28,9 @@ func TestNewLocalReader(t *testing.T) { c := &models.Common{ Directory: t.TempDir(), } + b := &models.Backup{} - reader, err := newLocalReader(r, c) + reader, err := newLocalReader(r, c, b) assert.NoError(t, err) assert.NotNil(t, reader) assert.Equal(t, testLocalType, reader.GetType()) @@ -39,13 +40,13 @@ func TestNewLocalReader(t *testing.T) { } c = &models.Common{} - reader, err = newLocalReader(r, c) + reader, err = newLocalReader(r, c, b) assert.NoError(t, err) assert.NotNil(t, reader) assert.Equal(t, testLocalType, reader.GetType()) r = &models.Restore{} - reader, err = newLocalReader(r, c) + reader, err = newLocalReader(r, c, b) assert.Error(t, err) assert.Nil(t, reader) } @@ -59,6 +60,7 @@ func TestNewS3Reader(t *testing.T) { c := &models.Common{ Directory: "asbackup/" + t.TempDir(), } + b := &models.Backup{} s3cfg := &models.AwsS3{ Region: testS3Region, @@ -68,7 +70,7 @@ func TestNewS3Reader(t *testing.T) { ctx := context.Background() - writer, err := newS3Reader(ctx, s3cfg, r, c) + writer, err := newS3Reader(ctx, s3cfg, r, c, b) assert.NoError(t, err) assert.NotNil(t, writer) assert.Equal(t, testS3Type, writer.GetType()) @@ -78,7 +80,7 @@ func TestNewS3Reader(t *testing.T) { } c = &models.Common{} - writer, err = newS3Reader(ctx, s3cfg, r, c) + writer, err = 
newS3Reader(ctx, s3cfg, r, c, b) assert.NoError(t, err) assert.NotNil(t, writer) assert.Equal(t, testS3Type, writer.GetType()) @@ -93,6 +95,7 @@ func TestNewGcpReader(t *testing.T) { c := &models.Common{ Directory: t.TempDir(), } + b := &models.Backup{} cfg := &models.GcpStorage{ BucketName: testBucket, @@ -101,7 +104,7 @@ func TestNewGcpReader(t *testing.T) { ctx := context.Background() - writer, err := newGcpReader(ctx, cfg, r, c) + writer, err := newGcpReader(ctx, cfg, r, c, b) assert.NoError(t, err) assert.NotNil(t, writer) assert.Equal(t, testGcpType, writer.GetType()) @@ -111,7 +114,7 @@ func TestNewGcpReader(t *testing.T) { } c = &models.Common{} - writer, err = newGcpReader(ctx, cfg, r, c) + writer, err = newGcpReader(ctx, cfg, r, c, b) assert.NoError(t, err) assert.NotNil(t, writer) assert.Equal(t, testGcpType, writer.GetType()) @@ -126,6 +129,7 @@ func TestNewAzureReader(t *testing.T) { c := &models.Common{ Directory: t.TempDir(), } + b := &models.Backup{} cfg := &models.AzureBlob{ AccountName: testAzureAccountName, @@ -136,7 +140,7 @@ func TestNewAzureReader(t *testing.T) { ctx := context.Background() - writer, err := newAzureReader(ctx, cfg, r, c) + writer, err := newAzureReader(ctx, cfg, r, c, b) assert.NoError(t, err) assert.NotNil(t, writer) assert.Equal(t, testAzureType, writer.GetType()) @@ -146,7 +150,7 @@ func TestNewAzureReader(t *testing.T) { } c = &models.Common{} - writer, err = newAzureReader(ctx, cfg, r, c) + writer, err = newAzureReader(ctx, cfg, r, c, b) assert.NoError(t, err) assert.NotNil(t, writer) assert.Equal(t, testAzureType, writer.GetType()) diff --git a/handler_backup_records.go b/handler_backup_records.go index a876e391..f94a7600 100644 --- a/handler_backup_records.go +++ b/handler_backup_records.go @@ -219,7 +219,7 @@ func (bh *backupRecordsHandler) makeAerospikeReadWorkersForPartition( return nil, err } // Init state. - if err = bh.state.InitState(partitionGroups); err != nil { + if err := bh.state.InitState(partitionGroups); err != nil { return nil, err } } diff --git a/io/aws/s3/writer.go b/io/aws/s3/writer.go index 297b5564..0d4bc16b 100644 --- a/io/aws/s3/writer.go +++ b/io/aws/s3/writer.go @@ -169,7 +169,6 @@ type s3Writer struct { chunkSize int partNumber int32 closed bool - unbuffered bool } var _ io.WriteCloser = (*s3Writer)(nil) diff --git a/io/sized/writer.go b/io/sized/writer.go index e00c1d3e..0ec7e27c 100644 --- a/io/sized/writer.go +++ b/io/sized/writer.go @@ -53,7 +53,6 @@ func NewWriter(ctx context.Context, n int, saveCommandChan chan int, limit int64 func (f *Writer) Write(p []byte) (n int, err error) { if f.size >= f.limit { - err = f.writer.Close() if err != nil { return 0, fmt.Errorf("failed to close writer: %w", err) diff --git a/state.go b/state.go index dba63239..4ca51840 100644 --- a/state.go +++ b/state.go @@ -137,7 +137,7 @@ func newStateFromFile( s.SaveCommandChan = make(chan int) s.Counter++ - logger.Debug("loaded state file successfully, filters loaded:", len(s.RecordStatesSaved)) + logger.Debug("loaded state file successfully", slog.Int("filters loaded", len(s.RecordStatesSaved))) // Run watcher on initialization. 
go s.serve() @@ -190,10 +190,12 @@ func (s *State) InitState(pf []*a.PartitionFilter) error { if err != nil { return err } + s.RecordStates[i] = pfs s.RecordStatesSaved[i] = pfs } s.mu.Unlock() + return s.dump(-1) } @@ -231,7 +233,6 @@ func (s *State) serveRecords() { counter++ s.mu.Lock() - // key := fmt.Sprintf("%d%d%s", state.Begin, state.Count, state.Digest) s.RecordStates[state.N] = state s.mu.Unlock() } diff --git a/state_test.go b/state_test.go index 56b71899..a0e19a08 100644 --- a/state_test.go +++ b/state_test.go @@ -49,7 +49,6 @@ func TestState(t *testing.T) { cfg := NewDefaultBackupConfig() cfg.StateFile = testStateFile - cfg.StateFileDumpDuration = testDuration cfg.PageSize = 100000 cfg.SyncPipelines = true cfg.PartitionFilters = testFilters @@ -64,7 +63,6 @@ func TestState(t *testing.T) { local.WithValidator(asb.NewValidator()), local.WithSkipDirCheck(), local.WithDir(testDir), - local.WithUnbufferedWrite(), ) require.NoError(t, err) diff --git a/tests/integration/integration_test.go b/tests/integration/integration_test.go index 2ba258fd..c46ae953 100644 --- a/tests/integration/integration_test.go +++ b/tests/integration/integration_test.go @@ -1151,7 +1151,6 @@ func (suite *backupRestoreTestSuite) runFirstBackup(ctx context.Context, testFol local.WithValidator(asb.NewValidator()), local.WithSkipDirCheck(), local.WithDir(bFolder), - local.WithUnbufferedWrite(), ) if err != nil { panic(err) @@ -1170,7 +1169,7 @@ func (suite *backupRestoreTestSuite) runFirstBackup(ctx context.Context, testFol backupCfg.ParallelWrite = 10 backupCfg.StateFile = testStateFile - backupCfg.StateFileDumpDuration = 10 * time.Millisecond + backupCfg.FileLimit = 100000 backupCfg.Bandwidth = 1000000 backupCfg.PageSize = 100 backupCfg.SyncPipelines = true @@ -1198,7 +1197,6 @@ func (suite *backupRestoreTestSuite) runContinueBackup(ctx context.Context, test local.WithValidator(asb.NewValidator()), local.WithSkipDirCheck(), local.WithDir(bFolder), - local.WithUnbufferedWrite(), ) if err != nil { panic(err) @@ -1218,7 +1216,7 @@ func (suite *backupRestoreTestSuite) runContinueBackup(ctx context.Context, test backupCfg.StateFile = testStateFile backupCfg.Continue = true - backupCfg.StateFileDumpDuration = 10 * time.Millisecond + backupCfg.FileLimit = 100000 backupCfg.PageSize = 100 backupCfg.SyncPipelines = true From b825da97b173083d933e407c76c104c3fc086063 Mon Sep 17 00:00:00 2001 From: Dmitrii Neeman Date: Sun, 20 Oct 2024 14:56:06 +0300 Subject: [PATCH 22/25] FMWK-570-backup-restore-state - fix first init after continuation --- handler_backup_records.go | 9 ++++++--- state.go | 5 +++++ 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/handler_backup_records.go b/handler_backup_records.go index f94a7600..e7d10fa9 100644 --- a/handler_backup_records.go +++ b/handler_backup_records.go @@ -218,9 +218,12 @@ func (bh *backupRecordsHandler) makeAerospikeReadWorkersForPartition( if err != nil { return nil, err } - // Init state. - if err := bh.state.InitState(partitionGroups); err != nil { - return nil, err + + if bh.config.isStateFirstRun() { + // Init state. + if err := bh.state.InitState(partitionGroups); err != nil { + return nil, err + } } } diff --git a/state.go b/state.go index 4ca51840..31f94b82 100644 --- a/state.go +++ b/state.go @@ -137,6 +137,11 @@ func newStateFromFile( s.SaveCommandChan = make(chan int) s.Counter++ + // Init current state. 
+	for k, v := range s.RecordStatesSaved {
+		s.RecordStates[k] = v
+	}
+
 	logger.Debug("loaded state file successfully", slog.Int("filters loaded", len(s.RecordStatesSaved)))
 
 	// Run watcher on initialization.

From f43e4e4c535b4081f315b3c4b5d787e61f1720ca Mon Sep 17 00:00:00 2001
From: Dmitrii Neeman
Date: Sun, 20 Oct 2024 15:11:52 +0300
Subject: [PATCH 23/25] FMWK-570-backup-restore-state - fix ordinary file limit backup

---
 cmd/internal/flags/backup.go |  7 ++++---
 config_backup.go             | 10 +++++-----
 handler_backup.go            |  7 ++++++-
 3 files changed, 15 insertions(+), 9 deletions(-)

diff --git a/cmd/internal/flags/backup.go b/cmd/internal/flags/backup.go
index 74670c76..166fb8fc 100644
--- a/cmd/internal/flags/backup.go
+++ b/cmd/internal/flags/backup.go
@@ -132,9 +132,10 @@ func (f *Backup) NewFlagSet() *pflag.FlagSet {
 			"that was generated from the interrupted/failed run.")
 	flagSet.StringVar(&f.StateFileDst, "state-file-dst",
 		"",
-		"Name or path relatively to --directory where state file will be saved.\n"+
-			"Works only with --file-limit parameter. As we reach file-limit and save file to storage\n"+
-			"current state will be saved.")
+		"Name of a state file that will be saved in backup --directory.\n"+
+			"Works only with the --file-limit parameter. When --file-limit is reached and a file is closed,\n"+
+			"the current state is saved. Works only for default and/or partition backup.\n"+
+			"Does not work with --parallel-nodes or --node-list.")
 	flagSet.Int64Var(&f.ScanPageSize, "scan-page-size",
 		10000,
 		"How many records will be read on one iteration for continuation backup.\n"+
diff --git a/config_backup.go b/config_backup.go
index 24849732..ec2163a4 100644
--- a/config_backup.go
+++ b/config_backup.go
@@ -104,11 +104,11 @@ type BackupConfig struct {
 	Compact bool
 	// Only include records that have no ttl set (persistent records).
 	NoTTLOnly bool
-	// Either a path with a file name or a directory in which the backup state file will be
-	// placed if the backup is interrupted/fails. If a path with a file name is used, that
-	// exact path is where the backup file will be placed. If a directory is given, the backup
-	// state will be placed in the directory with name `.asb.state`
-	// Works only for default and/or partition backup. Not work with ParallelNodes or NodeList.
+	// Name of a state file that will be saved in the backup directory.
+	// Works only with the FileLimit parameter.
+	// When FileLimit is reached and a file is closed, the current state is saved.
+	// Works only for default and/or partition backup.
+	// Does not work with ParallelNodes or NodeList.
 	StateFile string
 	// Resumes an interrupted/failed backup from where it was left off, given the .state file
 	// that was generated from the interrupted/failed run.
diff --git a/handler_backup.go b/handler_backup.go
index d0802ee7..1b586d53 100644
--- a/handler_backup.go
+++ b/handler_backup.go
@@ -310,7 +310,12 @@ func closeWriters(backupWriters []io.WriteCloser, logger *slog.Logger) {
 // configuration.
 func (bh *BackupHandler) newWriter(ctx context.Context, n int) (io.WriteCloser, error) {
 	if bh.config.FileLimit > 0 {
-		return sized.NewWriter(ctx, n, bh.state.SaveCommandChan, bh.config.FileLimit, bh.newConfiguredWriter)
+		// When saving state is enabled, we init the writer with a communication channel.
+ if bh.config.isStateFirstRun() || bh.config.isStateContinue() { + return sized.NewWriter(ctx, n, bh.state.SaveCommandChan, bh.config.FileLimit, bh.newConfiguredWriter) + } + + return sized.NewWriter(ctx, n, nil, bh.config.FileLimit, bh.newConfiguredWriter) } return bh.newConfiguredWriter(ctx) From c7b80f3f8bd7d7d549205c921c956f9f73867789 Mon Sep 17 00:00:00 2001 From: Dmitrii Neeman Date: Sun, 20 Oct 2024 15:18:39 +0300 Subject: [PATCH 24/25] FMWK-570-backup-restore-state - state test --- state_test.go | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/state_test.go b/state_test.go index a0e19a08..39914e11 100644 --- a/state_test.go +++ b/state_test.go @@ -20,7 +20,6 @@ import ( "os" "path/filepath" "testing" - "time" a "github.com/aerospike/aerospike-client-go/v7" "github.com/aerospike/backup-go/io/encoding/asb" @@ -30,7 +29,6 @@ import ( ) const ( - testDuration = 1 * time.Second testStateFile = "test_state_file" ) @@ -45,7 +43,7 @@ func TestState(t *testing.T) { NewPartitionFilterByID(2), } - ctx, cancel := context.WithCancel(context.Background()) + ctx := context.Background() cfg := NewDefaultBackupConfig() cfg.StateFile = testStateFile @@ -73,20 +71,19 @@ func TestState(t *testing.T) { require.NotNil(t, state) require.NoError(t, err) + err = state.InitState(testFilters) + require.NoError(t, err) + for i := range testFilters { pfs, err := models.NewPartitionFilterSerialized(testFilters[i]) require.NoError(t, err) state.RecordsStateChan <- pfs } - time.Sleep(testDuration * 3) - cancel() - // Check that file exists. _, err = os.Stat(tempFile) require.NoError(t, err) - // Nullify the link. result := []*a.PartitionFilter{ NewPartitionFilterByID(1), NewPartitionFilterByID(2), From 5fe36eda95006d7a1c069e703dfa84cc8845a1c6 Mon Sep 17 00:00:00 2001 From: Dmitrii Neeman Date: Sun, 20 Oct 2024 15:25:28 +0300 Subject: [PATCH 25/25] FMWK-570-backup-restore-state - readme update --- cmd/asbackup/readme.md | 136 ++++++++++++++++++++--------------------- 1 file changed, 67 insertions(+), 69 deletions(-) diff --git a/cmd/asbackup/readme.md b/cmd/asbackup/readme.md index 32824a71..db7a77fc 100644 --- a/cmd/asbackup/readme.md +++ b/cmd/asbackup/readme.md @@ -73,75 +73,73 @@ Backup Flags: --socket-timeout int Socket timeout in milliseconds. If this value is 0, its set to total-timeout. If both are 0, there is no socket idle time limit (default 10000) -N, --nice int The limits for read/write storage bandwidth in MiB/s - -o, --output-file string Backup to a single backup file. Use - for stdout. Required, unless -d or -e is used. - -q, --output-file-prefix string When using directory parameter, prepend a prefix to the names of the generated files. - -r, --remove-files Remove existing backup file (-o) or files (-d). - -F, --file-limit int Rotate backup files, when their size crosses the given - value (in bytes) Only used when backing up to a Directory. - -D, --after-digest string Backup records after record digest in record's partition plus all succeeding - partitions. Used to resume backup with last record received from previous - incomplete backup. - This argument is mutually exclusive to partition-list. - Format: base64 encoded string - Example: EjRWeJq83vEjRRI0VniavN7xI0U= - - -a, --modified-before string - Perform an incremental backup; only include records - that changed after the given date and time. The system's - local timezone applies. If only HH:MM:SS is specified, then - today's date is assumed as the date. 
If only YYYY-MM-DD is - specified, then 00:00:00 (midnight) is assumed as the time. - - -b, --modified-after string - Only include records that last changed before the given - date and time. May combined with --modified-after to specify a range. - -M, --max-records int The number of records approximately to back up. 0 - all records - -x, --no-bins Do not include bin data in the backup. - --sleep-between-retries int The amount of milliseconds to sleep between retries. (default 5) - -f, --filter-exp string Base64 encoded expression. Use the encoded filter expression in each scan call, - which can be used to do a partial backup. The expression to be used can be base64 - encoded through any client. This argument is mutually exclusive with multi-set backup. - - --parallel-nodes Specifies how to perform scan. If set to true, we launch parallel workers for nodes; - otherwise workers run in parallel for partitions. - --remove-artifacts Remove existing backup file (-o) or files (-d) without performing a backup. - -C, --compact Do not apply base-64 encoding to BLOBs; results in smaller backup files. - -l, --node-list string :[,:[,...]] - ::[,::[,...]] - Backup the given cluster nodes only. - The job is parallelized by number of nodes unless --parallel is set less than nodes number. - This argument is mutually exclusive to partition-list/after-digest arguments. - Default: backup all nodes in the cluster - --no-ttl-only Only include records that have no ttl set (persistent records). - --prefer-racks string [,[,...]] - A list of Aerospike Server rack IDs to prefer when reading records for a backup. - -X, --partition-list string List of partitions [...]]> to back up. Partition filters can be ranges, - individual partitions, or records after a specific digest within a single partition. - This argument is mutually exclusive to after-digest. - Filter: [-]| - begin partition: 0-4095 - partition count: 1-4096 Default: 1 - digest: base64 encoded string - Examples: 0-1000, 1000-1000, 2222, EjRWeJq83vEjRRI0VniavN7xI0U= - Default: 0-4096 (all partitions) - - -e, --estimate Estimate the backed-up record size from a random sample of - 10,000 (default) records at 99.9999%% confidence. - It ignores any filter: filter-exp, node-list, modified-after, modified-before, no-ttl-only, - after-digest, partition-list. - It calculates estimate size of full backup. - --estimate-samples int The number of samples to take when running a backup estimate. (default 10000) - -c, --continue string Resumes an interrupted/failed backup from where it was left off, given the .state file - that was generated from the interrupted/failed run. - --state-file-dst Either a path with a file name or a directory in which the backup state file will be - placed if the backup is interrupted/fails. If a path with a file name is used, that - exact path is where the backup file will be placed. If a directory is given, the backup - state will be placed in the directory with name .asb.state, or - `.asb.state` if `--output-file-prefix` is given. - --state-file-dump-duration int Intervals in milliseconds, how often dump state file to disk. (default 10000) - --scan-page-size int How many records will be read on one iteration for continuation backup. - Affects size if overlap on resuming backup after an error. - Is used only with --state-file-dst or --continue. (default 10000) + -o, --output-file string Backup to a single backup file. Use - for stdout. Required, unless -d or -e is used. 
+ -q, --output-file-prefix string When using directory parameter, prepend a prefix to the names of the generated files. + -r, --remove-files Remove existing backup file (-o) or files (-d). + -F, --file-limit int Rotate backup files, when their size crosses the given + value (in bytes) Only used when backing up to a Directory. + -D, --after-digest string Backup records after record digest in record's partition plus all succeeding + partitions. Used to resume backup with last record received from previous + incomplete backup. + This argument is mutually exclusive to partition-list. + Format: base64 encoded string + Example: EjRWeJq83vEjRRI0VniavN7xI0U= + + -a, --modified-before string + Perform an incremental backup; only include records + that changed after the given date and time. The system's + local timezone applies. If only HH:MM:SS is specified, then + today's date is assumed as the date. If only YYYY-MM-DD is + specified, then 00:00:00 (midnight) is assumed as the time. + + -b, --modified-after string + Only include records that last changed before the given + date and time. May combined with --modified-after to specify a range. + -M, --max-records int The number of records approximately to back up. 0 - all records + -x, --no-bins Do not include bin data in the backup. + --sleep-between-retries int The amount of milliseconds to sleep between retries. (default 5) + -f, --filter-exp string Base64 encoded expression. Use the encoded filter expression in each scan call, + which can be used to do a partial backup. The expression to be used can be base64 + encoded through any client. This argument is mutually exclusive with multi-set backup. + + --parallel-nodes Specifies how to perform scan. If set to true, we launch parallel workers for nodes; + otherwise workers run in parallel for partitions. + --remove-artifacts Remove existing backup file (-o) or files (-d) without performing a backup. + -C, --compact Do not apply base-64 encoding to BLOBs; results in smaller backup files. + -l, --node-list string :[,:[,...]] + ::[,::[,...]] + Backup the given cluster nodes only. + The job is parallelized by number of nodes unless --parallel is set less than nodes number. + This argument is mutually exclusive to partition-list/after-digest arguments. + Default: backup all nodes in the cluster + --no-ttl-only Only include records that have no ttl set (persistent records). + --prefer-racks string [,[,...]] + A list of Aerospike Server rack IDs to prefer when reading records for a backup. + -X, --partition-list string List of partitions [...]]> to back up. Partition filters can be ranges, + individual partitions, or records after a specific digest within a single partition. + This argument is mutually exclusive to after-digest. + Filter: [-]| + begin partition: 0-4095 + partition count: 1-4096 Default: 1 + digest: base64 encoded string + Examples: 0-1000, 1000-1000, 2222, EjRWeJq83vEjRRI0VniavN7xI0U= + Default: 0-4096 (all partitions) + + -e, --estimate Estimate the backed-up record size from a random sample of + 10,000 (default) records at 99.9999%% confidence. + It ignores any filter: filter-exp, node-list, modified-after, modified-before, no-ttl-only, + after-digest, partition-list. + It calculates estimate size of full backup. + --estimate-samples int The number of samples to take when running a backup estimate. (default 10000) + -c, --continue string Resumes an interrupted/failed backup from where it was left off, given the .state file + that was generated from the interrupted/failed run. 
+      --state-file-dst string          Name of a state file that will be saved in backup --directory.
+                                       Works only with the --file-limit parameter. When --file-limit is reached and a file is closed,
+                                       the current state is saved. Works only for default and/or partition backup.
+                                       Does not work with --parallel-nodes or --node-list.
+      --scan-page-size int             How many records will be read on one iteration for continuation backup.
+                                       Affects size of overlap on resuming backup after an error.
+                                       Is used only with --state-file-dst or --continue. (default 10000)
 
 Compression Flags:
   -z, --compress string    Enables compressing of backup files using the specified compression algorithm.