diff --git a/go.mod b/go.mod
index 9162601ea..f10cd68fd 100644
--- a/go.mod
+++ b/go.mod
@@ -1,4 +1,4 @@
-module github.com/peak/s5cmd/v2
+module github.com/MqllR/s5cmd/v2
 
 go 1.19
 
@@ -13,6 +13,7 @@ require (
 	github.com/karrick/godirwalk v1.15.3
 	github.com/kballard/go-shellquote v0.0.0-20180428030007-95032a82bc51
 	github.com/lanrat/extsort v1.0.0
+	github.com/peak/s5cmd/v2 v2.0.0-00010101000000-000000000000
 	github.com/termie/go-shutil v0.0.0-20140729215957-bcacb06fecae
 	github.com/urfave/cli/v2 v2.11.2
 	gotest.tools/v3 v3.0.3
@@ -21,16 +22,16 @@ require (
 	github.com/VividCortex/ewma v1.2.0 // indirect
 	github.com/cpuguy83/go-md2man/v2 v2.0.2 // indirect
-	github.com/davecgh/go-spew v1.1.1 // indirect
 	github.com/fatih/color v1.15.0 // indirect
 	github.com/hashicorp/errwrap v1.0.0 // indirect
 	github.com/jmespath/go-jmespath v0.4.0 // indirect
-	github.com/kr/pretty v0.3.0 // indirect
+	github.com/kr/text v0.2.0 // indirect
 	github.com/mattn/go-colorable v0.1.13 // indirect
 	github.com/mattn/go-isatty v0.0.19 // indirect
 	github.com/mattn/go-runewidth v0.0.14 // indirect
 	github.com/pkg/errors v0.9.1 // indirect
 	github.com/rivo/uniseg v0.2.0 // indirect
+	github.com/rogpeppe/go-internal v1.6.1 // indirect
 	github.com/russross/blackfriday/v2 v2.1.0 // indirect
 	github.com/ryszard/goskiplist v0.0.0-20150312221310-2dfbae5fcf46 // indirect
 	github.com/shabbyrobe/gocovmerge v0.0.0-20190829150210-3e036491d500 // indirect
@@ -39,6 +40,7 @@ require (
 	golang.org/x/sync v0.1.0 // indirect
 	golang.org/x/sys v0.7.0 // indirect
 	golang.org/x/tools v0.8.0 // indirect
-	gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127 // indirect
 	gopkg.in/mgo.v2 v2.0.0-20180705113604-9856a29383ce // indirect
 )
+
+replace github.com/peak/s5cmd/v2 v2.0.0 => github.com/MqllR/s5cmd/v2 v2.3.0
diff --git a/go.sum b/go.sum
index 8569a392d..4b0a7a438 100644
--- a/go.sum
+++ b/go.sum
@@ -9,7 +9,6 @@ github.com/cpuguy83/go-md2man/v2 v2.0.2/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46t
 github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E=
 github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
 github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
-github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
 github.com/fatih/color v1.15.0 h1:kOqh6YHBtK8aywxGerMG2Eq3H6Qgoqeo13Bk2Mv/nBs=
 github.com/fatih/color v1.15.0/go.mod h1:0h5ZqXfHYED7Bhv2ZJamyIOUej9KtShiJESRwBDUSsw=
 github.com/golang/mock v1.6.0 h1:ErTB+efbowRARo13NNdxyJji2egdxLGQhRaY+DUumQc=
@@ -35,7 +34,6 @@ github.com/kballard/go-shellquote v0.0.0-20180428030007-95032a82bc51 h1:Z9n2FFNU
 github.com/kballard/go-shellquote v0.0.0-20180428030007-95032a82bc51/go.mod h1:CzGEWj7cYgsdH8dAjBGEr58BoE7ScuLd+fwFZ44+/x8=
 github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo=
 github.com/kr/pretty v0.3.0 h1:WgNl7dwNpEZ6jJ9k1snq4pZsg7DOEN8hP9Xw0Tsjwk0=
-github.com/kr/pretty v0.3.0/go.mod h1:640gp4NfQd8pI5XOwp5fnNeVWj67G7CFk/SaSQn7NBk=
 github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ=
 github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI=
 github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY=
diff --git a/vendor/github.com/peak/s5cmd/v2/LICENSE b/vendor/github.com/peak/s5cmd/v2/LICENSE
new file mode 100644
index 000000000..7d65deec3
--- /dev/null
+++ b/vendor/github.com/peak/s5cmd/v2/LICENSE
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2020 Peak
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
diff --git a/vendor/github.com/peak/s5cmd/v2/command/app.go b/vendor/github.com/peak/s5cmd/v2/command/app.go
new file mode 100644
index 000000000..f10d890e5
--- /dev/null
+++ b/vendor/github.com/peak/s5cmd/v2/command/app.go
@@ -0,0 +1,231 @@
+package command
+
+import (
+	"context"
+	"fmt"
+	"os"
+	"strings"
+
+	"github.com/urfave/cli/v2"
+
+	"github.com/peak/s5cmd/v2/log"
+	"github.com/peak/s5cmd/v2/log/stat"
+	"github.com/peak/s5cmd/v2/parallel"
+	"github.com/peak/s5cmd/v2/storage"
+)
+
+const (
+	defaultWorkerCount = 256
+	defaultRetryCount  = 10
+
+	appName = "s5cmd"
+)
+
+var app = &cli.App{
+	Name:                 appName,
+	Usage:                "Blazing fast S3 and local filesystem execution tool",
+	EnableBashCompletion: true,
+	Flags: []cli.Flag{
+		&cli.BoolFlag{
+			Name:  "json",
+			Usage: "enable JSON formatted output",
+		},
+		&cli.IntFlag{
+			Name:  "numworkers",
+			Value: defaultWorkerCount,
+			Usage: "number of workers execute operation on each object",
+		},
+		&cli.IntFlag{
+			Name:    "retry-count",
+			Aliases: []string{"r"},
+			Value:   defaultRetryCount,
+			Usage:   "number of times that a request will be retried for failures",
+		},
+		&cli.StringFlag{
+			Name:    "endpoint-url",
+			Usage:   "override default S3 host for custom services",
+			EnvVars: []string{"S3_ENDPOINT_URL"},
+		},
+		&cli.BoolFlag{
+			Name:  "no-verify-ssl",
+			Usage: "disable SSL certificate verification",
+		},
+		&cli.GenericFlag{
+			Name: "log",
+			Value: &EnumValue{
+				Enum:    []string{"trace", "debug", "info", "error"},
+				Default: "info",
+			},
+			Usage: "log level: (trace, debug, info, error)",
+		},
+		&cli.BoolFlag{
+			Name:  "install-completion",
+			Usage: "get completion installation instructions for your shell (only available for bash, pwsh, and zsh)",
+		},
+		&cli.BoolFlag{
+			Name:  "dry-run",
+			Usage: "fake run; show what commands will be executed without actually executing them",
+		},
+		&cli.BoolFlag{
+			Name:  "stat",
+			Usage: "collect statistics of program execution and display it at the end",
+		},
+		&cli.BoolFlag{
+			Name:  "no-sign-request",
+			Usage: "do not sign requests: credentials will not be loaded if --no-sign-request is provided",
+		},
+		&cli.BoolFlag{
+			Name:  "use-list-objects-v1",
+			Usage: "use ListObjectsV1 API for services that don't support ListObjectsV2",
+		},
+		&cli.StringFlag{
+			Name:  "request-payer",
+			Usage: "who pays for request (access requester pays buckets)",
+		},
+		&cli.StringFlag{
+			Name:  "profile",
+			Usage: "use the specified profile from the credentials file",
+		},
+		&cli.StringFlag{
+			Name:  "credentials-file",
+			Usage: "use the specified credentials file instead of the default credentials file",
+		},
+	},
+	Before: func(c *cli.Context) error {
+		retryCount := c.Int("retry-count")
+		workerCount := c.Int("numworkers")
+		printJSON := c.Bool("json")
+		logLevel := c.String("log")
+		isStat := c.Bool("stat")
+		endpointURL := c.String("endpoint-url")
+
+		log.Init(logLevel, printJSON)
+		parallel.Init(workerCount)
+
+		if retryCount < 0 {
+			err := fmt.Errorf("retry count cannot be a negative value")
+			printError(commandFromContext(c), c.Command.Name, err)
+			return err
+		}
+		if c.Bool("no-sign-request") && c.String("profile") != "" {
+			err := fmt.Errorf(`"no-sign-request" and "profile" flags cannot be used together`)
+			printError(commandFromContext(c), c.Command.Name, err)
+			return err
+		}
+		if c.Bool("no-sign-request") && c.String("credentials-file") != "" {
+			err := fmt.Errorf(`"no-sign-request" and "credentials-file" flags cannot be used together`)
+			printError(commandFromContext(c), c.Command.Name, err)
+			return err
+		}
+
+		if isStat {
+			stat.InitStat()
+		}
+
+		if endpointURL != "" {
+			if !strings.HasPrefix(endpointURL, "http") {
+				err := fmt.Errorf(`bad value for --endpoint-url %v: scheme is missing. Must be of the form http://<hostname>/ or https://<hostname>/`, endpointURL)
+				printError(commandFromContext(c), c.Command.Name, err)
+				return err
+			}
+		}
+
+		return nil
+	},
+	CommandNotFound: func(c *cli.Context, command string) {
+		msg := log.ErrorMessage{
+			Command: command,
+			Err:     "command not found",
+		}
+		log.Error(msg)
+
+		// After callback is not called if app exits with cli.Exit.
+		parallel.Close()
+		log.Close()
+	},
+	OnUsageError: func(c *cli.Context, err error, isSubcommand bool) error {
+		if err != nil {
+			_, _ = fmt.Fprintf(os.Stderr, "%s %s\n", "Incorrect Usage:", err.Error())
+			_, _ = fmt.Fprintf(os.Stderr, "See 's5cmd --help' for usage\n")
+			return err
+		}
+
+		return nil
+	},
+	Action: func(c *cli.Context) error {
+		if c.Bool("install-completion") {
+			printAutocompletionInstructions(os.Getenv("SHELL"))
+			return nil
+		}
+		args := c.Args()
+		if args.Present() {
+			cli.ShowCommandHelp(c, args.First())
+			return cli.Exit("", 1)
+		}
+
+		return cli.ShowAppHelp(c)
+	},
+	After: func(c *cli.Context) error {
+		if c.Bool("stat") && len(stat.Statistics()) > 0 {
+			log.Stat(stat.Statistics())
+		}
+
+		parallel.Close()
+		log.Close()
+		return nil
+	},
+}
+
+// NewStorageOpts creates storage.Options object from the given context.
+func NewStorageOpts(c *cli.Context) storage.Options { + return storage.Options{ + DryRun: c.Bool("dry-run"), + Endpoint: c.String("endpoint-url"), + MaxRetries: c.Int("retry-count"), + NoSignRequest: c.Bool("no-sign-request"), + NoVerifySSL: c.Bool("no-verify-ssl"), + RequestPayer: c.String("request-payer"), + UseListObjectsV1: c.Bool("use-list-objects-v1"), + Profile: c.String("profile"), + CredentialFile: c.String("credentials-file"), + LogLevel: log.LevelFromString(c.String("log")), + NoSuchUploadRetryCount: c.Int("no-such-upload-retry-count"), + } +} + +func Commands() []*cli.Command { + return []*cli.Command{ + NewListCommand(), + NewCopyCommand(), + NewDeleteCommand(), + NewMoveCommand(), + NewMakeBucketCommand(), + NewRemoveBucketCommand(), + NewSelectCommand(), + NewSizeCommand(), + NewCatCommand(), + NewPipeCommand(), + NewRunCommand(), + NewSyncCommand(), + NewVersionCommand(), + NewBucketVersionCommand(), + NewPresignCommand(), + } +} + +func AppCommand(name string) *cli.Command { + for _, c := range Commands() { + if c.HasName(name) { + return c + } + } + + return nil +} + +// Main is the entrypoint function to run given commands. +func Main(ctx context.Context, args []string) error { + app.Commands = Commands() + + return app.RunContext(ctx, args) +} diff --git a/vendor/github.com/peak/s5cmd/v2/command/auto_complete.go b/vendor/github.com/peak/s5cmd/v2/command/auto_complete.go new file mode 100644 index 000000000..fe01f9495 --- /dev/null +++ b/vendor/github.com/peak/s5cmd/v2/command/auto_complete.go @@ -0,0 +1,241 @@ +package command + +import ( + "context" + "fmt" + "os" + "path/filepath" + "strings" + + "github.com/peak/s5cmd/v2/storage" + "github.com/peak/s5cmd/v2/storage/url" + "github.com/urfave/cli/v2" +) + +const zsh = `autoload -Uz compinit +compinit + +_s5cmd_cli_zsh_autocomplete() { + local -a opts + local cur + cur=${words[-1]} + opts=("${(@f)$(${words[@]:0:#words[@]-1} "${cur}" --generate-bash-completion)}") + + if [[ "${opts[1]}" != "" ]]; then + _describe 'values' opts + else + _files + fi +} + +compdef _s5cmd_cli_zsh_autocomplete s5cmd +` + +const bash = `# prepare autocompletion suggestions for s5cmd and save them to COMPREPLY array +_s5cmd_cli_bash_autocomplete() { + if [[ "${COMP_WORDS[0]}" != "source" ]]; then + COMPREPLY=() + local opts cur cmd` + + // get current word (cur) and prepare command (cmd) + ` + cur="${COMP_WORDS[COMP_CWORD]}" + cmd="${COMP_LINE:0:$COMP_POINT}"` + + + // if we want to complete the second argument and we didn't start writing + // yet then we should pass an empty string as another argument. Otherwise + // the white spaces will be discarded and the program will make suggestions + // as if it is completing the first argument. + // Beware that we want to pass an empty string so we intentionally write + // as it is. Fixes of SC2089 and SC2090 are not what we want. + // see also https://www.shellcheck.net/wiki/SC2090 + ` + [ "${COMP_LINE:COMP_POINT-1:$COMP_POINT}" == " " ] \ + && cmd="${cmd} ''" ` + + + // execute the command with '--generate-bash-completion' flag to obtain + // possible completion values for current word. + // ps. SC2090 is not wanted. + ` + opts=$($cmd --generate-bash-completion)` + + + // prepare completion array with possible values and filter those do not start with cur. + // if no completion is found then fallback to default completion of shell. 
+ ` + + while IFS='' read -r line; + do + COMPREPLY+=("$line"); + done \ + < <(compgen -o bashdefault -o default -o nospace -W "${opts}" -- "${cur}") + + return 0 + fi +} + +# call the _s5cmd_cli_bash_autocomplete to complete s5cmd command. +complete -o nospace -F _s5cmd_cli_bash_autocomplete s5cmd +` + +const pwsh = `$fn = $($MyInvocation.MyCommand.Name) +$name = $fn -replace "(.*)\.ps1$", '$1' +Register-ArgumentCompleter -Native -CommandName $name -ScriptBlock { + param($commandName, $wordToComplete, $cursorPosition) + $other = "$wordToComplete --generate-bash-completion" + Invoke-Expression $other | ForEach-Object { + [System.Management.Automation.CompletionResult]::new($_, $_, 'ParameterValue', $_) + } +} +` + +func getBashCompleteFn(cmd *cli.Command, isOnlyRemote, isOnlyBucket bool) func(ctx *cli.Context) { + isOnlyRemote = isOnlyRemote || isOnlyBucket + return func(ctx *cli.Context) { + arg := parseArgumentToComplete(ctx) + + if strings.HasPrefix(arg, "-") { + cli.DefaultCompleteWithFlags(cmd)(ctx) + return + } + + if isOnlyRemote || strings.HasPrefix(arg, "s3://") { + u, err := url.New(arg) + if err != nil { + u = &url.URL{Type: 0, Scheme: "s3"} + } + + c := ctx.Context + client, err := storage.NewRemoteClient(c, u, NewStorageOpts(ctx)) + if err != nil { + return + } + + shell := filepath.Base(os.Getenv("SHELL")) + printS3Suggestions(c, shell, client, u, arg, isOnlyBucket) + return + } + } +} + +// constantCompleteWithDefault returns a complete function which prints the argument, itself, which is to be completed. +// If the argument is empty string it uses the defaultCompletions to make suggestions. +func constantCompleteWithDefault(shell, arg string, defaultCompletions ...string) { + if arg == "" { + for _, str := range defaultCompletions { + fmt.Println(formatSuggestionForShell(shell, str, arg)) + } + } else { + fmt.Println(formatSuggestionForShell(shell, arg, arg)) + } +} + +func printS3Suggestions(c context.Context, shell string, client *storage.S3, u *url.URL, arg string, isOnlyBucket bool) { + if u.Bucket == "" || (u.IsBucket() && !strings.HasSuffix(arg, "/")) || isOnlyBucket { + printListBuckets(c, shell, client, u, arg) + } else { + printListNURLSuggestions(c, shell, client, u, 20, arg) + } +} + +func printListBuckets(ctx context.Context, shell string, client *storage.S3, u *url.URL, argToBeCompleted string) { + buckets, err := client.ListBuckets(ctx, u.Bucket) + if err != nil { + return + } + + for _, bucket := range buckets { + fmt.Println(formatSuggestionForShell(shell, "s3://"+bucket.Name+"/", argToBeCompleted)) + } +} + +func printListNURLSuggestions(ctx context.Context, shell string, client *storage.S3, u *url.URL, count int, argToBeCompleted string) { + if u.IsBucket() { + var err error + u, err = url.New(u.Absolute() + "/") + if err != nil { + return + } + } + + i := 0 + for obj := range (*client).List(ctx, u, false) { + if i > count { + break + } + if obj.Err != nil { + return + } + fmt.Println(formatSuggestionForShell(shell, obj.URL.Absolute(), argToBeCompleted)) + i++ + } +} + +func printAutocompletionInstructions(shell string) { + var script string + baseShell := filepath.Base(shell) + instructions := `# To enable autocompletion you should add the following script to startup scripts of your shell. 
+# It is probably located at ~/.` + baseShell + "rc" + + switch baseShell { + case "zsh": + script = zsh + case "bash": + script = bash + case "pwsh": + script = pwsh + instructions = `# To enable autocompletion you should save the following script to a file named "s5cmd.ps1" and execute it. +# To persist it you should add the path of "s5cmd.ps1" file to profile file (which you can locate with $profile) to automatically execute "s5cmd.ps1" on every shell start up.` + default: + instructions = `# We couldn't recognize your SHELL "` + baseShell + `". +# Shell completion is supported only for bash, pwsh and zsh. +# Make sure that your SHELL environment variable is set accurately.` + } + + fmt.Println(instructions) + fmt.Println(script) +} + +func formatSuggestionForShell(baseShell, suggestion, argToBeCompleted string) string { + switch baseShell { + case "bash": + var prefix string + suggestions := make([]string, 0, 2) + if i := strings.LastIndex(argToBeCompleted, ":"); i >= 0 && baseShell == "bash" { + // include the original suggestion in case that COMP_WORDBREAKS does not contain : + // or that the argToBeCompleted was quoted. + // Bash doesn't split on : when argument is quoted even if : is in COMP_WORDBREAKS + suggestions = append(suggestions, suggestion) + prefix = argToBeCompleted[0 : i+1] + } + suggestions = append(suggestions, strings.TrimPrefix(suggestion, prefix)) + return strings.Join(suggestions, "\n") + case "zsh": + // replace every colon : with \: if shell is zsh + // colons are used as a seperator for the autocompletion script + // so "literal colons in completion must be quoted with a backslash" + // see also https://zsh.sourceforge.io/Doc/Release/Completion-System.html#:~:text=This%20is%20followed,as%20name1%3B + return strings.ReplaceAll(suggestion, ":", `\:`) + default: + return suggestion + } +} + +func parseArgumentToComplete(ctx *cli.Context) string { + var arg string + args := ctx.Args() + l := args.Len() + + if l > 0 { + arg = args.Get(l - 1) + } + + // argument may start with a quotation mark, in this case we want to trim + // that before checking if it has prefix 's3://'. + // Beware that we only want to trim the first char, not all of the leading + // quotation marks, because those quotation marks may be actual characters. + if strings.HasPrefix(arg, "'") { + arg = strings.TrimPrefix(arg, "'") + } else { + arg = strings.TrimPrefix(arg, "\"") + } + return arg +} diff --git a/vendor/github.com/peak/s5cmd/v2/command/bucket_version.go b/vendor/github.com/peak/s5cmd/v2/command/bucket_version.go new file mode 100644 index 000000000..5bd91271e --- /dev/null +++ b/vendor/github.com/peak/s5cmd/v2/command/bucket_version.go @@ -0,0 +1,151 @@ +package command + +import ( + "context" + "fmt" + "strings" + + "github.com/urfave/cli/v2" + + "github.com/peak/s5cmd/v2/log" + "github.com/peak/s5cmd/v2/storage" + "github.com/peak/s5cmd/v2/storage/url" + "github.com/peak/s5cmd/v2/strutil" +) + +var bucketVersionHelpTemplate = `Name: + {{.HelpName}} - {{.Usage}} + +Usage: + {{.HelpName}} [options] s3://bucketname + +Options: + {{range .VisibleFlags}}{{.}} + {{end}} +Examples: + 1. Get bucket versioning status of a bucket + > s5cmd {{.HelpName}} s3://bucketname + + 2. Enable bucket versioning for the bucket + > s5cmd {{.HelpName}} --set Enabled s3://bucketname + + 3. 
Suspend bucket versioning for the bucket + > s5cmd {{.HelpName}} --set Suspended s3://bucketname +` + +func NewBucketVersionCommand() *cli.Command { + cmd := &cli.Command{ + Name: "bucket-version", + CustomHelpTemplate: bucketVersionHelpTemplate, + HelpName: "bucket-version", + Usage: "configure bucket versioning", + Flags: []cli.Flag{ + &cli.GenericFlag{ + Name: "set", + Value: &EnumValue{ + Enum: []string{"Suspended", "Enabled"}, + Default: "", + ConditionFunction: strings.EqualFold, + }, + Usage: "set versioning status of bucket: (Suspended, Enabled)", + }, + }, + Before: func(ctx *cli.Context) error { + if err := checkNumberOfArguments(ctx, 1, 1); err != nil { + printError(commandFromContext(ctx), ctx.Command.Name, err) + return err + } + return nil + }, + Action: func(c *cli.Context) error { + status := c.String("set") + + fullCommand := commandFromContext(c) + + bucket, err := url.New(c.Args().First()) + if err != nil { + printError(fullCommand, c.Command.Name, err) + return err + } + + return BucketVersion{ + src: bucket, + op: c.Command.Name, + fullCommand: fullCommand, + + status: status, + storageOpts: NewStorageOpts(c), + }.Run(c.Context) + }, + } + cmd.BashComplete = getBashCompleteFn(cmd, true, true) + return cmd +} + +type BucketVersion struct { + src *url.URL + op string + fullCommand string + + status string + storageOpts storage.Options +} + +func (v BucketVersion) Run(ctx context.Context) error { + client, err := storage.NewRemoteClient(ctx, &url.URL{}, v.storageOpts) + if err != nil { + printError(v.fullCommand, v.op, err) + return err + } + + if v.status != "" { + v.status = strutil.CapitalizeFirstRune(v.status) + + err := client.SetBucketVersioning(ctx, v.status, v.src.Bucket) + if err != nil { + printError(v.fullCommand, v.op, err) + return err + } + msg := BucketVersionMessage{ + Bucket: v.src.Bucket, + Status: v.status, + isSet: true, + } + log.Info(msg) + return nil + } + + status, err := client.GetBucketVersioning(ctx, v.src.Bucket) + if err != nil { + printError(v.fullCommand, v.op, err) + return err + } + + msg := BucketVersionMessage{ + Bucket: v.src.Bucket, + Status: status, + isSet: false, + } + log.Info(msg) + return nil +} + +type BucketVersionMessage struct { + Bucket string `json:"bucket"` + Status string `json:"status"` + isSet bool +} + +func (v BucketVersionMessage) String() string { + if v.isSet { + return fmt.Sprintf("Bucket versioning for %q is set to %q", v.Bucket, v.Status) + } + if v.Status != "" { + return fmt.Sprintf("Bucket versioning for %q is %q", v.Bucket, v.Status) + } + return fmt.Sprintf("%q is an unversioned bucket", v.Bucket) +} + +func (v BucketVersionMessage) JSON() string { + return strutil.JSON(v) +} diff --git a/vendor/github.com/peak/s5cmd/v2/command/cat.go b/vendor/github.com/peak/s5cmd/v2/command/cat.go new file mode 100644 index 000000000..d16a11600 --- /dev/null +++ b/vendor/github.com/peak/s5cmd/v2/command/cat.go @@ -0,0 +1,156 @@ +package command + +import ( + "context" + "fmt" + "os" + + "github.com/urfave/cli/v2" + + "github.com/peak/s5cmd/v2/log/stat" + "github.com/peak/s5cmd/v2/orderedwriter" + "github.com/peak/s5cmd/v2/storage" + "github.com/peak/s5cmd/v2/storage/url" +) + +var catHelpTemplate = `Name: + {{.HelpName}} - {{.Usage}} + +Usage: + {{.HelpName}} [options] source + +Options: + {{range .VisibleFlags}}{{.}} + {{end}} +Examples: + 1. Print a remote object's content to stdout + > s5cmd {{.HelpName}} s3://bucket/prefix/object + + 2. 
Print specific version of a remote object's content to stdout + > s5cmd {{.HelpName}} --version-id VERSION_ID s3://bucket/prefix/object +` + +func NewCatCommand() *cli.Command { + cmd := &cli.Command{ + Name: "cat", + HelpName: "cat", + Usage: "print remote object content", + Flags: []cli.Flag{ + &cli.BoolFlag{ + Name: "raw", + Usage: "disable the wildcard operations, useful with filenames that contains glob characters", + }, + &cli.StringFlag{ + Name: "version-id", + Usage: "use the specified version of an object", + }, + &cli.IntFlag{ + Name: "concurrency", + Aliases: []string{"c"}, + Value: defaultCopyConcurrency, + Usage: "number of concurrent parts transferred between host and remote server", + }, + &cli.IntFlag{ + Name: "part-size", + Aliases: []string{"p"}, + Value: defaultPartSize, + Usage: "size of each part transferred between host and remote server, in MiB", + }, + }, + CustomHelpTemplate: catHelpTemplate, + Before: func(c *cli.Context) error { + err := validateCatCommand(c) + if err != nil { + printError(commandFromContext(c), c.Command.Name, err) + } + return err + }, + Action: func(c *cli.Context) (err error) { + defer stat.Collect(c.Command.FullName(), &err)() + + op := c.Command.Name + fullCommand := commandFromContext(c) + + src, err := url.New(c.Args().Get(0), url.WithVersion(c.String("version-id")), + url.WithRaw(c.Bool("raw"))) + if err != nil { + printError(fullCommand, op, err) + return err + } + + return Cat{ + src: src, + op: op, + fullCommand: fullCommand, + + storageOpts: NewStorageOpts(c), + concurrency: c.Int("concurrency"), + partSize: c.Int64("part-size") * megabytes, + }.Run(c.Context) + }, + } + cmd.BashComplete = getBashCompleteFn(cmd, true, false) + return cmd +} + +// Cat holds cat operation flags and states. +type Cat struct { + src *url.URL + op string + fullCommand string + + storageOpts storage.Options + concurrency int + partSize int64 +} + +// Run prints content of given source to standard output. 
+func (c Cat) Run(ctx context.Context) error { + client, err := storage.NewRemoteClient(ctx, c.src, c.storageOpts) + if err != nil { + printError(c.fullCommand, c.op, err) + return err + } + _, err = client.Stat(ctx, c.src) + if err != nil { + printError(c.fullCommand, c.op, err) + return err + } + buf := orderedwriter.New(os.Stdout) + _, err = client.Get(ctx, c.src, buf, c.concurrency, c.partSize) + if err != nil { + printError(c.fullCommand, c.op, err) + return err + } + return nil +} + +func validateCatCommand(c *cli.Context) error { + if c.Args().Len() != 1 { + return fmt.Errorf("expected only one argument") + } + + src, err := url.New(c.Args().Get(0), url.WithVersion(c.String("version-id")), + url.WithRaw(c.Bool("raw"))) + if err != nil { + return err + } + + if !src.IsRemote() { + return fmt.Errorf("source must be a remote object") + } + + if src.IsBucket() || src.IsPrefix() { + return fmt.Errorf("remote source must be an object") + } + + if src.IsWildcard() { + return fmt.Errorf("remote source %q can not contain glob characters", src) + } + + if err := checkVersioningWithGoogleEndpoint(c); err != nil { + return err + } + + return nil +} diff --git a/vendor/github.com/peak/s5cmd/v2/command/context.go b/vendor/github.com/peak/s5cmd/v2/command/context.go new file mode 100644 index 000000000..1eb78fb3d --- /dev/null +++ b/vendor/github.com/peak/s5cmd/v2/command/context.go @@ -0,0 +1,106 @@ +package command + +import ( + "flag" + "fmt" + "sort" + "strconv" + "strings" + + "github.com/peak/s5cmd/v2/storage/url" + "github.com/urfave/cli/v2" +) + +func commandFromContext(c *cli.Context) string { + cmd := c.Command.FullName() + + for _, f := range c.Command.Flags { + flagname := f.Names()[0] + for _, flagvalue := range contextValue(c, flagname) { + cmd = fmt.Sprintf("%s --%s=%v", cmd, flagname, flagvalue) + } + } + + if c.Args().Len() > 0 { + cmd = fmt.Sprintf("%v %v", cmd, strings.Join(c.Args().Slice(), " ")) + } + + return cmd +} + +// contextValue traverses context and its ancestor contexts to find +// the flag value and returns string slice. +func contextValue(c *cli.Context, flagname string) []string { + for _, c := range c.Lineage() { + if !c.IsSet(flagname) { + continue + } + + val := c.Value(flagname) + switch val.(type) { + case cli.StringSlice: + return c.StringSlice(flagname) + case cli.Int64Slice, cli.IntSlice: + values := c.Int64Slice(flagname) + var result []string + for _, v := range values { + result = append(result, strconv.FormatInt(v, 10)) + } + return result + case string: + return []string{c.String(flagname)} + case bool: + return []string{strconv.FormatBool(c.Bool(flagname))} + case int, int64: + return []string{strconv.FormatInt(c.Int64(flagname), 10)} + default: + return []string{fmt.Sprintf("%v", val)} + } + } + + return nil +} + +// generateCommand generates command string from given context, app command, default flags and urls. 
+func generateCommand(c *cli.Context, cmd string, defaultFlags map[string]interface{}, urls ...*url.URL) (string, error) { + command := AppCommand(cmd) + flagset := flag.NewFlagSet(command.Name, flag.ContinueOnError) + + var args []string + for _, url := range urls { + args = append(args, fmt.Sprintf("%q", url.String())) + } + + flags := []string{} + for flagname, flagvalue := range defaultFlags { + flags = append(flags, fmt.Sprintf("--%s='%v'", flagname, flagvalue)) + } + + isDefaultFlag := func(flagname string) bool { + _, ok := defaultFlags[flagname] + return ok + } + + for _, f := range command.Flags { + flagname := f.Names()[0] + if isDefaultFlag(flagname) || !c.IsSet(flagname) { + continue + } + + for _, flagvalue := range contextValue(c, flagname) { + flags = append(flags, fmt.Sprintf("--%s='%s'", flagname, flagvalue)) + } + } + + sort.Strings(flags) + flags = append(flags, args...) + flags = append([]string{command.Name}, flags...) + + err := flagset.Parse(flags) + if err != nil { + return "", err + } + + cmdCtx := cli.NewContext(c.App, flagset, c) + return strings.TrimSpace(commandFromContext(cmdCtx)), nil +} diff --git a/vendor/github.com/peak/s5cmd/v2/command/cp.go b/vendor/github.com/peak/s5cmd/v2/command/cp.go new file mode 100644 index 000000000..775e31100 --- /dev/null +++ b/vendor/github.com/peak/s5cmd/v2/command/cp.go @@ -0,0 +1,1102 @@ +package command + +import ( + "context" + "errors" + "fmt" + "io" + "mime" + "net/http" + "os" + "path/filepath" + "regexp" + "strings" + "sync" + + "github.com/hashicorp/go-multierror" + "github.com/urfave/cli/v2" + + errorpkg "github.com/peak/s5cmd/v2/error" + "github.com/peak/s5cmd/v2/log" + "github.com/peak/s5cmd/v2/log/stat" + "github.com/peak/s5cmd/v2/parallel" + "github.com/peak/s5cmd/v2/progressbar" + "github.com/peak/s5cmd/v2/storage" + "github.com/peak/s5cmd/v2/storage/url" +) + +const ( + defaultCopyConcurrency = 5 + defaultPartSize = 50 // MiB + megabytes = 1024 * 1024 + kilobytes = 1024 +) + +var copyHelpTemplate = `Name: + {{.HelpName}} - {{.Usage}} + +Usage: + {{.HelpName}} [options] source destination + +Options: + {{range .VisibleFlags}}{{.}} + {{end}} +Examples: + 01. Download an S3 object to working directory + > s5cmd {{.HelpName}} s3://bucket/prefix/object.gz . + + 02. Download an S3 object and rename + > s5cmd {{.HelpName}} s3://bucket/prefix/object.gz myobject.gz + + 03. Download all S3 objects to a directory + > s5cmd {{.HelpName}} "s3://bucket/*" target-directory/ + + 04. Download an S3 object from a public bucket + > s5cmd --no-sign-request {{.HelpName}} s3://bucket/prefix/object.gz . + + 05. Upload a file to S3 bucket + > s5cmd {{.HelpName}} myfile.gz s3://bucket/ + + 06. Upload matching files to S3 bucket + > s5cmd {{.HelpName}} "dir/*.gz" s3://bucket/ + + 07. Upload all files in a directory to S3 bucket recursively + > s5cmd {{.HelpName}} dir/ s3://bucket/ + + 08. Copy S3 object to another bucket + > s5cmd {{.HelpName}} s3://bucket/object s3://target-bucket/prefix/object + + 09. Copy matching S3 objects to another bucket + > s5cmd {{.HelpName}} "s3://bucket/*.gz" s3://target-bucket/prefix/ + + 10. Copy files in a directory to S3 prefix if not found on target + > s5cmd {{.HelpName}} -n -s -u dir/ s3://bucket/target-prefix/ + + 11. Copy files in an S3 prefix to another S3 prefix if not found on target + > s5cmd {{.HelpName}} -n -s -u "s3://bucket/source-prefix/*" s3://bucket/target-prefix/ + + 12. 
Perform KMS Server Side Encryption of the object(s) at the destination + > s5cmd {{.HelpName}} --sse aws:kms s3://bucket/object s3://target-bucket/prefix/object + + 13. Perform KMS-SSE of the object(s) at the destination using customer managed Customer Master Key (CMK) key id + > s5cmd {{.HelpName}} --sse aws:kms --sse-kms-key-id s3://bucket/object s3://target-bucket/prefix/object + + 14. Force transfer of GLACIER objects with a prefix whether they are restored or not + > s5cmd {{.HelpName}} --force-glacier-transfer "s3://bucket/prefix/*" target-directory/ + + 15. Upload a file to S3 bucket with public read s3 acl + > s5cmd {{.HelpName}} --acl "public-read" myfile.gz s3://bucket/ + + 16. Upload a file to S3 bucket with expires header + > s5cmd {{.HelpName}} --expires "2024-10-01T20:30:00Z" myfile.gz s3://bucket/ + + 17. Upload a file to S3 bucket with cache-control header + > s5cmd {{.HelpName}} --cache-control "public, max-age=345600" myfile.gz s3://bucket/ + + 18. Copy all files to S3 bucket but exclude the ones with txt and gz extension + > s5cmd {{.HelpName}} --exclude "*.txt" --exclude "*.gz" dir/ s3://bucket + + 19. Copy all files from S3 bucket to another S3 bucket but exclude the ones starts with log + > s5cmd {{.HelpName}} --exclude "log*" "s3://bucket/*" s3://destbucket + + 20. Copy all files from S3 bucket to another S3 bucket but only the ones starts with log + > s5cmd {{.HelpName}} --include "log*" "s3://bucket/*" s3://destbucket + + 21. Download an S3 object from a requester pays bucket + > s5cmd --request-payer=requester {{.HelpName}} s3://bucket/prefix/object.gz . + + 22. Upload a file to S3 with a content-type and content-encoding header + > s5cmd --content-type "text/css" --content-encoding "br" myfile.css.br s3://bucket/ + + 23. Download the specific version of a remote object to working directory + > s5cmd {{.HelpName}} --version-id VERSION_ID s3://bucket/prefix/object . + + 24. Pass arbitrary metadata to the object during upload or copy + > s5cmd {{.HelpName}} --metadata "camera=Nixon D750" --metadata "imageSize=6032x4032" flowers.png s3://bucket/prefix/flowers.png +` + +func NewSharedFlags() []cli.Flag { + return []cli.Flag{ + &cli.BoolFlag{ + Name: "no-follow-symlinks", + Usage: "do not follow symbolic links", + }, + &cli.StringFlag{ + Name: "storage-class", + Usage: "set storage class for target ('STANDARD','REDUCED_REDUNDANCY','GLACIER','STANDARD_IA','ONEZONE_IA','INTELLIGENT_TIERING','DEEP_ARCHIVE')", + }, + &cli.IntFlag{ + Name: "concurrency", + Aliases: []string{"c"}, + Value: defaultCopyConcurrency, + Usage: "number of concurrent parts transferred between host and remote server", + }, + &cli.IntFlag{ + Name: "part-size", + Aliases: []string{"p"}, + Value: defaultPartSize, + Usage: "size of each part transferred between host and remote server, in MiB", + }, + &MapFlag{ + Name: "metadata", + Usage: "set arbitrary metadata for the object, e.g. --metadata 'foo=bar' --metadata 'fizz=buzz'", + }, + &cli.StringFlag{ + Name: "sse", + Usage: "perform server side encryption of the data at its destination, e.g. aws:kms", + }, + &cli.StringFlag{ + Name: "sse-kms-key-id", + Usage: "customer master key (CMK) id for SSE-KMS encryption; leave it out if server-side generated key is desired", + }, + &cli.StringFlag{ + Name: "acl", + Usage: "set acl for target: defines granted accesses and their types on different accounts/groups, e.g. 
cp --acl 'public-read'", + }, + &cli.StringFlag{ + Name: "cache-control", + Usage: "set cache control for target: defines cache control header for object, e.g. cp --cache-control 'public, max-age=345600'", + }, + &cli.StringFlag{ + Name: "expires", + Usage: "set expires for target (uses RFC3339 format): defines expires header for object, e.g. cp --expires '2024-10-01T20:30:00Z'", + }, + &cli.BoolFlag{ + Name: "force-glacier-transfer", + Usage: "force transfer of glacier objects whether they are restored or not", + }, + &cli.BoolFlag{ + Name: "ignore-glacier-warnings", + Usage: "turns off glacier warnings: ignore errors encountered during copying, downloading and moving glacier objects", + }, + &cli.StringFlag{ + Name: "source-region", + Usage: "set the region of source bucket; the region of the source bucket will be automatically discovered if --source-region is not specified", + }, + &cli.StringFlag{ + Name: "destination-region", + Usage: "set the region of destination bucket: the region of the destination bucket will be automatically discovered if --destination-region is not specified", + }, + &cli.StringSliceFlag{ + Name: "exclude", + Usage: "exclude objects with given pattern", + }, + &cli.StringSliceFlag{ + Name: "include", + Usage: "include objects with given pattern", + }, + &cli.BoolFlag{ + Name: "raw", + Usage: "disable the wildcard operations, useful with filenames that contains glob characters", + }, + &cli.StringFlag{ + Name: "content-type", + Usage: "set content type for target: defines content type header for object, e.g. --content-type text/plain", + }, + &cli.StringFlag{ + Name: "content-encoding", + Usage: "set content encoding for target: defines content encoding header for object, e.g. --content-encoding gzip", + }, + &cli.StringFlag{ + Name: "content-disposition", + Usage: "set content disposition for target: defines content disposition header for object, e.g. --content-disposition 'attachment; filename=\"filename.jpg\"'", + }, + &cli.IntFlag{ + Name: "no-such-upload-retry-count", + Usage: "number of times that a request will be retried on NoSuchUpload error; you should not use this unless you really know what you're doing", + DefaultText: "0", + Hidden: true, + }, + } +} + +func NewCopyCommandFlags() []cli.Flag { + copyFlags := []cli.Flag{ + &cli.BoolFlag{ + Name: "flatten", + Aliases: []string{"f"}, + Usage: "flatten directory structure of source, starting from the first wildcard", + }, + &cli.BoolFlag{ + Name: "no-clobber", + Aliases: []string{"n"}, + Usage: "do not overwrite destination if already exists", + }, + &cli.BoolFlag{ + Name: "if-size-differ", + Aliases: []string{"s"}, + Usage: "only overwrite destination if size differs", + }, + &cli.BoolFlag{ + Name: "if-source-newer", + Aliases: []string{"u"}, + Usage: "only overwrite destination if source modtime is newer", + }, + &cli.StringFlag{ + Name: "version-id", + Usage: "use the specified version of an object", + }, + &cli.BoolFlag{ + Name: "show-progress", + Aliases: []string{"sp"}, + Usage: "show a progress bar", + }, + } + sharedFlags := NewSharedFlags() + return append(copyFlags, sharedFlags...) 
+} + +func NewCopyCommand() *cli.Command { + cmd := &cli.Command{ + Name: "cp", + HelpName: "cp", + Usage: "copy objects", + Flags: NewCopyCommandFlags(), + CustomHelpTemplate: copyHelpTemplate, + Before: func(c *cli.Context) error { + err := validateCopyCommand(c) + if err != nil { + printError(commandFromContext(c), c.Command.Name, err) + } + return err + }, + Action: func(c *cli.Context) (err error) { + defer stat.Collect(c.Command.FullName(), &err)() + + // don't delete source + copy, err := NewCopy(c, false) + if err != nil { + return err + } + return copy.Run(c.Context) + }, + } + + cmd.BashComplete = getBashCompleteFn(cmd, false, false) + return cmd +} + +// Copy holds copy operation flags and states. +type Copy struct { + src *url.URL + dst *url.URL + op string + fullCommand string + + deleteSource bool + + // flags + noClobber bool + ifSizeDiffer bool + ifSourceNewer bool + flatten bool + followSymlinks bool + storageClass storage.StorageClass + encryptionMethod string + encryptionKeyID string + acl string + forceGlacierTransfer bool + ignoreGlacierWarnings bool + exclude []string + include []string + cacheControl string + expires string + contentType string + contentEncoding string + contentDisposition string + metadata map[string]string + showProgress bool + progressbar progressbar.ProgressBar + + // patterns + excludePatterns []*regexp.Regexp + includePatterns []*regexp.Regexp + + // region settings + srcRegion string + dstRegion string + + // s3 options + concurrency int + partSize int64 + storageOpts storage.Options +} + +// NewCopy creates Copy from cli.Context. +func NewCopy(c *cli.Context, deleteSource bool) (*Copy, error) { + fullCommand := commandFromContext(c) + + src, err := url.New(c.Args().Get(0), url.WithVersion(c.String("version-id")), + url.WithRaw(c.Bool("raw"))) + if err != nil { + printError(fullCommand, c.Command.Name, err) + return nil, err + } + + dst, err := url.New(c.Args().Get(1), url.WithRaw(c.Bool("raw"))) + if err != nil { + printError(fullCommand, c.Command.Name, err) + return nil, err + } + + var commandProgressBar progressbar.ProgressBar + + if c.Bool("show-progress") && !(src.Type == dst.Type) { + commandProgressBar = progressbar.New() + } else { + commandProgressBar = &progressbar.NoOp{} + } + + metadata, ok := c.Value("metadata").(MapValue) + if !ok { + err := errors.New("metadata flag is not a map") + printError(fullCommand, c.Command.Name, err) + return nil, err + } + + return &Copy{ + src: src, + dst: dst, + op: c.Command.Name, + fullCommand: fullCommand, + deleteSource: deleteSource, + // flags + noClobber: c.Bool("no-clobber"), + ifSizeDiffer: c.Bool("if-size-differ"), + ifSourceNewer: c.Bool("if-source-newer"), + flatten: c.Bool("flatten"), + followSymlinks: !c.Bool("no-follow-symlinks"), + storageClass: storage.StorageClass(c.String("storage-class")), + concurrency: c.Int("concurrency"), + partSize: c.Int64("part-size") * megabytes, + encryptionMethod: c.String("sse"), + encryptionKeyID: c.String("sse-kms-key-id"), + acl: c.String("acl"), + forceGlacierTransfer: c.Bool("force-glacier-transfer"), + ignoreGlacierWarnings: c.Bool("ignore-glacier-warnings"), + exclude: c.StringSlice("exclude"), + include: c.StringSlice("include"), + cacheControl: c.String("cache-control"), + expires: c.String("expires"), + contentType: c.String("content-type"), + contentEncoding: c.String("content-encoding"), + contentDisposition: c.String("content-disposition"), + metadata: metadata, + showProgress: c.Bool("show-progress"), + progressbar: commandProgressBar, 
+ + // region settings + srcRegion: c.String("source-region"), + dstRegion: c.String("destination-region"), + + storageOpts: NewStorageOpts(c), + }, nil +} + +const fdlimitWarning = ` +WARNING: s5cmd is hitting the max open file limit allowed by your OS. Either +increase the open file limit or try to decrease the number of workers with +'-numworkers' parameter. +` + +// Run starts copying given source objects to destination. +func (c Copy) Run(ctx context.Context) error { + // override source region if set + if c.srcRegion != "" { + c.storageOpts.SetRegion(c.srcRegion) + } + + client, err := storage.NewClient(ctx, c.src, c.storageOpts) + if err != nil { + printError(c.fullCommand, c.op, err) + return err + } + + objch, err := expandSource(ctx, client, c.followSymlinks, c.src) + if err != nil { + printError(c.fullCommand, c.op, err) + return err + } + + c.progressbar.Start() + defer c.progressbar.Finish() + waiter := parallel.NewWaiter() + + var ( + merrorWaiter error + merrorObjects error + errDoneCh = make(chan bool) + ) + + go func() { + defer close(errDoneCh) + for err := range waiter.Err() { + if strings.Contains(err.Error(), "too many open files") { + fmt.Println(strings.TrimSpace(fdlimitWarning)) + fmt.Printf("ERROR %v\n", err) + + os.Exit(1) + } + printError(c.fullCommand, c.op, err) + merrorWaiter = multierror.Append(merrorWaiter, err) + } + }() + + isBatch := c.src.IsWildcard() + if !isBatch && !c.src.IsRemote() { + obj, err := client.Stat(ctx, c.src) + if err != nil { + printError(c.fullCommand, c.op, err) + return err + } + + isBatch = obj != nil && obj.Type.IsDir() + } + + c.excludePatterns, err = createRegexFromWildcard(c.exclude) + if err != nil { + printError(c.fullCommand, c.op, err) + return err + } + + c.includePatterns, err = createRegexFromWildcard(c.include) + if err != nil { + printError(c.fullCommand, c.op, err) + return err + } + + for object := range objch { + if errorpkg.IsCancelation(object.Err) || object.Type.IsDir() { + continue + } + + if !object.Type.IsRegular() { + err := fmt.Errorf("object '%v' is not a regular file", object) + merrorObjects = multierror.Append(merrorObjects, err) + printError(c.fullCommand, c.op, err) + continue + } + + if err := object.Err; err != nil { + merrorObjects = multierror.Append(merrorObjects, err) + printError(c.fullCommand, c.op, err) + continue + } + + if object.StorageClass.IsGlacier() && !c.forceGlacierTransfer { + if !c.ignoreGlacierWarnings { + err := fmt.Errorf("object '%v' is on Glacier storage", object) + merrorObjects = multierror.Append(merrorObjects, err) + printError(c.fullCommand, c.op, err) + } + continue + } + + isExcluded, err := isObjectExcluded(object, c.excludePatterns, c.includePatterns, c.src.Prefix) + if err != nil { + printError(c.fullCommand, c.op, err) + } + if isExcluded { + continue + } + + srcurl := object.URL + var task parallel.Task + + if object.Size == 0 && !(srcurl.Type == c.dst.Type) { + obj, err := client.Stat(ctx, srcurl) + if err == nil { + object.Size = obj.Size + } + } + c.progressbar.AddTotalBytes(object.Size) + c.progressbar.IncrementTotalObjects() + + switch { + case srcurl.Type == c.dst.Type: // local->local or remote->remote + task = c.prepareCopyTask(ctx, srcurl, c.dst, isBatch, c.metadata) + case srcurl.IsRemote(): // remote->local + task = c.prepareDownloadTask(ctx, srcurl, c.dst, isBatch) + case c.dst.IsRemote(): // local->remote + task = c.prepareUploadTask(ctx, srcurl, c.dst, isBatch, c.metadata) + default: + panic("unexpected src-dst pair") + } + parallel.Run(task, waiter) + } 
+ waiter.Wait() + <-errDoneCh + + return multierror.Append(merrorWaiter, merrorObjects).ErrorOrNil() +} + +func (c Copy) prepareCopyTask( + ctx context.Context, + srcurl *url.URL, + dsturl *url.URL, + isBatch bool, + metadata map[string]string, +) func() error { + return func() error { + dsturl = prepareRemoteDestination(srcurl, dsturl, c.flatten, isBatch) + err := c.doCopy(ctx, srcurl, dsturl, metadata) + if err != nil { + return &errorpkg.Error{ + Op: c.op, + Src: srcurl, + Dst: dsturl, + Err: err, + } + } + c.progressbar.IncrementCompletedObjects() + return nil + } +} + +func (c Copy) prepareDownloadTask( + ctx context.Context, + srcurl *url.URL, + dsturl *url.URL, + isBatch bool, +) func() error { + return func() error { + dsturl, err := prepareLocalDestination(ctx, srcurl, dsturl, c.flatten, isBatch, c.storageOpts) + if err != nil { + return err + } + err = c.doDownload(ctx, srcurl, dsturl) + if err != nil { + return &errorpkg.Error{ + Op: c.op, + Src: srcurl, + Dst: dsturl, + Err: err, + } + } + c.progressbar.IncrementCompletedObjects() + return nil + } +} + +func (c Copy) prepareUploadTask( + ctx context.Context, + srcurl *url.URL, + dsturl *url.URL, + isBatch bool, + metadata map[string]string, +) func() error { + return func() error { + dsturl = prepareRemoteDestination(srcurl, dsturl, c.flatten, isBatch) + err := c.doUpload(ctx, srcurl, dsturl, metadata) + if err != nil { + return &errorpkg.Error{ + Op: c.op, + Src: srcurl, + Dst: dsturl, + Err: err, + } + } + c.progressbar.IncrementCompletedObjects() + return nil + } +} + +// doDownload is used to fetch a remote object and save as a local object. +func (c Copy) doDownload(ctx context.Context, srcurl *url.URL, dsturl *url.URL) error { + srcClient, err := storage.NewRemoteClient(ctx, srcurl, c.storageOpts) + if err != nil { + return err + } + + dstClient := storage.NewLocalClient(c.storageOpts) + + err = c.shouldOverride(ctx, srcurl, dsturl) + if err != nil { + // FIXME(ig): rename + if errorpkg.IsWarning(err) { + printDebug(c.op, err, srcurl, dsturl) + return nil + } + return err + } + + dstPath := filepath.Dir(dsturl.Absolute()) + dstFile := filepath.Base(dsturl.Absolute()) + file, err := dstClient.CreateTemp(dstPath, dstFile) + if err != nil { + return err + } + + writer := newCountingReaderWriter(file, c.progressbar) + size, err := srcClient.Get(ctx, srcurl, writer, c.concurrency, c.partSize) + file.Close() + + if err != nil { + dErr := dstClient.Delete(ctx, &url.URL{Path: file.Name(), Type: dsturl.Type}) + if dErr != nil { + printDebug(c.op, dErr, srcurl, dsturl) + } + return err + } + + if c.deleteSource { + _ = srcClient.Delete(ctx, srcurl) + } + + err = dstClient.Rename(file, dsturl.Absolute()) + if err != nil { + return err + } + + if !c.showProgress { + msg := log.InfoMessage{ + Operation: c.op, + Source: srcurl, + Destination: dsturl, + Object: &storage.Object{ + Size: size, + }, + } + log.Info(msg) + } + + return nil +} + +func (c Copy) doUpload(ctx context.Context, srcurl *url.URL, dsturl *url.URL, extradata map[string]string) error { + srcClient := storage.NewLocalClient(c.storageOpts) + + file, err := srcClient.Open(srcurl.Absolute()) + if err != nil { + return err + } + defer file.Close() + + err = c.shouldOverride(ctx, srcurl, dsturl) + if err != nil { + if errorpkg.IsWarning(err) { + printDebug(c.op, err, srcurl, dsturl) + return nil + } + return err + } + + // override destination region if set + if c.dstRegion != "" { + c.storageOpts.SetRegion(c.dstRegion) + } + dstClient, err := storage.NewRemoteClient(ctx, 
dsturl, c.storageOpts) + if err != nil { + return err + } + + metadata := storage.Metadata{ + UserDefined: extradata, + ACL: c.acl, + CacheControl: c.cacheControl, + Expires: c.expires, + StorageClass: string(c.storageClass), + ContentEncoding: c.contentEncoding, + ContentDisposition: c.contentDisposition, + EncryptionMethod: c.encryptionMethod, + EncryptionKeyID: c.encryptionKeyID, + } + + if c.contentType != "" { + metadata.ContentType = c.contentType + } else { + metadata.ContentType = guessContentType(file) + } + + reader := newCountingReaderWriter(file, c.progressbar) + err = dstClient.Put(ctx, reader, dsturl, metadata, c.concurrency, c.partSize) + + if err != nil { + return err + } + + obj, err := srcClient.Stat(ctx, srcurl) + if err != nil { + return err + } + + if c.deleteSource { + // close the file before deleting + file.Close() + if err := srcClient.Delete(ctx, srcurl); err != nil { + return err + } + } + + if !c.showProgress { + msg := log.InfoMessage{ + Operation: c.op, + Source: srcurl, + Destination: dsturl, + Object: &storage.Object{ + Size: obj.Size, + StorageClass: c.storageClass, + }, + } + log.Info(msg) + } + + return nil +} + +func (c Copy) doCopy(ctx context.Context, srcurl, dsturl *url.URL, extradata map[string]string) error { + // override destination region if set + if c.dstRegion != "" { + c.storageOpts.SetRegion(c.dstRegion) + } + dstClient, err := storage.NewClient(ctx, dsturl, c.storageOpts) + if err != nil { + return err + } + + metadata := storage.Metadata{ + UserDefined: extradata, + ACL: c.acl, + CacheControl: c.cacheControl, + Expires: c.expires, + StorageClass: string(c.storageClass), + ContentType: c.contentType, + ContentEncoding: c.contentEncoding, + ContentDisposition: c.contentDisposition, + EncryptionMethod: c.encryptionMethod, + EncryptionKeyID: c.encryptionKeyID, + } + + err = c.shouldOverride(ctx, srcurl, dsturl) + if err != nil { + if errorpkg.IsWarning(err) { + printDebug(c.op, err, srcurl, dsturl) + return nil + } + return err + } + + err = dstClient.Copy(ctx, srcurl, dsturl, metadata) + if err != nil { + return err + } + + if c.deleteSource { + srcClient, err := storage.NewClient(ctx, srcurl, c.storageOpts) + if err != nil { + return err + } + if err := srcClient.Delete(ctx, srcurl); err != nil { + return err + } + } + + msg := log.InfoMessage{ + Operation: c.op, + Source: srcurl, + Destination: dsturl, + Object: &storage.Object{ + URL: dsturl, + StorageClass: c.storageClass, + }, + } + log.Info(msg) + + return nil +} + +// shouldOverride function checks if the destination should be overridden if +// the source-destination pair and given copy flags conform to the +// override criteria. For example; "cp -n -s " should not override +// the if and filenames are the same, except if the size +// differs. +func (c Copy) shouldOverride(ctx context.Context, srcurl *url.URL, dsturl *url.URL) error { + // if not asked to override, ignore. + if !c.noClobber && !c.ifSizeDiffer && !c.ifSourceNewer { + return nil + } + + srcClient, err := storage.NewClient(ctx, srcurl, c.storageOpts) + if err != nil { + return err + } + + srcObj, err := statObject(ctx, srcurl, srcClient) + if err != nil { + return err + } + + dstClient, err := storage.NewClient(ctx, dsturl, c.storageOpts) + if err != nil { + return err + } + + dstObj, err := statObject(ctx, dsturl, dstClient) + if err != nil { + return err + } + + // if destination not exists, no conditions apply. 
+ if dstObj == nil { + return nil + } + + var stickyErr error + if c.noClobber { + stickyErr = errorpkg.ErrObjectExists + } + + if c.ifSizeDiffer { + if srcObj.Size == dstObj.Size { + stickyErr = errorpkg.ErrObjectSizesMatch + } else { + stickyErr = nil + } + } + + if c.ifSourceNewer { + srcMod, dstMod := srcObj.ModTime, dstObj.ModTime + + if !srcMod.After(*dstMod) { + stickyErr = errorpkg.ErrObjectIsNewer + } else { + stickyErr = nil + } + } + + return stickyErr +} + +// prepareRemoteDestination will return a new destination URL for +// remote->remote and local->remote copy operations. +func prepareRemoteDestination( + srcurl *url.URL, + dsturl *url.URL, + flatten bool, + isBatch bool, +) *url.URL { + objname := srcurl.Base() + if isBatch && !flatten { + objname = srcurl.Relative() + } + + if dsturl.IsPrefix() || dsturl.IsBucket() { + dsturl = dsturl.Join(objname) + } + return dsturl +} + +// prepareDownloadDestination will return a new destination URL for +// remote->local copy operations. +func prepareLocalDestination( + ctx context.Context, + srcurl *url.URL, + dsturl *url.URL, + flatten bool, + isBatch bool, + storageOpts storage.Options, +) (*url.URL, error) { + objname := srcurl.Base() + if isBatch && !flatten { + objname = srcurl.Relative() + } + + client := storage.NewLocalClient(storageOpts) + + if isBatch { + err := client.MkdirAll(dsturl.Absolute()) + if err != nil { + return nil, err + } + } + + obj, err := client.Stat(ctx, dsturl) + if err != nil { + var objNotFound *storage.ErrGivenObjectNotFound + if !errors.As(err, &objNotFound) { + return nil, err + } + } + + if isBatch && !flatten { + dsturl = dsturl.Join(objname) + err := client.MkdirAll(dsturl.Dir()) + if err != nil { + return nil, err + } + } + var objNotFound *storage.ErrGivenObjectNotFound + if errors.As(err, &objNotFound) { + err := client.MkdirAll(dsturl.Dir()) + if err != nil { + return nil, err + } + if strings.HasSuffix(dsturl.Absolute(), "/") { + dsturl = dsturl.Join(objname) + } + } else { + if obj.Type.IsDir() { + dsturl = obj.URL.Join(objname) + } + } + + return dsturl, nil +} + +// statObject checks if the object from given url exists. If no object is +// found, error and returning object would be nil. +func statObject(ctx context.Context, url *url.URL, client storage.Storage) (*storage.Object, error) { + obj, err := client.Stat(ctx, url) + var objNotFound *storage.ErrGivenObjectNotFound + if errors.As(err, &objNotFound) { + return nil, nil + } + + return obj, err +} + +func validateCopyCommand(c *cli.Context) error { + if c.Args().Len() != 2 { + return fmt.Errorf("expected source and destination arguments") + } + + ctx := c.Context + src := c.Args().Get(0) + dst := c.Args().Get(1) + + srcurl, err := url.New(src, url.WithVersion(c.String("version-id")), + url.WithRaw(c.Bool("raw"))) + if err != nil { + return err + } + + dsturl, err := url.New(dst, url.WithRaw(c.Bool("raw"))) + if err != nil { + return err + } + + // wildcard destination doesn't mean anything + if dsturl.IsWildcard() { + return fmt.Errorf("target %q can not contain glob characters", dst) + } + + // we don't operate on S3 prefixes for copy and delete operations. + if srcurl.IsBucket() || srcurl.IsPrefix() { + return fmt.Errorf("source argument must contain wildcard character") + } + + // 'cp dir/* s3://bucket/prefix': expect a trailing slash to avoid any + // surprises. 
+ if srcurl.IsWildcard() && dsturl.IsRemote() && !dsturl.IsPrefix() && !dsturl.IsBucket() { + return fmt.Errorf("target %q must be a bucket or a prefix", dsturl) + } + + if err := checkVersinoningURLRemote(srcurl); err != nil { + return err + } + + if err := checkVersioningWithGoogleEndpoint(c); err != nil { + return err + } + + switch { + case srcurl.Type == dsturl.Type: + return validateCopy(srcurl, dsturl) + case dsturl.IsRemote(): + return validateUpload(ctx, srcurl, dsturl, NewStorageOpts(c)) + default: + return nil + } +} + +func validateCopy(srcurl, dsturl *url.URL) error { + if srcurl.IsRemote() || dsturl.IsRemote() { + return nil + } + + // we don't support local->local copies + return fmt.Errorf("local->local copy operations are not permitted") +} + +func validateUpload(ctx context.Context, srcurl, dsturl *url.URL, storageOpts storage.Options) error { + srcclient := storage.NewLocalClient(storageOpts) + + if srcurl.IsWildcard() { + return nil + } + + obj, err := srcclient.Stat(ctx, srcurl) + if err != nil { + return err + } + + // 'cp dir/ s3://bucket/prefix-without-slash': expect a trailing slash to + // avoid any surprises. + if obj.Type.IsDir() && !dsturl.IsBucket() && !dsturl.IsPrefix() { + return fmt.Errorf("target %q must be a bucket or a prefix", dsturl) + } + + return nil +} + +// guessContentType gets content type of the file. +func guessContentType(file *os.File) string { + contentType := mime.TypeByExtension(filepath.Ext(file.Name())) + if contentType == "" { + defer file.Seek(0, io.SeekStart) + + const bufsize = 512 + buf, err := io.ReadAll(io.LimitReader(file, bufsize)) + if err != nil { + return "" + } + + return http.DetectContentType(buf) + } + return contentType +} + +type countingReaderWriter struct { + pb progressbar.ProgressBar + fp *os.File + signMap map[int64]struct{} + mu sync.Mutex +} + +func newCountingReaderWriter(file *os.File, pb progressbar.ProgressBar) *countingReaderWriter { + return &countingReaderWriter{ + pb: pb, + fp: file, + signMap: map[int64]struct{}{}, + } +} + +func (r *countingReaderWriter) WriteAt(p []byte, off int64) (int, error) { + n, err := r.fp.WriteAt(p, off) + r.pb.AddCompletedBytes(int64(n)) + return n, err +} + +func (r *countingReaderWriter) Read(p []byte) (int, error) { + n, err := r.fp.Read(p) + r.pb.AddCompletedBytes(int64(n)) + return n, err +} + +func (r *countingReaderWriter) ReadAt(p []byte, off int64) (int, error) { + n, err := r.fp.ReadAt(p, off) + r.mu.Lock() + // Ignore the first signature call + if _, ok := r.signMap[off]; ok { + // Got the length have read (or means has uploaded) + r.pb.AddCompletedBytes(int64(n)) + } else { + r.signMap[off] = struct{}{} + } + r.mu.Unlock() + return n, err +} + +func (r *countingReaderWriter) Seek(offset int64, whence int) (int64, error) { + return r.fp.Seek(offset, whence) +} diff --git a/vendor/github.com/peak/s5cmd/v2/command/du.go b/vendor/github.com/peak/s5cmd/v2/command/du.go new file mode 100644 index 000000000..8812c9582 --- /dev/null +++ b/vendor/github.com/peak/s5cmd/v2/command/du.go @@ -0,0 +1,275 @@ +package command + +import ( + "context" + "fmt" + + urlpkg "net/url" + + "github.com/hashicorp/go-multierror" + "github.com/urfave/cli/v2" + + errorpkg "github.com/peak/s5cmd/v2/error" + "github.com/peak/s5cmd/v2/log" + "github.com/peak/s5cmd/v2/log/stat" + "github.com/peak/s5cmd/v2/storage" + "github.com/peak/s5cmd/v2/storage/url" + "github.com/peak/s5cmd/v2/strutil" +) + +var sizeHelpTemplate = `Name: + {{.HelpName}} - {{.Usage}} + +Usage: + {{.HelpName}} [options] 
argument + +Options: + {{range .VisibleFlags}}{{.}} + {{end}} +Examples: + 1. Show disk usage of all objects in a bucket + > s5cmd {{.HelpName}} "s3://bucket/*" + + 2. Show disk usage of all objects that match a wildcard, grouped by storage class + > s5cmd {{.HelpName}} --group "s3://bucket/prefix/obj*.gz" + + 3. Show disk usage of all objects in a bucket but exclude the ones with py extension or starts with main + > s5cmd {{.HelpName}} --exclude "*.py" --exclude "main*" "s3://bucket/*" + + 4. Show disk usage of all versions of an object in the bucket + > s5cmd {{.HelpName}} --all-versions s3://bucket/object + + 5. Show disk usage of all versions of all objects that starts with a prefix in the bucket + > s5cmd {{.HelpName}} --all-versions "s3://bucket/prefix*" + + 6. Show disk usage of all versions of all objects in the bucket + > s5cmd {{.HelpName}} --all-versions "s3://bucket/*" + + 7. Show disk usage of a specific version of an object in the bucket + > s5cmd {{.HelpName}} --version-id VERSION_ID s3://bucket/object +` + +func NewSizeCommand() *cli.Command { + cmd := &cli.Command{ + Name: "du", + HelpName: "du", + Usage: "show object size usage", + CustomHelpTemplate: sizeHelpTemplate, + Flags: []cli.Flag{ + &cli.BoolFlag{ + Name: "group", + Aliases: []string{"g"}, + Usage: "group sizes by storage class", + }, + &cli.BoolFlag{ + Name: "humanize", + Aliases: []string{"H"}, + Usage: "human-readable output for object sizes", + }, + &cli.StringSliceFlag{ + Name: "exclude", + Usage: "exclude objects with given pattern", + }, + &cli.BoolFlag{ + Name: "all-versions", + Usage: "list all versions of object(s)", + }, + &cli.StringFlag{ + Name: "version-id", + Usage: "use the specified version of an object", + }, + }, + Before: func(c *cli.Context) error { + err := validateDUCommand(c) + if err != nil { + printError(commandFromContext(c), c.Command.Name, err) + } + return err + }, + Action: func(c *cli.Context) (err error) { + defer stat.Collect(c.Command.FullName(), &err)() + + fullCommand := commandFromContext(c) + + srcurl, err := url.New(c.Args().First(), + url.WithAllVersions(c.Bool("all-versions")), + url.WithVersion(c.String("version-id"))) + if err != nil { + printError(fullCommand, c.Command.Name, err) + return err + } + + return Size{ + src: srcurl, + op: c.Command.Name, + fullCommand: fullCommand, + // flags + groupByClass: c.Bool("group"), + humanize: c.Bool("humanize"), + exclude: c.StringSlice("exclude"), + + storageOpts: NewStorageOpts(c), + }.Run(c.Context) + }, + } + + cmd.BashComplete = getBashCompleteFn(cmd, false, false) + return cmd +} + +// Size holds disk usage (du) operation flags and states. +type Size struct { + src *url.URL + op string + fullCommand string + + // flags + groupByClass bool + humanize bool + exclude []string + + storageOpts storage.Options +} + +// Run calculates disk usage of given source. 
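Before the full implementation, a minimal sketch of the idea behind the --group flag, assuming an already-created client and the sizeAndCount helper defined later in this file:

    totals := map[string]sizeAndCount{}
    for object := range client.List(ctx, srcurl, false) {
        if object.Err != nil || object.Type.IsDir() {
            continue // error handling omitted in this sketch
        }
        s := totals[string(object.StorageClass)]
        s.addObject(object)
        totals[string(object.StorageClass)] = s
    }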
+func (sz Size) Run(ctx context.Context) error { + client, err := storage.NewClient(ctx, sz.src, sz.storageOpts) + if err != nil { + printError(sz.fullCommand, sz.op, err) + return err + } + + storageTotal := map[string]sizeAndCount{} + total := sizeAndCount{} + + var merror error + + excludePatterns, err := createRegexFromWildcard(sz.exclude) + if err != nil { + printError(sz.fullCommand, sz.op, err) + return err + } + + for object := range client.List(ctx, sz.src, false) { + if object.Type.IsDir() || errorpkg.IsCancelation(object.Err) { + continue + } + + if err := object.Err; err != nil { + merror = multierror.Append(merror, err) + printError(sz.fullCommand, sz.op, err) + continue + } + + if isURLMatched(excludePatterns, object.URL.Path, sz.src.Prefix) { + continue + } + + storageClass := string(object.StorageClass) + s := storageTotal[storageClass] + s.addObject(object) + storageTotal[storageClass] = s + + total.addObject(object) + } + + if !sz.groupByClass { + msg := SizeMessage{ + Source: sz.src.String(), + Count: total.count, + Size: total.size, + showHumanized: sz.humanize, + } + log.Info(msg) + return nil + } + + for k, v := range storageTotal { + msg := SizeMessage{ + Source: sz.src.String(), + StorageClass: k, + Count: v.count, + Size: v.size, + showHumanized: sz.humanize, + } + log.Info(msg) + } + return merror +} + +// SizeMessage is the structure for logging disk usage. +type SizeMessage struct { + Source string `json:"source"` + StorageClass string `json:"storage_class,omitempty"` + Count int64 `json:"count"` + Size int64 `json:"size"` + + showHumanized bool +} + +// humanize is a helper method to humanize bytes. +func (s SizeMessage) humanize() string { + if s.showHumanized { + return strutil.HumanizeBytes(s.Size) + } + return fmt.Sprintf("%d", s.Size) +} + +// String returns the string representation of SizeMessage. +func (s SizeMessage) String() string { + var storageCls string + if s.StorageClass != "" { + storageCls = fmt.Sprintf(" [%s]", s.StorageClass) + } + return fmt.Sprintf( + "%s bytes in %d objects: %s%s", + s.humanize(), + s.Count, + s.Source, + storageCls, + ) +} + +// JSON returns the JSON representation of SizeMessage. +func (s SizeMessage) JSON() string { + return strutil.JSON(s) +} + +type sizeAndCount struct { + size int64 + count int64 +} + +func (s *sizeAndCount) addObject(obj *storage.Object) { + s.size += obj.Size + s.count++ +} + +func validateDUCommand(c *cli.Context) error { + if c.Args().Len() != 1 { + return fmt.Errorf("expected only 1 argument") + } + + if err := checkVersioningFlagCompatibility(c); err != nil { + return err + } + + srcurl, err := url.New(c.Args().First(), + url.WithAllVersions(c.Bool("all-versions"))) + if err != nil { + return err + } + + if err := checkVersinoningURLRemote(srcurl); err != nil { + return err + } + + // the "all-versions" flag of du command works with GCS, because it does not + // depend on the generation numbers. 
+ endpoint, err := urlpkg.Parse(c.String("endpoint-url")) + if err == nil && c.String("version-id") != "" && storage.IsGoogleEndpoint(*endpoint) { + return fmt.Errorf(versioningNotSupportedWarning, endpoint) + } + + return nil +} diff --git a/vendor/github.com/peak/s5cmd/v2/command/error.go b/vendor/github.com/peak/s5cmd/v2/command/error.go new file mode 100644 index 000000000..37294ce4f --- /dev/null +++ b/vendor/github.com/peak/s5cmd/v2/command/error.go @@ -0,0 +1,96 @@ +package command + +import ( + "fmt" + "strings" + + "github.com/hashicorp/go-multierror" + + errorpkg "github.com/peak/s5cmd/v2/error" + "github.com/peak/s5cmd/v2/log" + "github.com/peak/s5cmd/v2/storage/url" +) + +func printDebug(op string, err error, urls ...*url.URL) { + command := op + for _, url := range urls { + if url != nil { + command += fmt.Sprintf(" %s", url) + } + } + + msg := log.DebugMessage{ + Command: command, + Operation: op, + Err: cleanupError(err), + } + log.Debug(msg) +} + +// printError is the helper function to log error messages. +func printError(command, op string, err error) { + // dont print cancelation errors + if errorpkg.IsCancelation(err) { + return + } + + // check if we have our own error type + { + cerr, ok := err.(*errorpkg.Error) + if ok { + msg := log.ErrorMessage{ + Err: cleanupError(cerr.Err), + Command: cerr.FullCommand(), + Operation: cerr.Op, + } + log.Error(msg) + return + } + } + + // check if errors are aggregated + { + merr, ok := err.(*multierror.Error) + if ok { + for _, err := range merr.Errors { + customErr, ok := err.(*errorpkg.Error) + if ok { + msg := log.ErrorMessage{ + Err: cleanupError(customErr.Err), + Command: customErr.FullCommand(), + Operation: customErr.Op, + } + log.Error(msg) + continue + } + + msg := log.ErrorMessage{ + Err: cleanupError(err), + Command: command, + Operation: op, + } + + log.Error(msg) + } + return + } + } + + // we don't know the exact error type. log the error as is. + msg := log.ErrorMessage{ + Err: cleanupError(err), + Command: command, + Operation: op, + } + log.Error(msg) +} + +// cleanupError converts multiline messages into +// a single line. +func cleanupError(err error) string { + s := strings.Replace(err.Error(), "\n", " ", -1) + s = strings.Replace(s, "\t", " ", -1) + s = strings.Replace(s, " ", " ", -1) + s = strings.TrimSpace(s) + return s +} diff --git a/vendor/github.com/peak/s5cmd/v2/command/expand.go b/vendor/github.com/peak/s5cmd/v2/command/expand.go new file mode 100644 index 000000000..d079c2251 --- /dev/null +++ b/vendor/github.com/peak/s5cmd/v2/command/expand.go @@ -0,0 +1,96 @@ +package command + +import ( + "context" + "errors" + "sync" + "sync/atomic" + + "github.com/peak/s5cmd/v2/storage" + "github.com/peak/s5cmd/v2/storage/url" +) + +// expandSource returns the full list of objects from the given src argument. +// If src is an expandable URL, such as directory, prefix or a glob, all +// objects are returned by walking the source. +func expandSource( + ctx context.Context, + client storage.Storage, + followSymlinks bool, + srcurl *url.URL, +) (<-chan *storage.Object, error) { + var objType storage.ObjectType + // if the source is local, we send a Stat call to know if we have + // directory or file to walk. For remote storage, we don't want to send + // Stat since it doesn't have any folder semantics. + if !srcurl.IsWildcard() && !srcurl.IsRemote() { + obj, err := client.Stat(ctx, srcurl) + if err != nil { + return nil, err + } + objType = obj.Type + } + + // call storage.List for only walking operations. 
+ if srcurl.IsWildcard() || srcurl.AllVersions || objType.IsDir() { + return client.List(ctx, srcurl, followSymlinks), nil + } + + ch := make(chan *storage.Object, 1) + if storage.ShouldProcessURL(srcurl, followSymlinks) { + ch <- &storage.Object{URL: srcurl, Type: objType} + } + close(ch) + return ch, nil +} + +// expandSources is a non-blocking argument dispatcher. It creates a object +// channel by walking and expanding the given source urls. If the url has a +// glob, it creates a goroutine to list storage items and sends them to object +// channel, otherwise it creates storage object from the original source. +func expandSources( + ctx context.Context, + client storage.Storage, + followSymlinks bool, + srcurls ...*url.URL, +) <-chan *storage.Object { + ch := make(chan *storage.Object) + + go func() { + defer close(ch) + + var wg sync.WaitGroup + var objFound atomic.Bool + + for _, origSrc := range srcurls { + wg.Add(1) + go func(origSrc *url.URL) { + defer wg.Done() + + objch, err := expandSource(ctx, client, followSymlinks, origSrc) + if err != nil { + var objNotFound *storage.ErrGivenObjectNotFound + if !errors.As(err, &objNotFound) { + ch <- &storage.Object{Err: err} + } + return + } + + for object := range objch { + if object.Err == storage.ErrNoObjectFound { + continue + } + ch <- object + objFound.Store(true) + } + }(origSrc) + } + + wg.Wait() + if !objFound.Load() { + ch <- &storage.Object{Err: storage.ErrNoObjectFound} + } + }() + + return ch +} diff --git a/vendor/github.com/peak/s5cmd/v2/command/flag.go b/vendor/github.com/peak/s5cmd/v2/command/flag.go new file mode 100644 index 000000000..256b546f7 --- /dev/null +++ b/vendor/github.com/peak/s5cmd/v2/command/flag.go @@ -0,0 +1,170 @@ +package command + +import ( + "flag" + "fmt" + "strings" + + "github.com/urfave/cli/v2" +) + +type EnumValue struct { + Enum []string + Default string + // ConditionFunction is used to check if the value passed to Set method is valid + // or not. + // If ConditionFunction is not set, it defaults to string '==' comparison. + ConditionFunction func(str, target string) bool + selected string +} + +func (e *EnumValue) Set(value string) error { + if e.ConditionFunction == nil { + e.ConditionFunction = func(str, target string) bool { + return str == target + } + } + for _, enum := range e.Enum { + if e.ConditionFunction(enum, value) { + e.selected = value + return nil + } + } + + return fmt.Errorf("allowed values: [%s]", strings.Join(e.Enum, ", ")) +} + +func (e EnumValue) String() string { + if e.selected == "" { + return e.Default + } + return e.selected +} + +func (e EnumValue) Get() interface{} { + return e +} + +type MapValue map[string]string + +func (m MapValue) String() string { + if m == nil { + m = make(map[string]string) + } + + var s strings.Builder + for key, value := range m { + s.WriteString(fmt.Sprintf("%s=%s ", key, value)) + } + + return s.String() +} + +func (m MapValue) Set(s string) error { + if m == nil { + m = make(map[string]string) + } + + if len(s) == 0 { + return fmt.Errorf("flag can't be passed empty. 
Format: key=value") + } + + tokens := strings.Split(s, "=") + if len(tokens) <= 1 { + return fmt.Errorf("the key value pair(%s) has invalid format", tokens) + } + + key := tokens[0] + value := strings.Join(tokens[1:], "=") + + _, ok := m[key] + if ok { + return fmt.Errorf("key %q is already defined", key) + } + + m[key] = value + return nil +} + +func (m MapValue) Get() interface{} { + if m == nil { + m = make(map[string]string) + } + return m +} + +type MapFlag struct { + Name string + + Category string + DefaultText string + FilePath string + Usage string + + HasBeenSet bool + Required bool + Hidden bool + + Value MapValue +} + +var ( + _ cli.Flag = (*MapFlag)(nil) + _ cli.RequiredFlag = (*MapFlag)(nil) + _ cli.VisibleFlag = (*MapFlag)(nil) + _ cli.DocGenerationFlag = (*MapFlag)(nil) +) + +func (f *MapFlag) Apply(set *flag.FlagSet) error { + if f.Value == nil { + f.Value = make(map[string]string) + } + for _, name := range f.Names() { + set.Var(f.Value, name, f.Usage) + if len(f.Value) > 0 { + f.HasBeenSet = true + } + } + + return nil +} + +func (f *MapFlag) GetUsage() string { + return f.Usage +} + +func (f *MapFlag) Names() []string { + return []string{f.Name} +} + +func (f *MapFlag) IsSet() bool { + return f.HasBeenSet +} + +func (f *MapFlag) IsVisible() bool { + return true +} + +func (f *MapFlag) String() string { + return cli.FlagStringer(f) +} + +func (f *MapFlag) TakesValue() bool { + return true +} + +func (f *MapFlag) GetValue() string { + return f.Value.String() +} + +func (f *MapFlag) GetDefaultText() string { + return "" +} + +func (f *MapFlag) GetEnvVars() []string { + return []string{} +} + +func (f *MapFlag) IsRequired() bool { + return f.Required +} diff --git a/vendor/github.com/peak/s5cmd/v2/command/ls.go b/vendor/github.com/peak/s5cmd/v2/command/ls.go new file mode 100644 index 000000000..87ff1e06b --- /dev/null +++ b/vendor/github.com/peak/s5cmd/v2/command/ls.go @@ -0,0 +1,341 @@ +package command + +import ( + "context" + "fmt" + + "github.com/hashicorp/go-multierror" + "github.com/urfave/cli/v2" + + errorpkg "github.com/peak/s5cmd/v2/error" + "github.com/peak/s5cmd/v2/log" + "github.com/peak/s5cmd/v2/log/stat" + "github.com/peak/s5cmd/v2/storage" + "github.com/peak/s5cmd/v2/storage/url" + "github.com/peak/s5cmd/v2/strutil" +) + +var listHelpTemplate = `Name: + {{.HelpName}} - {{.Usage}} + +Usage: + {{.HelpName}} [options] argument + +Options: + {{range .VisibleFlags}}{{.}} + {{end}} +Examples: + 1. List all buckets + > s5cmd {{.HelpName}} + + 2. List objects and prefixes in a bucket + > s5cmd {{.HelpName}} s3://bucket/ + + 3. List all objects in a bucket + > s5cmd {{.HelpName}} "s3://bucket/*" + + 4. List all objects that matches a wildcard + > s5cmd {{.HelpName}} "s3://bucket/prefix/*/*.gz" + + 5. List all objects in a public bucket + > s5cmd --no-sign-request {{.HelpName}} "s3://bucket/*" + + 6. List all objects in a bucket but exclude the ones with prefix abc + > s5cmd {{.HelpName}} --exclude "abc*" "s3://bucket/*" + + 7. List all object in a requester pays bucket + > s5cmd --request-payer=requester {{.HelpName}} "s3://bucket/*" + + 8. List all versions of an object in the bucket + > s5cmd {{.HelpName}} --all-versions s3://bucket/object + + 9. List all versions of all objects that starts with a prefix in the bucket + > s5cmd {{.HelpName}} --all-versions "s3://bucket/prefix*" + + 10. List all versions of all objects in the bucket + > s5cmd {{.HelpName}} --all-versions "s3://bucket/*" + + 11. 
List all files with their fullpaths + > s5cmd {{.HelpName}} --show-fullpath "s3://bucket/*" + +` + +func NewListCommand() *cli.Command { + cmd := &cli.Command{ + Name: "ls", + HelpName: "ls", + Usage: "list buckets and objects", + CustomHelpTemplate: listHelpTemplate, + Flags: []cli.Flag{ + &cli.BoolFlag{ + Name: "etag", + Aliases: []string{"e"}, + Usage: "show entity tag (ETag) in the output", + }, + &cli.BoolFlag{ + Name: "humanize", + Aliases: []string{"H"}, + Usage: "human-readable output for object sizes", + }, + &cli.BoolFlag{ + Name: "storage-class", + Aliases: []string{"s"}, + Usage: "display full name of the object class", + }, + &cli.StringSliceFlag{ + Name: "exclude", + Usage: "exclude objects with given pattern", + }, + &cli.BoolFlag{ + Name: "all-versions", + Usage: "list all versions of object(s)", + }, + &cli.BoolFlag{ + Name: "show-fullpath", + Usage: "shows only the fullpath names of the object(s)", + }, + }, + Before: func(c *cli.Context) error { + err := validateLSCommand(c) + if err != nil { + printError(commandFromContext(c), c.Command.Name, err) + } + return err + }, + Action: func(c *cli.Context) (err error) { + defer stat.Collect(c.Command.FullName(), &err)() + if !c.Args().Present() { + err := ListBuckets(c.Context, NewStorageOpts(c)) + if err != nil { + printError(commandFromContext(c), c.Command.Name, err) + } + return err + } + + fullCommand := commandFromContext(c) + + srcurl, err := url.New(c.Args().First(), + url.WithAllVersions(c.Bool("all-versions"))) + if err != nil { + printError(fullCommand, c.Command.Name, err) + return err + } + return List{ + src: srcurl, + op: c.Command.Name, + fullCommand: fullCommand, + // flags + showEtag: c.Bool("etag"), + humanize: c.Bool("humanize"), + showStorageClass: c.Bool("storage-class"), + exclude: c.StringSlice("exclude"), + showFullPath: c.Bool("show-fullpath"), + + storageOpts: NewStorageOpts(c), + }.Run(c.Context) + }, + } + + cmd.BashComplete = getBashCompleteFn(cmd, false, false) + return cmd +} + +// List holds list operation flags and states. +type List struct { + src *url.URL + op string + fullCommand string + + // flags + showEtag bool + humanize bool + showStorageClass bool + showFullPath bool + exclude []string + + storageOpts storage.Options +} + +// ListBuckets prints all buckets. +func ListBuckets(ctx context.Context, storageOpts storage.Options) error { + // set as remote storage + url := &url.URL{Type: 0} + client, err := storage.NewRemoteClient(ctx, url, storageOpts) + if err != nil { + return err + } + + buckets, err := client.ListBuckets(ctx, "") + if err != nil { + return err + } + + for _, bucket := range buckets { + log.Info(bucket) + } + + return nil +} + +// Run prints objects at given source. 
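The --exclude handling used by ls (and, in the same shape, by du and select) boils down to wildcard-to-regex translation plus a per-object match; a rough sketch, assuming the createRegexFromWildcard and isURLMatched helpers from this package:

    patterns, err := createRegexFromWildcard([]string{"*.log", "tmp*"})
    if err != nil {
        return err
    }
    for object := range client.List(ctx, srcurl, false) {
        if isURLMatched(patterns, object.URL.Path, srcurl.Prefix) {
            continue // excluded
        }
        log.Info(ListMessage{Object: object})
    }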
+func (l List) Run(ctx context.Context) error { + + client, err := storage.NewClient(ctx, l.src, l.storageOpts) + if err != nil { + printError(l.fullCommand, l.op, err) + return err + } + + var merror error + + excludePatterns, err := createRegexFromWildcard(l.exclude) + if err != nil { + printError(l.fullCommand, l.op, err) + return err + } + + for object := range client.List(ctx, l.src, false) { + if errorpkg.IsCancelation(object.Err) { + continue + } + + if err := object.Err; err != nil { + merror = multierror.Append(merror, err) + printError(l.fullCommand, l.op, err) + continue + } + + if isURLMatched(excludePatterns, object.URL.Path, l.src.Prefix) { + continue + } + + msg := ListMessage{ + Object: object, + showEtag: l.showEtag, + showHumanized: l.humanize, + showStorageClass: l.showStorageClass, + showFullPath: l.showFullPath, + } + + log.Info(msg) + } + + return merror +} + +// ListMessage is a structure for logging ls results. +type ListMessage struct { + Object *storage.Object `json:"object"` + + showEtag bool + showHumanized bool + showStorageClass bool + showFullPath bool +} + +// humanize is a helper function to humanize bytes. +func (l ListMessage) humanize() string { + var size string + if l.showHumanized { + size = strutil.HumanizeBytes(l.Object.Size) + } else { + size = fmt.Sprintf("%d", l.Object.Size) + } + return size +} + +const ( + dateFormat = "2006/01/02 15:04:05" +) + +// String returns the string representation of ListMessage. +func (l ListMessage) String() string { + if l.showFullPath { + return l.Object.URL.String() + } + var etag string + // date and storage fiels + var listFormat = "%19s %2s" + + // align etag + if l.showEtag { + etag = l.Object.Etag + listFormat = listFormat + " %-38s" + } else { + listFormat = listFormat + " %-1s" + } + + // format file size + listFormat = listFormat + " %12s " + // format key and version ID + if l.Object.URL.VersionID != "" { + listFormat = listFormat + " %-50s %s" + } else { + listFormat = listFormat + " %s%s" + } + + var s string + if l.Object.Type.IsDir() { + s = fmt.Sprintf( + listFormat, + "", + "", + "", + "DIR", + l.Object.URL.Relative(), + "", + ) + return s + } + + stclass := "" + if l.showStorageClass { + stclass = fmt.Sprintf("%v", l.Object.StorageClass) + } + + var path string + if l.showFullPath { + path = l.Object.URL.String() + } else { + path = l.Object.URL.Relative() + } + + s = fmt.Sprintf( + listFormat, + l.Object.ModTime.Format(dateFormat), + stclass, + etag, + l.humanize(), + path, + l.Object.URL.VersionID, + ) + + return s +} + +// JSON returns the JSON representation of ListMessage. 
+func (l ListMessage) JSON() string { + return strutil.JSON(l.Object) +} + +func validateLSCommand(c *cli.Context) error { + if c.Args().Len() > 1 { + return fmt.Errorf("expected only 1 argument") + } + + srcurl, err := url.New(c.Args().First(), + url.WithAllVersions(c.Bool("all-versions"))) + if err != nil { + return err + } + + if err := checkVersinoningURLRemote(srcurl); err != nil { + return err + } + + if err := checkVersioningWithGoogleEndpoint(c); err != nil { + return err + } + + return nil +} diff --git a/vendor/github.com/peak/s5cmd/v2/command/mb.go b/vendor/github.com/peak/s5cmd/v2/command/mb.go new file mode 100644 index 000000000..d642ac7a8 --- /dev/null +++ b/vendor/github.com/peak/s5cmd/v2/command/mb.go @@ -0,0 +1,122 @@ +package command + +import ( + "context" + "fmt" + "os" + "path/filepath" + "strings" + + "github.com/urfave/cli/v2" + + "github.com/peak/s5cmd/v2/log" + "github.com/peak/s5cmd/v2/log/stat" + "github.com/peak/s5cmd/v2/storage" + "github.com/peak/s5cmd/v2/storage/url" +) + +var makeBucketHelpTemplate = `Name: + {{.HelpName}} - {{.Usage}} + +Usage: + {{.HelpName}} s3://bucketname + +Options: + {{range .VisibleFlags}}{{.}} + {{end}} +Examples: + 1. Create a new S3 bucket + > s5cmd {{.HelpName}} s3://bucketname +` + +func NewMakeBucketCommand() *cli.Command { + cmd := &cli.Command{ + Name: "mb", + HelpName: "mb", + Usage: "make bucket", + CustomHelpTemplate: makeBucketHelpTemplate, + Before: func(c *cli.Context) error { + err := validateMBCommand(c) + if err != nil { + printError(commandFromContext(c), c.Command.Name, err) + } + return err + }, + Action: func(c *cli.Context) (err error) { + defer stat.Collect(c.Command.FullName(), &err)() + + return MakeBucket{ + src: c.Args().First(), + op: c.Command.Name, + fullCommand: commandFromContext(c), + + storageOpts: NewStorageOpts(c), + }.Run(c.Context) + }, + } + cmd.BashComplete = func(ctx *cli.Context) { + arg := parseArgumentToComplete(ctx) + if strings.HasPrefix(arg, "-") { + cli.DefaultCompleteWithFlags(cmd)(ctx) + } else { + shell := filepath.Base(os.Getenv("SHELL")) + constantCompleteWithDefault(shell, arg, "s3://") + } + } + + return cmd +} + +// MakeBucket holds bucket creation operation flags and states. +type MakeBucket struct { + src string + op string + fullCommand string + + storageOpts storage.Options +} + +// Run creates a bucket. 
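For context, the mb validator further down (validateMBCommand) only accepts a bare bucket URL; illustratively, and assuming the usual IsBucket semantics of this module's url package:

    b, _ := url.New("s3://my-bucket")     // IsBucket() is true, accepted
    o, _ := url.New("s3://my-bucket/key") // IsBucket() is false, rejected as "invalid s3 bucket"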
+func (b MakeBucket) Run(ctx context.Context) error { + bucket, err := url.New(b.src) + if err != nil { + printError(b.fullCommand, b.op, err) + return err + } + + client, err := storage.NewRemoteClient(ctx, &url.URL{}, b.storageOpts) + if err != nil { + printError(b.fullCommand, b.op, err) + return err + } + + if err := client.MakeBucket(ctx, bucket.Bucket); err != nil { + printError(b.fullCommand, b.op, err) + return err + } + + msg := log.InfoMessage{ + Operation: b.op, + Source: bucket, + } + log.Info(msg) + + return nil +} + +func validateMBCommand(c *cli.Context) error { + if c.Args().Len() != 1 { + return fmt.Errorf("expected only 1 argument") + } + + src := c.Args().First() + bucket, err := url.New(src) + if err != nil { + return err + } + if !bucket.IsBucket() { + return fmt.Errorf("invalid s3 bucket") + } + + return nil +} diff --git a/vendor/github.com/peak/s5cmd/v2/command/mv.go b/vendor/github.com/peak/s5cmd/v2/command/mv.go new file mode 100644 index 000000000..78e93218f --- /dev/null +++ b/vendor/github.com/peak/s5cmd/v2/command/mv.go @@ -0,0 +1,65 @@ +package command + +import ( + "github.com/peak/s5cmd/v2/log/stat" + + "github.com/urfave/cli/v2" +) + +var moveHelpTemplate = `Name: + {{.HelpName}} - {{.Usage}} + +Usage: + {{.HelpName}} [options] source destination + +Options: + {{range .VisibleFlags}}{{.}} + {{end}} +Examples: + 1. Move an S3 object to working directory + > s5cmd {{.HelpName}} s3://bucket/prefix/object.gz . + + 2. Move an S3 object and rename + > s5cmd {{.HelpName}} s3://bucket/prefix/object.gz myobject.gz + + 3. Move all S3 objects to a directory + > s5cmd {{.HelpName}} "s3://bucket/*" target-directory/ + + 4. Move a file to S3 bucket + > s5cmd {{.HelpName}} myfile.gz s3://bucket/ + + 5. Move a directory to S3 bucket recursively + > s5cmd {{.HelpName}} dir/ s3://bucket/ + + 6. Move all files to S3 bucket but exclude the ones with txt and gz extension + > s5cmd {{.HelpName}} --exclude "*.txt" --exclude "*.gz" dir/ s3://bucket + + 7. Move all files from S3 bucket to another S3 bucket but exclude the ones starts with log + > s5cmd {{.HelpName}} --exclude "log*" "s3://bucket/*" s3://destbucket +` + +func NewMoveCommand() *cli.Command { + cmd := &cli.Command{ + Name: "mv", + HelpName: "mv", + Usage: "move/rename objects", + Flags: NewCopyCommandFlags(), // move and copy commands share the same flags + CustomHelpTemplate: moveHelpTemplate, + Before: func(c *cli.Context) error { + return NewCopyCommand().Before(c) + }, + Action: func(c *cli.Context) (err error) { + defer stat.Collect(c.Command.FullName(), &err)() + + // delete source + copy, err := NewCopy(c, true) + if err != nil { + return err + } + return copy.Run(c.Context) + }, + } + + cmd.BashComplete = getBashCompleteFn(cmd, false, false) + return cmd +} diff --git a/vendor/github.com/peak/s5cmd/v2/command/pipe.go b/vendor/github.com/peak/s5cmd/v2/command/pipe.go new file mode 100644 index 000000000..77793f7bc --- /dev/null +++ b/vendor/github.com/peak/s5cmd/v2/command/pipe.go @@ -0,0 +1,337 @@ +package command + +import ( + "context" + "errors" + "fmt" + "mime" + "os" + "path/filepath" + + "github.com/urfave/cli/v2" + + errorpkg "github.com/peak/s5cmd/v2/error" + "github.com/peak/s5cmd/v2/log" + "github.com/peak/s5cmd/v2/log/stat" + "github.com/peak/s5cmd/v2/storage" + "github.com/peak/s5cmd/v2/storage/url" +) + +var pipeHelpTemplate = `Name: + {{.HelpName}} - {{.Usage}} + +Usage: + {{.HelpName}} [options] destination + +Options: + {{range .VisibleFlags}}{{.}} + {{end}} +Examples: + 01. 
Stream stdin to an object + > echo "content" | gzip | s5cmd {{.HelpName}} s3://bucket/prefix/object.gz + 02. Pass arbitrary metadata to an object + > cat "flowers.png" | gzip | s5cmd {{.HelpName}} --metadata "imageSize=6032x4032" s3://bucket/prefix/flowers.gz + 03. Download an object and stream it to a bucket + > curl https://github.com/peak/s5cmd/ | s5cmd {{.HelpName}} s3://bucket/s5cmd.html + 04. Compress an object and stream it to a bucket + > tar -cf - file.bin | s5cmd {{.HelpName}} s3://bucket/file.bin.tar +` + +func NewPipeCommandFlags() []cli.Flag { + pipeFlags := []cli.Flag{ + &cli.StringFlag{ + Name: "storage-class", + Usage: "set storage class for target ('STANDARD','REDUCED_REDUNDANCY','GLACIER','STANDARD_IA','ONEZONE_IA','INTELLIGENT_TIERING','DEEP_ARCHIVE')", + }, + &cli.IntFlag{ + Name: "concurrency", + Aliases: []string{"c"}, + Value: defaultCopyConcurrency, + Usage: "number of concurrent parts transferred between host and remote server", + }, + &cli.IntFlag{ + Name: "part-size", + Aliases: []string{"p"}, + Value: defaultPartSize, + Usage: "size of each part transferred between host and remote server, in MiB", + }, + &MapFlag{ + Name: "metadata", + Usage: "set arbitrary metadata for the object", + }, + &cli.StringFlag{ + Name: "sse", + Usage: "perform server side encryption of the data at its destination, e.g. aws:kms", + }, + &cli.StringFlag{ + Name: "sse-kms-key-id", + Usage: "customer master key (CMK) id for SSE-KMS encryption; leave it out if server-side generated key is desired", + }, + &cli.StringFlag{ + Name: "acl", + Usage: "set acl for target: defines granted accesses and their types on different accounts/groups, e.g. pipe --acl 'public-read'", + }, + &cli.StringFlag{ + Name: "cache-control", + Usage: "set cache control for target: defines cache control header for object, e.g. pipe --cache-control 'public, max-age=345600'", + }, + &cli.StringFlag{ + Name: "expires", + Usage: "set expires for target (uses RFC3339 format): defines expires header for object, e.g. pipe --expires '2024-10-01T20:30:00Z'", + }, + &cli.BoolFlag{ + Name: "raw", + Usage: "disable the wildcard operations, useful with filenames that contains glob characters", + }, + &cli.StringFlag{ + Name: "content-type", + Usage: "set content type for target: defines content type header for object, e.g. --content-type text/plain", + }, + &cli.StringFlag{ + Name: "content-encoding", + Usage: "set content encoding for target: defines content encoding header for object, e.g. --content-encoding gzip", + }, + &cli.StringFlag{ + Name: "content-disposition", + Usage: "set content disposition for target: defines content disposition header for object, e.g. 
--content-disposition 'attachment; filename=\"filename.jpg\"'", + }, + &cli.BoolFlag{ + Name: "no-clobber", + Aliases: []string{"n"}, + Usage: "do not overwrite destination if already exists", + }, + } + return pipeFlags +} + +func NewPipeCommand() *cli.Command { + cmd := &cli.Command{ + Name: "pipe", + HelpName: "pipe", + Usage: "stream to remote from stdin", + Flags: NewPipeCommandFlags(), + CustomHelpTemplate: pipeHelpTemplate, + Before: func(c *cli.Context) error { + err := validatePipeCommand(c) + if err != nil { + printError(commandFromContext(c), c.Command.Name, err) + } + return err + }, + Action: func(c *cli.Context) (err error) { + defer stat.Collect(c.Command.FullName(), &err)() + + pipe, err := NewPipe(c, false) + if err != nil { + return err + } + return pipe.Run(c.Context) + }, + } + + cmd.BashComplete = getBashCompleteFn(cmd, false, false) + return cmd +} + +// Pipe holds pipe operation flags and states. +type Pipe struct { + dst *url.URL + op string + fullCommand string + + deleteSource bool + + // flags + noClobber bool + storageClass storage.StorageClass + encryptionMethod string + encryptionKeyID string + acl string + cacheControl string + expires string + contentType string + contentEncoding string + contentDisposition string + metadata map[string]string + + // s3 options + concurrency int + partSize int64 + storageOpts storage.Options +} + +// NewPipe creates Pipe from cli.Context. +func NewPipe(c *cli.Context, deleteSource bool) (*Pipe, error) { + fullCommand := commandFromContext(c) + + dst, err := url.New(c.Args().Get(0), url.WithRaw(c.Bool("raw"))) + if err != nil { + printError(fullCommand, c.Command.Name, err) + return nil, err + } + + metadata, ok := c.Value("metadata").(MapValue) + if !ok { + err := errors.New("metadata flag is not a map") + printError(fullCommand, c.Command.Name, err) + return nil, err + } + + return &Pipe{ + dst: dst, + op: c.Command.Name, + fullCommand: fullCommand, + deleteSource: deleteSource, + // flags + noClobber: c.Bool("no-clobber"), + storageClass: storage.StorageClass(c.String("storage-class")), + concurrency: c.Int("concurrency"), + partSize: c.Int64("part-size") * megabytes, + encryptionMethod: c.String("sse"), + encryptionKeyID: c.String("sse-kms-key-id"), + acl: c.String("acl"), + cacheControl: c.String("cache-control"), + expires: c.String("expires"), + contentType: c.String("content-type"), + contentEncoding: c.String("content-encoding"), + contentDisposition: c.String("content-disposition"), + metadata: metadata, + // s3 options + storageOpts: NewStorageOpts(c), + }, nil +} + +// Run starts copying stdin output to destination. 
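Stripped of flag plumbing, the upload below reduces to a single Put call with metadata assembled from the flags; a simplified sketch (most metadata and error handling omitted), where client, dsturl, concurrency and partSize stand in for the values prepared by NewPipe:

    meta := storage.Metadata{
        ContentType: guessContentTypeByExtension(dsturl), // falls back to application/octet-stream
    }
    err := client.Put(ctx, &stdin{file: os.Stdin}, dsturl, meta, concurrency, partSize)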
+func (c Pipe) Run(ctx context.Context) error { + if c.dst.IsBucket() || c.dst.IsPrefix() { + return fmt.Errorf("target %q must be an object", c.dst) + } + + err := c.shouldOverride(ctx, c.dst) + if err != nil { + if errorpkg.IsWarning(err) { + printDebug(c.op, err, nil, c.dst) + return nil + } + return err + } + + client, err := storage.NewRemoteClient(ctx, c.dst, c.storageOpts) + if err != nil { + return err + } + + metadata := storage.Metadata{ + UserDefined: c.metadata, + ACL: c.acl, + CacheControl: c.cacheControl, + Expires: c.expires, + StorageClass: string(c.storageClass), + ContentEncoding: c.contentEncoding, + ContentDisposition: c.contentDisposition, + EncryptionMethod: c.encryptionMethod, + EncryptionKeyID: c.encryptionKeyID, + } + + if c.contentType != "" { + metadata.ContentType = c.contentType + } else { + metadata.ContentType = guessContentTypeByExtension(c.dst) + } + + err = client.Put(ctx, &stdin{file: os.Stdin}, c.dst, metadata, c.concurrency, c.partSize) + if err != nil { + return err + } + + msg := log.InfoMessage{ + Operation: c.op, + Source: nil, + Destination: c.dst, + Object: &storage.Object{ + StorageClass: c.storageClass, + }, + } + log.Info(msg) + + return nil +} + +// shouldOverride function checks if the destination should be overridden if +// the destination object and given pipe flags conform to the +// override criteria. +func (c Pipe) shouldOverride(ctx context.Context, dsturl *url.URL) error { + // if not asked to override, ignore. + if !c.noClobber { + return nil + } + + client, err := storage.NewClient(ctx, dsturl, c.storageOpts) + if err != nil { + return err + } + + obj, err := statObject(ctx, dsturl, client) + if err != nil { + return err + } + + // if destination not exists, no conditions apply. + if obj == nil { + return nil + } + + if c.noClobber { + return errorpkg.ErrObjectExists + } + + return nil +} + +func validatePipeCommand(c *cli.Context) error { + if c.Args().Len() != 1 { + return fmt.Errorf("expected destination argument") + } + + dst := c.Args().Get(0) + + dsturl, err := url.New(dst, url.WithRaw(c.Bool("raw"))) + if err != nil { + return err + } + + if !dsturl.IsRemote() { + return fmt.Errorf("destination must be a bucket") + } + + if dsturl.IsBucket() || dsturl.IsPrefix() { + return fmt.Errorf("target %q must be an object", dsturl) + } + + // wildcard destination can not be used with pipe + if dsturl.IsWildcard() { + return fmt.Errorf("target %q can not contain glob characters", dst) + } + + return nil +} + +func guessContentTypeByExtension(dsturl *url.URL) string { + contentType := mime.TypeByExtension(filepath.Ext(dsturl.Absolute())) + if contentType == "" { + return "application/octet-stream" + } + return contentType +} + +// stdin is an io.Reader adapter for os.File, enabling it to function solely as +// an io.Reader. The AWS SDK, which accepts an io.Reader for multipart uploads, +// will attempt to use io.Seek if the reader supports it. However, os.Stdin is +// a specific type of file that can not seekable. 
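Put differently, *os.File satisfies io.Seeker, and seeking a pipe fails, so the wrapper defined below deliberately exposes only Read. A tiny illustration (assuming an io import):

    var r io.Reader = &stdin{file: os.Stdin}
    _, seekable := r.(io.Seeker) // false: the wrapper hides Seek from the SDK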
+type stdin struct { + file *os.File +} + +func (s *stdin) Read(p []byte) (n int, err error) { + return s.file.Read(p) +} diff --git a/vendor/github.com/peak/s5cmd/v2/command/presign.go b/vendor/github.com/peak/s5cmd/v2/command/presign.go new file mode 100644 index 000000000..5fae3006d --- /dev/null +++ b/vendor/github.com/peak/s5cmd/v2/command/presign.go @@ -0,0 +1,134 @@ +package command + +import ( + "context" + "fmt" + "time" + + "github.com/urfave/cli/v2" + + "github.com/peak/s5cmd/v2/log/stat" + "github.com/peak/s5cmd/v2/storage" + "github.com/peak/s5cmd/v2/storage/url" +) + +var presignHelpTemplate = `Name: + {{.HelpName}} - {{.Usage}} + +Usage: + {{.HelpName}} [options] source + +Options: + {{range .VisibleFlags}}{{.}} + {{end}} +Examples: + 1. Print a remote object url to stdout + > s5cmd {{.HelpName}} s3://bucket/prefix/object + + 2. Print a remote object url with a specific expiration time to stdout + > s5cmd {{.HelpName}} --expire 24h s3://bucket/prefix/object +` + +func NewPresignCommand() *cli.Command { + cmd := &cli.Command{ + Name: "presign", + HelpName: "presign", + Usage: "print remote object presign url", + Flags: []cli.Flag{ + &cli.DurationFlag{ + Name: "expire", + Usage: "url valid duration", + Value: time.Hour * 3, + }, + &cli.StringFlag{ + Name: "version-id", + Usage: "use the specified version of an object", + }, + }, + CustomHelpTemplate: presignHelpTemplate, + Before: func(c *cli.Context) error { + err := validatePresignCommand(c) + if err != nil { + printError(commandFromContext(c), c.Command.Name, err) + } + return err + }, + Action: func(c *cli.Context) (err error) { + defer stat.Collect(c.Command.FullName(), &err)() + + op := c.Command.Name + fullCommand := commandFromContext(c) + + src, err := url.New(c.Args().Get(0), url.WithVersion(c.String("version-id"))) + if err != nil { + printError(fullCommand, op, err) + return err + } + + return Presign{ + src: src, + op: op, + fullCommand: fullCommand, + expire: c.Duration("expire"), + storageOpts: NewStorageOpts(c), + }.Run(c.Context) + }, + } + return cmd +} + +// Presign holds presign operation flags and states. +type Presign struct { + src *url.URL + op string + fullCommand string + expire time.Duration + + storageOpts storage.Options +} + +// Run prints content of given source to standard output. 
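(What gets printed is a presigned URL for the object, not the object content.) A bare-bones sketch of the same call path, assuming an already-configured remote client and a parsed source URL:

    u, err := client.Presign(ctx, srcurl, 3*time.Hour) // 3h mirrors the --expire default
    if err != nil {
        return err
    }
    fmt.Println(u)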
+func (c Presign) Run(ctx context.Context) error { + client, err := storage.NewRemoteClient(ctx, c.src, c.storageOpts) + if err != nil { + printError(c.fullCommand, c.op, err) + return err + } + + url, err := client.Presign(ctx, c.src, c.expire) + if err != nil { + printError(c.fullCommand, c.op, err) + return err + } + fmt.Println(url) + return nil +} + +func validatePresignCommand(c *cli.Context) error { + if c.Args().Len() != 1 { + return fmt.Errorf("expected remote object url") + } + + src, err := url.New(c.Args().Get(0), url.WithVersion(c.String("version-id"))) + if err != nil { + return err + } + + if !src.IsRemote() { + return fmt.Errorf("source must be a remote object") + } + + if src.IsBucket() || src.IsPrefix() { + return fmt.Errorf("remote source must be an object") + } + + if src.IsWildcard() { + return fmt.Errorf("remote source %q can not contain glob characters", src) + } + + if err := checkVersioningWithGoogleEndpoint(c); err != nil { + return err + } + + return nil +} diff --git a/vendor/github.com/peak/s5cmd/v2/command/rb.go b/vendor/github.com/peak/s5cmd/v2/command/rb.go new file mode 100644 index 000000000..d09c73e12 --- /dev/null +++ b/vendor/github.com/peak/s5cmd/v2/command/rb.go @@ -0,0 +1,93 @@ +package command + +import ( + "context" + + "github.com/urfave/cli/v2" + + "github.com/peak/s5cmd/v2/log" + "github.com/peak/s5cmd/v2/log/stat" + "github.com/peak/s5cmd/v2/storage" + "github.com/peak/s5cmd/v2/storage/url" +) + +var removeBucketHelpTemplate = `Name: + {{.HelpName}} - {{.Usage}} + +Usage: + {{.HelpName}} s3://bucketname + +Options: + {{range .VisibleFlags}}{{.}} + {{end}} +Examples: + 1. Deletes S3 bucket with given name + > s5cmd {{.HelpName}} s3://bucketname +` + +func NewRemoveBucketCommand() *cli.Command { + cmd := &cli.Command{ + Name: "rb", + HelpName: "rb", + Usage: "remove bucket", + CustomHelpTemplate: removeBucketHelpTemplate, + Before: func(c *cli.Context) error { + err := validateMBCommand(c) // uses same validation function with make bucket command. + if err != nil { + printError(commandFromContext(c), c.Command.Name, err) + } + return err + }, + Action: func(c *cli.Context) (err error) { + defer stat.Collect(c.Command.FullName(), &err)() + + return RemoveBucket{ + src: c.Args().First(), + op: c.Command.Name, + fullCommand: commandFromContext(c), + + storageOpts: NewStorageOpts(c), + }.Run(c.Context) + }, + } + + cmd.BashComplete = getBashCompleteFn(cmd, true, true) + return cmd +} + +// RemoveBucket holds bucket deletion operation flags and states. +type RemoveBucket struct { + src string + op string + fullCommand string + + storageOpts storage.Options +} + +// Run removes a bucket. 
+func (b RemoveBucket) Run(ctx context.Context) error { + bucket, err := url.New(b.src) + if err != nil { + printError(b.fullCommand, b.op, err) + return err + } + + client, err := storage.NewRemoteClient(ctx, &url.URL{}, b.storageOpts) + if err != nil { + printError(b.fullCommand, b.op, err) + return err + } + + if err := client.RemoveBucket(ctx, bucket.Bucket); err != nil { + printError(b.fullCommand, b.op, err) + return err + } + + msg := log.InfoMessage{ + Operation: b.op, + Source: bucket, + } + log.Info(msg) + + return nil +} diff --git a/vendor/github.com/peak/s5cmd/v2/command/rm.go b/vendor/github.com/peak/s5cmd/v2/command/rm.go new file mode 100644 index 000000000..3ba092b23 --- /dev/null +++ b/vendor/github.com/peak/s5cmd/v2/command/rm.go @@ -0,0 +1,301 @@ +package command + +import ( + "context" + "fmt" + "regexp" + + "github.com/hashicorp/go-multierror" + "github.com/urfave/cli/v2" + + errorpkg "github.com/peak/s5cmd/v2/error" + "github.com/peak/s5cmd/v2/log" + "github.com/peak/s5cmd/v2/log/stat" + "github.com/peak/s5cmd/v2/storage" + "github.com/peak/s5cmd/v2/storage/url" +) + +var deleteHelpTemplate = `Name: + {{.HelpName}} - {{.Usage}} + +Usage: + {{.HelpName}} argument [argument] + +Options: + {{range .VisibleFlags}}{{.}} + {{end}} +Examples: + 1. Delete an S3 object + > s5cmd {{.HelpName}} s3://bucketname/prefix/object.gz + + 2. Delete all objects with a prefix + > s5cmd {{.HelpName}} "s3://bucketname/prefix/*" + + 3. Delete all objects that matches a wildcard + > s5cmd {{.HelpName}} "s3://bucketname/*/obj*.gz" + + 4. Delete all matching objects and a specific object + > s5cmd {{.HelpName}} "s3://bucketname/prefix/*" s3://bucketname/object1.gz + + 5. Delete all matching objects but exclude the ones with .txt extension or starts with "main" + > s5cmd {{.HelpName}} --exclude "*.txt" --exclude "main*" "s3://bucketname/prefix/*" + + 6. Delete all matching objects but only the ones with .txt extension or starts with "main" + > s5cmd {{.HelpName}} --include "*.txt" --include "main*" "s3://bucketname/prefix/*" + + 7. Delete the specific version of a remote object's content to stdout + > s5cmd {{.HelpName}} --version-id VERSION_ID s3://bucket/prefix/object + + 8. Delete all versions of an object in the bucket + > s5cmd {{.HelpName}} --all-versions s3://bucket/object + + 9. Delete all versions of all objects that starts with a prefix in the bucket + > s5cmd {{.HelpName}} --all-versions "s3://bucket/prefix*" + + 10. 
Delete all versions of all objects in the bucket + > s5cmd {{.HelpName}} --all-versions "s3://bucket/*" +` + +func NewDeleteCommand() *cli.Command { + cmd := &cli.Command{ + Name: "rm", + HelpName: "rm", + Usage: "remove objects", + Flags: []cli.Flag{ + &cli.BoolFlag{ + Name: "raw", + Usage: "disable the wildcard operations, useful with filenames that contains glob characters", + }, + &cli.StringSliceFlag{ + Name: "exclude", + Usage: "exclude objects with given pattern", + }, + &cli.StringSliceFlag{ + Name: "include", + Usage: "include objects with given pattern", + }, + &cli.BoolFlag{ + Name: "all-versions", + Usage: "list all versions of object(s)", + }, + &cli.StringFlag{ + Name: "version-id", + Usage: "use the specified version of an object", + }, + }, + CustomHelpTemplate: deleteHelpTemplate, + Before: func(c *cli.Context) error { + err := validateRMCommand(c) + if err != nil { + printError(commandFromContext(c), c.Command.Name, err) + } + return err + }, + Action: func(c *cli.Context) (err error) { + defer stat.Collect(c.Command.FullName(), &err)() + fullCommand := commandFromContext(c) + + sources := c.Args().Slice() + srcUrls, err := newURLs(c.Bool("raw"), c.String("version-id"), c.Bool("all-versions"), sources...) + if err != nil { + printError(fullCommand, c.Command.Name, err) + return err + } + + excludePatterns, err := createRegexFromWildcard(c.StringSlice("exclude")) + if err != nil { + printError(fullCommand, c.Command.Name, err) + return err + } + + includePatterns, err := createRegexFromWildcard(c.StringSlice("include")) + if err != nil { + printError(fullCommand, c.Command.Name, err) + return err + } + + return Delete{ + src: srcUrls, + op: c.Command.Name, + fullCommand: fullCommand, + + // flags + exclude: c.StringSlice("exclude"), + include: c.StringSlice("include"), + + // patterns + excludePatterns: excludePatterns, + includePatterns: includePatterns, + + storageOpts: NewStorageOpts(c), + }.Run(c.Context) + }, + } + + cmd.BashComplete = getBashCompleteFn(cmd, false, false) + return cmd +} + +// Delete holds delete operation flags and states. +type Delete struct { + src []*url.URL + op string + fullCommand string + + // flag options + exclude []string + include []string + + // patterns + excludePatterns []*regexp.Regexp + includePatterns []*regexp.Regexp + + // storage options + storageOpts storage.Options +} + +// Run remove given sources. +func (d Delete) Run(ctx context.Context) error { + + srcurl := d.src[0] + + client, err := storage.NewClient(ctx, srcurl, d.storageOpts) + if err != nil { + printError(d.fullCommand, d.op, err) + return err + } + + objch := expandSources(ctx, client, false, d.src...) 
+ + var ( + merrorObjects error + merrorResult error + ) + + // do object->url transformation + urlch := make(chan *url.URL) + go func() { + defer close(urlch) + + for object := range objch { + if object.Type.IsDir() || errorpkg.IsCancelation(object.Err) { + continue + } + + if err := object.Err; err != nil { + merrorObjects = multierror.Append(merrorObjects, err) + printError(d.fullCommand, d.op, err) + continue + } + + isExcluded, err := isObjectExcluded(object, d.excludePatterns, d.includePatterns, srcurl.Prefix) + if err != nil { + printError(d.fullCommand, d.op, err) + } + if isExcluded { + continue + } + + urlch <- object.URL + } + }() + + resultch := client.MultiDelete(ctx, urlch) + + for obj := range resultch { + if err := obj.Err; err != nil { + if errorpkg.IsCancelation(obj.Err) { + continue + } + + merrorResult = multierror.Append(merrorResult, obj.Err) + printError(d.fullCommand, d.op, obj.Err) + continue + } + + msg := log.InfoMessage{ + Operation: d.op, + Source: obj.URL, + } + log.Info(msg) + } + + return multierror.Append(merrorResult, merrorObjects).ErrorOrNil() +} + +// newSources creates object URL list from given sources. +func newURLs(isRaw bool, versionID string, isAllVersions bool, sources ...string) ([]*url.URL, error) { + var urls []*url.URL + for _, src := range sources { + srcurl, err := url.New(src, url.WithRaw(isRaw), url.WithVersion(versionID), + url.WithAllVersions(isAllVersions)) + if err != nil { + return nil, err + } + + if err := checkVersinoningURLRemote(srcurl); err != nil { + return nil, err + } + urls = append(urls, srcurl) + } + return urls, nil +} + +func validateRMCommand(c *cli.Context) error { + if !c.Args().Present() { + return fmt.Errorf("expected at least 1 object to remove") + } + + // It might be a reasonable request too. Consider that user wants to delete + // all-versions of "a" and "b", but want to delete only a single + // version of "c" "someversion". User might want to express this as + // `s5cmd rm --all-versions a --all-versions b version-id someversion c` + // but, current implementation does not take repetitive flags into account, + // anyway, this is not supported in the current implementation. + if err := checkVersioningFlagCompatibility(c); err != nil { + return err + } + + if len(c.Args().Slice()) > 1 && c.String("version-id") != "" { + return fmt.Errorf("version-id flag can only be used with single source object") + } + + srcurls, err := newURLs(c.Bool("raw"), c.String("version-id"), c.Bool("all-versions"), c.Args().Slice()...) + if err != nil { + return err + } + + if err := checkVersioningWithGoogleEndpoint(c); err != nil { + return err + } + + var ( + firstBucket string + hasRemote, hasLocal bool + ) + for i, srcurl := range srcurls { + // we don't operate on S3 prefixes for copy and delete operations. 
+ if srcurl.IsBucket() || srcurl.IsPrefix() { + return fmt.Errorf("s3 bucket/prefix cannot be used for delete operations (forgot wildcard character?)") + } + + if srcurl.IsRemote() { + hasRemote = true + } else { + hasLocal = true + } + + if hasLocal && hasRemote { + return fmt.Errorf("arguments cannot have both local and remote sources") + } + if i == 0 { + firstBucket = srcurl.Bucket + continue + } + if srcurl.Bucket != firstBucket { + return fmt.Errorf("removal of objects with different buckets in a single command is not allowed") + } + } + + return nil +} diff --git a/vendor/github.com/peak/s5cmd/v2/command/run.go b/vendor/github.com/peak/s5cmd/v2/command/run.go new file mode 100644 index 000000000..5ef3c7890 --- /dev/null +++ b/vendor/github.com/peak/s5cmd/v2/command/run.go @@ -0,0 +1,227 @@ +package command + +import ( + "bufio" + "context" + "errors" + "flag" + "fmt" + "io" + "os" + "strings" + + "github.com/hashicorp/go-multierror" + "github.com/kballard/go-shellquote" + "github.com/urfave/cli/v2" + + "github.com/peak/s5cmd/v2/parallel" +) + +var runHelpTemplate = `Name: + {{.HelpName}} - {{.Usage}} + +Usage: + {{.HelpName}} [file] + +Options: + {{range .VisibleFlags}}{{.}} + {{end}} +Examples: + 1. Run the commands declared in "commands.txt" file in parallel + > s5cmd {{.HelpName}} commands.txt + + 2. Read commands from standard input and execute in parallel. + > cat commands.txt | s5cmd {{.HelpName}} +` + +func NewRunCommand() *cli.Command { + return &cli.Command{ + Name: "run", + HelpName: "run", + Usage: "run commands in batch", + CustomHelpTemplate: runHelpTemplate, + Before: func(c *cli.Context) error { + err := validateRunCommand(c) + if err != nil { + printError(commandFromContext(c), c.Command.Name, err) + } + return err + }, + Action: func(c *cli.Context) error { + reader := os.Stdin + if c.Args().Len() == 1 { + f, err := os.Open(c.Args().First()) + if err != nil { + printError(commandFromContext(c), c.Command.Name, err) + return err + } + defer f.Close() + + reader = f + } + + return NewRun(c, reader).Run(c.Context) + }, + } +} + +type Run struct { + c *cli.Context + reader io.Reader + + // flags + numWorkers int +} + +func NewRun(c *cli.Context, r io.Reader) Run { + return Run{ + c: c, + reader: r, + numWorkers: c.Int("numworkers"), + } +} + +func (r Run) Run(ctx context.Context) error { + pm := parallel.New(r.numWorkers) + defer pm.Close() + + waiter := parallel.NewWaiter() + + var errDoneCh = make(chan bool) + var merrorWaiter error + go func() { + defer close(errDoneCh) + for err := range waiter.Err() { + merrorWaiter = multierror.Append(merrorWaiter, err) + } + }() + + reader := NewReader(ctx, r.reader) + + lineno := -1 + for line := range reader.Read() { + lineno++ + + line = strings.TrimSpace(line) + if line == "" { + continue + } + + // skip comment lines + if strings.HasPrefix(line, "#") { + continue + } + + fields, err := shellquote.Split(line) + if err != nil { + return err + } + + if len(fields) == 0 { + continue + } + + if fields[0] == "run" { + err := fmt.Errorf("%q command (line: %v) is not permitted in run-mode", "run", lineno) + printError(commandFromContext(r.c), r.c.Command.Name, err) + continue + } + + fn := func() error { + subcmd := fields[0] + + cmd := AppCommand(subcmd) + if cmd == nil { + err := fmt.Errorf("%q command (line: %v) not found", subcmd, lineno) + printError(commandFromContext(r.c), r.c.Command.Name, err) + return nil + } + + flagset := flag.NewFlagSet(subcmd, flag.ExitOnError) + if err := flagset.Parse(fields); err != nil { + 
printError(commandFromContext(r.c), r.c.Command.Name, err) + return nil + } + + ctx := cli.NewContext(app, flagset, r.c) + return cmd.Run(ctx) + } + + pm.Run(fn, waiter) + } + + waiter.Wait() + <-errDoneCh + + if reader.Err() != nil { + printError(commandFromContext(r.c), r.c.Command.Name, reader.Err()) + } + + return multierror.Append(merrorWaiter, reader.Err()).ErrorOrNil() +} + +// Reader is a cancelable reader. +type Reader struct { + *bufio.Reader + err error + linech chan string + ctx context.Context +} + +// NewReader creates a new reader with cancellation. +func NewReader(ctx context.Context, r io.Reader) *Reader { + reader := &Reader{ + ctx: ctx, + Reader: bufio.NewReader(r), + linech: make(chan string), + } + + go reader.read() + return reader +} + +// read reads lines from the underlying reader. +func (r *Reader) read() { + defer close(r.linech) + + for { + select { + case <-r.ctx.Done(): + r.err = r.ctx.Err() + return + default: + // If ReadString encounters an error before finding a delimiter, + // it returns the data read before the error and the error itself (often io.EOF). + line, err := r.ReadString('\n') + if line != "" { + r.linech <- line + } + if err != nil { + if err == io.EOF { + if errors.Is(r.ctx.Err(), context.Canceled) { + r.err = r.ctx.Err() + } + return + } + r.err = multierror.Append(r.err, err) + } + } + } +} + +// Read returns read-only channel to consume lines. +func (r *Reader) Read() <-chan string { + return r.linech +} + +// Err returns encountered errors, if any. +func (r *Reader) Err() error { + return r.err +} + +func validateRunCommand(c *cli.Context) error { + if c.Args().Len() > 1 { + return fmt.Errorf("expected only 1 file") + } + return nil +} diff --git a/vendor/github.com/peak/s5cmd/v2/command/select.go b/vendor/github.com/peak/s5cmd/v2/command/select.go new file mode 100644 index 000000000..4fc000020 --- /dev/null +++ b/vendor/github.com/peak/s5cmd/v2/command/select.go @@ -0,0 +1,438 @@ +package command + +import ( + "context" + "encoding/json" + "fmt" + "os" + "strconv" + "strings" + + "github.com/hashicorp/go-multierror" + "github.com/urfave/cli/v2" + + errorpkg "github.com/peak/s5cmd/v2/error" + "github.com/peak/s5cmd/v2/log/stat" + "github.com/peak/s5cmd/v2/parallel" + "github.com/peak/s5cmd/v2/storage" + "github.com/peak/s5cmd/v2/storage/url" +) + +var selectHelpTemplate = `Name: + {{.HelpName}} - {{.Usage}} + +Usage: + {{.HelpName}} [options] argument + +Options: + {{range .VisibleFlags}}{{.}} + {{end}} +Examples: + 01. Select the average price of the avocado and amount sold, set the output format as csv + > s5cmd select csv -use-header USE --query "SELECT s.avg_price, s.quantity FROM S3Object s WHERE s.item='avocado'" "s3://bucket/prices.csv" + + 02. Query TSV files + > s5cmd select csv --delimiter=\t --use-header USE --query "SELECT s.avg_price, s.quantity FROM S3Object s WHERE s.item='avocado'" "s3://bucket/prices.tsv" + + 03. Select a specific field in a JSON document + > s5cmd select json --structure document --query "SELECT s.tracking_id FROM s3object[*]['metadata']['.zattrs'] s" "s3://bucket/metadata.json" + + 04. 
Query files that contain lines of JSON objects + > s5cmd select json --query "SELECT s.id FROM s3object s WHERE s.lineNumber = 1" +` + +func beforeFunc(c *cli.Context) error { + err := validateSelectCommand(c) + if err != nil { + printError(commandFromContext(c), c.Command.Name, err) + } + return err +} + +func buildSelect(c *cli.Context, inputFormat string, inputStructure *string) (cmd *Select, err error) { + defer stat.Collect(c.Command.FullName(), &err)() + + fullCommand := commandFromContext(c) + + src, err := url.New( + c.Args().Get(0), + url.WithVersion(c.String("version-id")), + url.WithRaw(c.Bool("raw")), + url.WithAllVersions(c.Bool("all-versions")), + ) + + if err != nil { + printError(fullCommand, c.Command.Name, err) + return nil, err + } + + outputFormat := c.String("output-format") + if c.String("output-format") == "" { + outputFormat = inputFormat + } + + cmd = &Select{ + src: src, + op: c.Command.Name, + fullCommand: fullCommand, + // flags + inputFormat: inputFormat, + outputFormat: outputFormat, + query: c.String("query"), + compressionType: c.String("compression"), + exclude: c.StringSlice("exclude"), + forceGlacierTransfer: c.Bool("force-glacier-transfer"), + ignoreGlacierWarnings: c.Bool("ignore-glacier-warnings"), + + storageOpts: NewStorageOpts(c), + } + + // parquet files don't have an input structure + if inputStructure != nil { + cmd.inputStructure = *inputStructure + } + return cmd, nil +} + +func NewSelectCommand() *cli.Command { + sharedFlags := []cli.Flag{ + &cli.StringFlag{ + Name: "query", + Aliases: []string{"e"}, + Usage: "SQL expression to use to select from the objects", + }, + &cli.StringFlag{ + Name: "output-format", + Usage: "output format of the result (options: json, csv)", + }, + &cli.StringSliceFlag{ + Name: "exclude", + Usage: "exclude objects with given pattern", + }, + &cli.BoolFlag{ + Name: "force-glacier-transfer", + Usage: "force transfer of glacier objects whether they are restored or not", + }, + &cli.BoolFlag{ + Name: "ignore-glacier-warnings", + Usage: "turns off glacier warnings: ignore errors encountered during selecting objects", + }, + &cli.BoolFlag{ + Name: "raw", + Usage: "disable the wildcard operations, useful with filenames that contains glob characters", + }, + &cli.BoolFlag{ + Name: "all-versions", + Usage: "list all versions of object(s)", + }, + &cli.StringFlag{ + Name: "version-id", + Usage: "use the specified version of the object", + }, + } + + cmd := &cli.Command{ + Name: "select", + HelpName: "select", + Usage: "run SQL queries on objects", + Subcommands: []*cli.Command{ + { + Name: "csv", + Usage: "run queries on csv files", + Flags: append([]cli.Flag{ + &cli.StringFlag{ + Name: "delimiter for the csv file", + Usage: "delimiter of the csv file.", + Value: ",", + }, + &cli.StringFlag{ + Name: "use-header", + Usage: "use header of the csv file. (options for AWS: IGNORE, NONE, USE)", + Value: "NONE", + }, + &cli.StringFlag{ + Name: "compression", + Usage: "input compression format (options for AWS: GZIP or BZIP2)", + }, + }, sharedFlags...), + CustomHelpTemplate: selectHelpTemplate, + Before: beforeFunc, + Action: func(c *cli.Context) (err error) { + delimiter := c.String("delimiter") + // We are doing this because the delimiters that are special characters + // are automatically escaped by go. We quote/unquote to capture the actual + // delimiter. 
See: https://stackoverflow.com/questions/59952721/how-to-read-t-as-tab-in-golang + quotedDelimiter, err := strconv.Unquote(`"` + delimiter + `"`) + + if err != nil { + printError("select csv", c.Command.Name, err) + return err + } + + cmd, err := buildSelect(c, "csv", "edDelimiter) + + // Since this flag is only specific to csv + // and we set a default value, we tend + // to set it explicitly after building + // the command rather than setting it + // on the shared builder. We also + // show the options, but we don't + // constraint the options that can be + // passed to this flag, since other + // providers might support other options + // that AWS does not support. + cmd.fileHeaderInfo = c.String("use-header") + if err != nil { + printError(cmd.fullCommand, c.Command.Name, err) + return err + } + return cmd.Run(c.Context) + }, + }, + { + Name: "json", + Usage: "run queries on json files", + Flags: append([]cli.Flag{ + &cli.GenericFlag{ + Name: "structure", + Usage: "how objects are aligned in the json file, options:(lines, document)", + Value: &EnumValue{ + Enum: []string{"lines", "document"}, + Default: "lines", + ConditionFunction: func(str, target string) bool { + return strings.ToLower(target) == str + }, + }, + }, + &cli.StringFlag{ + Name: "compression", + Usage: "input compression format (options for AWS: GZIP or BZIP2)", + }, + }, sharedFlags...), + CustomHelpTemplate: selectHelpTemplate, + Before: beforeFunc, + Action: func(c *cli.Context) (err error) { + structure := c.String("structure") + cmd, err := buildSelect(c, "json", &structure) + if err != nil { + printError(cmd.fullCommand, c.Command.Name, err) + return err + } + return cmd.Run(c.Context) + }, + }, + { + Name: "parquet", + Usage: "run queries on parquet files", + Flags: sharedFlags, + CustomHelpTemplate: selectHelpTemplate, + Before: beforeFunc, + Action: func(c *cli.Context) (err error) { + cmd, err := buildSelect(c, "parquet", nil) + if err != nil { + printError(cmd.fullCommand, c.Command.Name, err) + return err + } + return cmd.Run(c.Context) + }, + }, + }, + Flags: append([]cli.Flag{ + &cli.StringFlag{ + Name: "compression", + Usage: "input compression format (options for AWS: GZIP or BZIP2)", + }, + }, sharedFlags...), + Before: func(c *cli.Context) (err error) { + if c.Args().Len() == 0 { + err = fmt.Errorf("expected source argument") + printError(commandFromContext(c), c.Command.Name, err) + return err + } + return nil + }, + Action: func(c *cli.Context) (err error) { + // default fallback + structure := "lines" + cmd, err := buildSelect(c, "json", &structure) + if err != nil { + printError(cmd.fullCommand, c.Command.Name, err) + return err + } + return cmd.Run(c.Context) + }, + CustomHelpTemplate: selectHelpTemplate, + } + cmd.BashComplete = getBashCompleteFn(cmd, true, false) + return cmd +} + +// Select holds select operation flags and states. +type Select struct { + src *url.URL + op string + fullCommand string + + query string + inputFormat string + compressionType string + inputStructure string + fileHeaderInfo string + outputFormat string + exclude []string + forceGlacierTransfer bool + ignoreGlacierWarnings bool + + // s3 options + storageOpts storage.Options +} + +// Run starts copying given source objects to destination. 
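+//
+// A minimal sketch of driving this method directly, assuming the bucket, key
+// and query literals below (illustrative only, not taken from the sources):
+//
+//	src, _ := url.New("s3://bucket/prices.csv")
+//	sel := Select{
+//		src:          src,
+//		op:           "select",
+//		query:        "SELECT * FROM S3Object s",
+//		inputFormat:  "csv",
+//		outputFormat: "csv",
+//		storageOpts:  storage.Options{},
+//	}
+//	err := sel.Run(context.Background())
+//
+// Matching records are written to stdout as they arrive on the result channel.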
+func (s Select) Run(ctx context.Context) error { + client, err := storage.NewRemoteClient(ctx, s.src, s.storageOpts) + if err != nil { + printError(s.fullCommand, s.op, err) + return err + } + + ctx, cancel := context.WithCancel(ctx) + defer cancel() + + objch, err := expandSource(ctx, client, false, s.src) + if err != nil { + printError(s.fullCommand, s.op, err) + return err + } + + var ( + merrorWaiter error + merrorObjects error + ) + + waiter := parallel.NewWaiter() + errDoneCh := make(chan bool) + writeDoneCh := make(chan bool) + resultCh := make(chan json.RawMessage, 128) + + go func() { + defer close(errDoneCh) + for err := range waiter.Err() { + printError(s.fullCommand, s.op, err) + merrorWaiter = multierror.Append(merrorWaiter, err) + } + }() + + go func() { + defer close(writeDoneCh) + var fatalError error + for { + record, ok := <-resultCh + if !ok { + break + } + if fatalError != nil { + // Drain the channel. + continue + } + if _, err := os.Stdout.Write(append(record, '\n')); err != nil { + // Stop reading upstream. Notably useful for EPIPE. + cancel() + printError(s.fullCommand, s.op, err) + fatalError = err + } + } + }() + + excludePatterns, err := createRegexFromWildcard(s.exclude) + if err != nil { + printError(s.fullCommand, s.op, err) + return err + } + + for object := range objch { + if object.Type.IsDir() || errorpkg.IsCancelation(object.Err) { + continue + } + + if err := object.Err; err != nil { + merrorObjects = multierror.Append(merrorObjects, err) + printError(s.fullCommand, s.op, err) + continue + } + + if object.StorageClass.IsGlacier() && !s.forceGlacierTransfer { + if !s.ignoreGlacierWarnings { + err := fmt.Errorf("object '%v' is on Glacier storage", object) + merrorObjects = multierror.Append(merrorObjects, err) + printError(s.fullCommand, s.op, err) + } + continue + } + + if isURLMatched(excludePatterns, object.URL.Path, s.src.Prefix) { + continue + } + + task := s.prepareTask(ctx, client, object.URL, resultCh) + parallel.Run(task, waiter) + + } + + waiter.Wait() + close(resultCh) + <-errDoneCh + <-writeDoneCh + + return multierror.Append(merrorWaiter, merrorObjects).ErrorOrNil() +} + +func (s Select) prepareTask(ctx context.Context, client *storage.S3, url *url.URL, resultCh chan<- json.RawMessage) func() error { + return func() error { + query := &storage.SelectQuery{ + ExpressionType: "SQL", + Expression: s.query, + InputFormat: s.inputFormat, + InputContentStructure: s.inputStructure, + FileHeaderInfo: s.fileHeaderInfo, + OutputFormat: s.outputFormat, + CompressionType: s.compressionType, + } + + return client.Select(ctx, url, query, resultCh) + } +} + +func validateSelectCommand(c *cli.Context) error { + if c.Args().Len() != 1 { + return fmt.Errorf("expected source argument") + } + + if err := checkVersioningFlagCompatibility(c); err != nil { + return err + } + + if err := checkVersioningWithGoogleEndpoint(c); err != nil { + return err + } + + srcurl, err := url.New( + c.Args().Get(0), + url.WithVersion(c.String("version-id")), + url.WithRaw(c.Bool("raw")), + url.WithAllVersions(c.Bool("all-versions")), + ) + + if err != nil { + return err + } + + if !srcurl.IsRemote() { + return fmt.Errorf("source must be remote") + } + + if c.String("query") == "" { + return fmt.Errorf("query must be non-empty") + } + + return nil +} diff --git a/vendor/github.com/peak/s5cmd/v2/command/sync.go b/vendor/github.com/peak/s5cmd/v2/command/sync.go new file mode 100644 index 000000000..c848e7abf --- /dev/null +++ b/vendor/github.com/peak/s5cmd/v2/command/sync.go @@ -0,0 
+1,584 @@ +package command + +import ( + "context" + "fmt" + "io" + "os" + "path/filepath" + "strings" + "sync" + + "github.com/aws/aws-sdk-go/aws/awserr" + "github.com/hashicorp/go-multierror" + "github.com/lanrat/extsort" + "github.com/urfave/cli/v2" + + errorpkg "github.com/peak/s5cmd/v2/error" + "github.com/peak/s5cmd/v2/log" + "github.com/peak/s5cmd/v2/log/stat" + "github.com/peak/s5cmd/v2/parallel" + "github.com/peak/s5cmd/v2/storage" + "github.com/peak/s5cmd/v2/storage/url" +) + +const ( + extsortChannelBufferSize = 1_000 + extsortChunkSize = 100_000 +) + +var syncHelpTemplate = `Name: + {{.HelpName}} - {{.Usage}} + +Usage: + {{.HelpName}} [options] source destination + +Options: + {{range .VisibleFlags}}{{.}} + {{end}} +Examples: + 01. Sync local folder to s3 bucket + > s5cmd {{.HelpName}} folder/ s3://bucket/ + + 02. Sync S3 bucket to local folder + > s5cmd {{.HelpName}} "s3://bucket/*" folder/ + + 03. Sync S3 bucket objects under prefix to S3 bucket. + > s5cmd {{.HelpName}} "s3://sourcebucket/prefix/*" s3://destbucket/ + + 04. Sync local folder to S3 but delete the files that S3 bucket has but local does not have. + > s5cmd {{.HelpName}} --delete folder/ s3://bucket/ + + 05. Sync S3 bucket to local folder but use size as only comparison criteria. + > s5cmd {{.HelpName}} --size-only "s3://bucket/*" folder/ + + 06. Sync a file to S3 bucket + > s5cmd {{.HelpName}} myfile.gz s3://bucket/ + + 07. Sync matching S3 objects to another bucket + > s5cmd {{.HelpName}} "s3://bucket/*.gz" s3://target-bucket/prefix/ + + 08. Perform KMS Server Side Encryption of the object(s) at the destination + > s5cmd {{.HelpName}} --sse aws:kms s3://bucket/object s3://target-bucket/prefix/object + + 09. Perform KMS-SSE of the object(s) at the destination using customer managed Customer Master Key (CMK) key id + > s5cmd {{.HelpName}} --sse aws:kms --sse-kms-key-id s3://bucket/object s3://target-bucket/prefix/object + + 10. Sync all files to S3 bucket but exclude the ones with txt and gz extension + > s5cmd {{.HelpName}} --exclude "*.txt" --exclude "*.gz" dir/ s3://bucket + + 11. Sync all files to S3 bucket but include the only ones with txt and gz extension + > s5cmd {{.HelpName}} --include "*.txt" --include "*.gz" dir/ s3://bucket +` + +func NewSyncCommandFlags() []cli.Flag { + syncFlags := []cli.Flag{ + &cli.BoolFlag{ + Name: "delete", + Usage: "delete objects in destination but not in source", + }, + &cli.BoolFlag{ + Name: "size-only", + Usage: "make size of object only criteria to decide whether an object should be synced", + }, + &cli.BoolFlag{ + Name: "exit-on-error", + Usage: "stops the sync process if an error is received", + }, + } + sharedFlags := NewSharedFlags() + return append(syncFlags, sharedFlags...) +} + +func NewSyncCommand() *cli.Command { + cmd := &cli.Command{ + Name: "sync", + HelpName: "sync", + Usage: "sync objects", + Flags: NewSyncCommandFlags(), + CustomHelpTemplate: syncHelpTemplate, + Before: func(c *cli.Context) error { + // sync command share same validation method as copy command + err := validateCopyCommand(c) + if err != nil { + printError(commandFromContext(c), c.Command.Name, err) + } + return err + }, + Action: func(c *cli.Context) (err error) { + defer stat.Collect(c.Command.FullName(), &err)() + + return NewSync(c).Run(c) + }, + } + + cmd.BashComplete = getBashCompleteFn(cmd, false, false) + return cmd +} + +type ObjectPair struct { + src, dst *storage.Object +} + +// Sync holds sync operation flags and states. 
+type Sync struct { + src string + dst string + op string + fullCommand string + + // flags + delete bool + sizeOnly bool + exitOnError bool + + // s3 options + storageOpts storage.Options + + followSymlinks bool + storageClass storage.StorageClass + raw bool + + srcRegion string + dstRegion string +} + +// NewSync creates Sync from cli.Context +func NewSync(c *cli.Context) Sync { + return Sync{ + src: c.Args().Get(0), + dst: c.Args().Get(1), + op: c.Command.Name, + fullCommand: commandFromContext(c), + + // flags + delete: c.Bool("delete"), + sizeOnly: c.Bool("size-only"), + exitOnError: c.Bool("exit-on-error"), + + // flags + followSymlinks: !c.Bool("no-follow-symlinks"), + storageClass: storage.StorageClass(c.String("storage-class")), + raw: c.Bool("raw"), + // region settings + srcRegion: c.String("source-region"), + dstRegion: c.String("destination-region"), + storageOpts: NewStorageOpts(c), + } +} + +// Run compares files, plans necessary s5cmd commands to execute +// and executes them in order to sync source to destination. +func (s Sync) Run(c *cli.Context) error { + srcurl, err := url.New(s.src, url.WithRaw(s.raw)) + if err != nil { + return err + } + + dsturl, err := url.New(s.dst, url.WithRaw(s.raw)) + if err != nil { + return err + } + + ctx, cancel := context.WithCancel(c.Context) + + sourceObjects, destObjects, err := s.getSourceAndDestinationObjects(ctx, cancel, srcurl, dsturl) + if err != nil { + printError(s.fullCommand, s.op, err) + return err + } + + isBatch := srcurl.IsWildcard() + if !isBatch && !srcurl.IsRemote() { + sourceClient, err := storage.NewClient(ctx, srcurl, s.storageOpts) + if err != nil { + return err + } + + obj, err := sourceClient.Stat(ctx, srcurl) + if err != nil { + return err + } + + isBatch = obj != nil && obj.Type.IsDir() + } + + onlySource, onlyDest, commonObjects := compareObjects(sourceObjects, destObjects) + + sourceObjects = nil + destObjects = nil + + waiter := parallel.NewWaiter() + var ( + merrorWaiter error + errDoneCh = make(chan bool) + ) + + go func() { + defer close(errDoneCh) + for err := range waiter.Err() { + if strings.Contains(err.Error(), "too many open files") { + fmt.Println(strings.TrimSpace(fdlimitWarning)) + fmt.Printf("ERROR %v\n", err) + + os.Exit(1) + } + printError(s.fullCommand, s.op, err) + merrorWaiter = multierror.Append(merrorWaiter, err) + } + }() + + strategy := NewStrategy(s.sizeOnly) // create comparison strategy. + pipeReader, pipeWriter := io.Pipe() // create a reader, writer pipe to pass commands to run + + // Create commands in background. + go s.planRun(c, onlySource, onlyDest, commonObjects, dsturl, strategy, pipeWriter, isBatch) + + err = NewRun(c, pipeReader).Run(ctx) + return multierror.Append(err, merrorWaiter).ErrorOrNil() +} + +// compareObjects compares source and destination objects. It assumes that +// sourceObjects and destObjects channels are already sorted in ascending order. +// Returns objects those in only source, only destination +// and both. 
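+//
+// A short walk-through with assumed relative paths: for sorted inputs
+// source = [a, b, d] and destination = [b, c], the merge below sends a and d
+// to the source-only channel, c to the destination-only channel, and the
+// (b, b) pair to the common channel, reading each input exactly once.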
+func compareObjects(sourceObjects, destObjects chan *storage.Object) (chan *url.URL, chan *url.URL, chan *ObjectPair) { + var ( + srcOnly = make(chan *url.URL, extsortChannelBufferSize) + dstOnly = make(chan *url.URL, extsortChannelBufferSize) + commonObj = make(chan *ObjectPair, extsortChannelBufferSize) + srcName string + dstName string + ) + + go func() { + src, srcOk := <-sourceObjects + dst, dstOk := <-destObjects + + defer close(srcOnly) + defer close(dstOnly) + defer close(commonObj) + + for { + if srcOk { + srcName = filepath.ToSlash(src.URL.Relative()) + } + if dstOk { + dstName = filepath.ToSlash(dst.URL.Relative()) + } + + if srcOk && dstOk { + if srcName < dstName { + srcOnly <- src.URL + src, srcOk = <-sourceObjects + } else if srcName == dstName { // if there is a match. + commonObj <- &ObjectPair{src: src, dst: dst} + src, srcOk = <-sourceObjects + dst, dstOk = <-destObjects + } else { + dstOnly <- dst.URL + dst, dstOk = <-destObjects + } + } else if srcOk { + srcOnly <- src.URL + src, srcOk = <-sourceObjects + } else if dstOk { + dstOnly <- dst.URL + dst, dstOk = <-destObjects + } else /* if !srcOK && !dstOk */ { + break + } + } + }() + + return srcOnly, dstOnly, commonObj +} + +// getSourceAndDestinationObjects returns source and destination objects from +// given URLs. The returned channels gives objects sorted in ascending order +// with respect to their url.Relative path. See also storage.Less. +func (s Sync) getSourceAndDestinationObjects(ctx context.Context, cancel context.CancelFunc, srcurl, dsturl *url.URL) (chan *storage.Object, chan *storage.Object, error) { + sourceClient, err := storage.NewClient(ctx, srcurl, s.storageOpts) + if err != nil { + return nil, nil, err + } + + destClient, err := storage.NewClient(ctx, dsturl, s.storageOpts) + if err != nil { + return nil, nil, err + } + + // add * to end of destination string, to get all objects recursively. + var destinationURLPath string + if strings.HasSuffix(s.dst, "/") { + destinationURLPath = s.dst + "*" + } else { + destinationURLPath = s.dst + "/*" + } + + destObjectsURL, err := url.New(destinationURLPath) + if err != nil { + return nil, nil, err + } + + var ( + sourceObjects = make(chan *storage.Object, extsortChannelBufferSize) + destObjects = make(chan *storage.Object, extsortChannelBufferSize) + ) + + extsortDefaultConfig := extsort.DefaultConfig() + extsortConfig := &extsort.Config{ + ChunkSize: extsortChunkSize, + NumWorkers: extsortDefaultConfig.NumWorkers, + ChanBuffSize: extsortChannelBufferSize, + SortedChanBuffSize: extsortChannelBufferSize, + } + extsortDefaultConfig = nil + + // get source objects. 
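+	// Both sides run the same pipeline: client.List streams objects, skippable
+	// entries and fatal errors are filtered out, extsort sorts the remainder by
+	// relative path, and the sorted stream feeds the channel that compareObjects
+	// merges later.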
+ go func() { + defer close(sourceObjects) + unfilteredSrcObjectChannel := sourceClient.List(ctx, srcurl, s.followSymlinks) + filteredSrcObjectChannel := make(chan extsort.SortType, extsortChannelBufferSize) + + go func() { + defer close(filteredSrcObjectChannel) + // filter and redirect objects + for st := range unfilteredSrcObjectChannel { + if st.Err != nil && s.shouldStopSync(st.Err) { + msg := log.ErrorMessage{ + Err: cleanupError(st.Err), + Command: s.fullCommand, + Operation: s.op, + } + log.Error(msg) + cancel() + } + if s.shouldSkipObject(st, true) { + continue + } + filteredSrcObjectChannel <- *st + } + }() + + var ( + sorter *extsort.SortTypeSorter + srcOutputChan chan extsort.SortType + ) + + sorter, srcOutputChan, srcErrCh := extsort.New(filteredSrcObjectChannel, storage.FromBytes, storage.Less, extsortConfig) + sorter.Sort(ctx) + + for srcObject := range srcOutputChan { + o := srcObject.(storage.Object) + sourceObjects <- &o + } + + // read and print the external sort errors + go func() { + for err := range srcErrCh { + printError(s.fullCommand, s.op, err) + } + }() + }() + + // get destination objects. + go func() { + defer close(destObjects) + unfilteredDestObjectsChannel := destClient.List(ctx, destObjectsURL, false) + filteredDstObjectChannel := make(chan extsort.SortType, extsortChannelBufferSize) + + go func() { + defer close(filteredDstObjectChannel) + // filter and redirect objects + for dt := range unfilteredDestObjectsChannel { + if dt.Err != nil && s.shouldStopSync(dt.Err) { + msg := log.ErrorMessage{ + Err: cleanupError(dt.Err), + Command: s.fullCommand, + Operation: s.op, + } + log.Error(msg) + cancel() + } + if s.shouldSkipObject(dt, false) { + continue + } + filteredDstObjectChannel <- *dt + } + }() + + var ( + dstSorter *extsort.SortTypeSorter + dstOutputChan chan extsort.SortType + ) + + dstSorter, dstOutputChan, dstErrCh := extsort.New(filteredDstObjectChannel, storage.FromBytes, storage.Less, extsortConfig) + dstSorter.Sort(ctx) + + for destObject := range dstOutputChan { + o := destObject.(storage.Object) + destObjects <- &o + } + + // read and print the external sort errors + go func() { + for err := range dstErrCh { + printError(s.fullCommand, s.op, err) + } + }() + }() + + return sourceObjects, destObjects, nil +} + +// planRun prepares the commands and writes them to writer 'w'. +func (s Sync) planRun( + c *cli.Context, + onlySource, onlyDest chan *url.URL, + common chan *ObjectPair, + dsturl *url.URL, + strategy SyncStrategy, + w io.WriteCloser, + isBatch bool, +) { + defer w.Close() + + // Always use raw mode since sync command generates commands + // from raw S3 objects. Otherwise, generated copy command will + // try to expand given source. + defaultFlags := map[string]interface{}{ + "raw": true, + } + + // it should wait until both of the child goroutines for onlySource and common channels + // are completed before closing the WriteCloser w to ensure that all URLs are processed. 
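+	// The destination-only goroutine below joins the same WaitGroup, so the
+	// deferred w.Close also waits for the generated "rm" command (with --delete)
+	// or for the destination-only channel to be fully drained (without it).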
+ var wg sync.WaitGroup + + // only in source + wg.Add(1) + go func() { + defer wg.Done() + for srcurl := range onlySource { + curDestURL := generateDestinationURL(srcurl, dsturl, isBatch) + command, err := generateCommand(c, "cp", defaultFlags, srcurl, curDestURL) + if err != nil { + printDebug(s.op, err, srcurl, curDestURL) + continue + } + fmt.Fprintln(w, command) + } + }() + + // both in source and destination + wg.Add(1) + go func() { + defer wg.Done() + for commonObject := range common { + sourceObject, destObject := commonObject.src, commonObject.dst + curSourceURL, curDestURL := sourceObject.URL, destObject.URL + err := strategy.ShouldSync(sourceObject, destObject) // check if object should be copied. + if err != nil { + printDebug(s.op, err, curSourceURL, curDestURL) + continue + } + + command, err := generateCommand(c, "cp", defaultFlags, curSourceURL, curDestURL) + if err != nil { + printDebug(s.op, err, curSourceURL, curDestURL) + continue + } + fmt.Fprintln(w, command) + } + }() + + // only in destination + wg.Add(1) + go func() { + defer wg.Done() + if s.delete { + // unfortunately we need to read them all! + // or rewrite generateCommand function? + dstURLs := make([]*url.URL, 0, extsortChunkSize) + + for d := range onlyDest { + dstURLs = append(dstURLs, d) + } + + if len(dstURLs) == 0 { + return + } + + command, err := generateCommand(c, "rm", defaultFlags, dstURLs...) + if err != nil { + printDebug(s.op, err, dstURLs...) + return + } + fmt.Fprintln(w, command) + } else { + // we only need to consume them from the channel so that rest of the objects + // can be sent to channel. + for d := range onlyDest { + _ = d + } + } + }() + + wg.Wait() +} + +// generateDestinationURL generates destination url for given +// source url if it would have been in destination. +func generateDestinationURL(srcurl, dsturl *url.URL, isBatch bool) *url.URL { + objname := srcurl.Base() + if isBatch { + objname = srcurl.Relative() + } + + if dsturl.IsRemote() { + if dsturl.IsPrefix() || dsturl.IsBucket() { + return dsturl.Join(objname) + } + return dsturl.Clone() + + } + + return dsturl.Join(objname) +} + +// shouldSkipObject checks is object should be skipped. +func (s Sync) shouldSkipObject(object *storage.Object, verbose bool) bool { + if object.Type.IsDir() || errorpkg.IsCancelation(object.Err) { + return true + } + + if err := object.Err; err != nil { + if verbose { + printError(s.fullCommand, s.op, err) + } + return true + } + + if object.StorageClass.IsGlacier() { + if verbose { + err := fmt.Errorf("object '%v' is on Glacier storage", object) + printError(s.fullCommand, s.op, err) + } + return true + } + return false +} + +// shouldStopSync determines whether a sync process should be stopped or not. 
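+// In short: storage.ErrNoObjectFound never stops the sync, the AWS error codes
+// "AccessDenied" and "NoSuchBucket" always do, and any other error stops it
+// only when --exit-on-error was given.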
+func (s Sync) shouldStopSync(err error) bool { + if err == storage.ErrNoObjectFound { + return false + } + if awsErr, ok := err.(awserr.Error); ok { + switch awsErr.Code() { + case "AccessDenied", "NoSuchBucket": + return true + } + } + return s.exitOnError +} diff --git a/vendor/github.com/peak/s5cmd/v2/command/sync_strategy.go b/vendor/github.com/peak/s5cmd/v2/command/sync_strategy.go new file mode 100644 index 000000000..fe6e948e9 --- /dev/null +++ b/vendor/github.com/peak/s5cmd/v2/command/sync_strategy.go @@ -0,0 +1,52 @@ +package command + +import ( + errorpkg "github.com/peak/s5cmd/v2/error" + "github.com/peak/s5cmd/v2/storage" +) + +// SyncStrategy is the interface to make decision whether given source object should be synced +// to destination object +type SyncStrategy interface { + ShouldSync(srcObject, dstObject *storage.Object) error +} + +func NewStrategy(sizeOnly bool) SyncStrategy { + if sizeOnly { + return &SizeOnlyStrategy{} + } else { + return &SizeAndModificationStrategy{} + } +} + +// SizeOnlyStrategy determines to sync based on objects' file sizes. +type SizeOnlyStrategy struct{} + +func (s *SizeOnlyStrategy) ShouldSync(srcObj, dstObj *storage.Object) error { + if srcObj.Size == dstObj.Size { + return errorpkg.ErrObjectSizesMatch + } + return nil +} + +// SizeAndModificationStrategy determines to sync based on objects' both sizes and modification times. +// It treats source object as the source-of-truth; +// +// time: src > dst size: src != dst should sync: yes +// time: src > dst size: src == dst should sync: yes +// time: src <= dst size: src != dst should sync: yes +// time: src <= dst size: src == dst should sync: no +type SizeAndModificationStrategy struct{} + +func (sm *SizeAndModificationStrategy) ShouldSync(srcObj, dstObj *storage.Object) error { + srcMod, dstMod := srcObj.ModTime, dstObj.ModTime + if srcMod.After(*dstMod) { + return nil + } + + if srcObj.Size != dstObj.Size { + return nil + } + + return errorpkg.ErrObjectIsNewerAndSizesMatch +} diff --git a/vendor/github.com/peak/s5cmd/v2/command/validation.go b/vendor/github.com/peak/s5cmd/v2/command/validation.go new file mode 100644 index 000000000..6530fe82f --- /dev/null +++ b/vendor/github.com/peak/s5cmd/v2/command/validation.go @@ -0,0 +1,75 @@ +package command + +import ( + "fmt" + urlpkg "net/url" + + "github.com/peak/s5cmd/v2/storage" + "github.com/peak/s5cmd/v2/storage/url" + "github.com/urfave/cli/v2" +) + +const ( + versioningNotSupportedWarning = "versioning related features are not supported with the given endpoint %q" + allVersionsFlagName = "all-versions" + versionIDFlagName = "version-id" +) + +// checkVersinoningURLRemote checks if the versioning related flags are used with +// local objects. Because the versioning is only supported with s3. +func checkVersinoningURLRemote(url *url.URL) error { + if !url.IsRemote() && url.IsVersioned() { + return fmt.Errorf("%q, and %q flags can only be used with remote objects", allVersionsFlagName, versionIDFlagName) + } + return nil +} + +// checkVersioningFlagCompatibility checks if the incompatible versioning flags +// are used together. Because it is not allowed to refer to both "all versions" and +// a specific version of an object together. 
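+//
+// For example, an invocation such as
+// "s5cmd select --all-versions --version-id <id> --query ... s3://bucket/key"
+// (placeholders assumed) is rejected by this check.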
+func checkVersioningFlagCompatibility(ctx *cli.Context) error { + if ctx.Bool(allVersionsFlagName) && ctx.String(versionIDFlagName) != "" { + return fmt.Errorf("it is not allowed to combine %q and %q flags", allVersionsFlagName, versionIDFlagName) + } + return nil +} + +// checkVersioningWithGoogleEndpoint checks if the versioning flags are used with +// the Google Endpoint. Because the s3 versioning operations are not compatible with +// GCS's versioning API. +func checkVersioningWithGoogleEndpoint(ctx *cli.Context) error { + endpoint := ctx.String("endpoint-url") + if endpoint == "" { + return nil + } + + u, err := urlpkg.Parse(endpoint) + if err != nil { + return err + } + + if storage.IsGoogleEndpoint(*u) && (ctx.Bool(allVersionsFlagName) || ctx.String(versionIDFlagName) != "") { + return fmt.Errorf(versioningNotSupportedWarning, endpoint) + } + + return nil +} + +// checkNumberOfArguments checks if the number of the arguments is valid. +// if the max is negative then there is no upper limit of arguments. +func checkNumberOfArguments(ctx *cli.Context, min, max int) error { + l := ctx.Args().Len() + if min == 1 && max == 1 && l != 1 { + return fmt.Errorf("expected only one argument") + } + if min == 2 && max == 2 && l != 2 { + return fmt.Errorf("expected source and destination arguments") + } + if l < min { + return fmt.Errorf("expected at least %d arguments but was given %d: %q", min, l, ctx.Args().Slice()) + } + if max >= 0 && l > max { + return fmt.Errorf("expected at most %d arguments but was given %d: %q", min, l, ctx.Args().Slice()) + } + return nil +} diff --git a/vendor/github.com/peak/s5cmd/v2/command/version.go b/vendor/github.com/peak/s5cmd/v2/command/version.go new file mode 100644 index 000000000..c71462d09 --- /dev/null +++ b/vendor/github.com/peak/s5cmd/v2/command/version.go @@ -0,0 +1,21 @@ +package command + +import ( + "fmt" + + "github.com/urfave/cli/v2" + + "github.com/peak/s5cmd/v2/version" +) + +func NewVersionCommand() *cli.Command { + return &cli.Command{ + Name: "version", + HelpName: "version", + Usage: "print version", + Action: func(c *cli.Context) error { + fmt.Println(version.GetHumanVersion()) + return nil + }, + } +} diff --git a/vendor/github.com/peak/s5cmd/v2/command/wildcard.go b/vendor/github.com/peak/s5cmd/v2/command/wildcard.go new file mode 100644 index 000000000..73ac239e7 --- /dev/null +++ b/vendor/github.com/peak/s5cmd/v2/command/wildcard.go @@ -0,0 +1,57 @@ +package command + +import ( + "path/filepath" + "regexp" + "strings" + + "github.com/peak/s5cmd/v2/storage" + "github.com/peak/s5cmd/v2/strutil" +) + +// createRegexFromWildcard creates regex strings from wildcard. 
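+//
+// A minimal sketch of how the helpers in this file are combined by commands
+// that accept --exclude; the patterns and prefix are assumed values:
+//
+//	patterns, err := createRegexFromWildcard([]string{"*.txt", "*.log"})
+//	if err != nil {
+//		return err
+//	}
+//	if isURLMatched(patterns, object.URL.Path, "prefix/") {
+//		continue // relative path matches an exclude pattern, skip the object
+//	}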
+func createRegexFromWildcard(wildcards []string) ([]*regexp.Regexp, error) { + var result []*regexp.Regexp + for _, input := range wildcards { + if input != "" { + regex := strutil.WildCardToRegexp(input) + regex = strutil.MatchFromStartToEnd(regex) + regex = strutil.AddNewLineFlag(regex) + regexpCompiled, err := regexp.Compile(regex) + if err != nil { + return nil, err + } + result = append(result, regexpCompiled) + } + } + return result, nil +} + +func isURLMatched(regexPatterns []*regexp.Regexp, urlPath, sourcePrefix string) bool { + if len(regexPatterns) == 0 { + return false + } + if !strings.HasSuffix(sourcePrefix, "/") { + sourcePrefix += "/" + } + sourcePrefix = filepath.ToSlash(sourcePrefix) + for _, regexPattern := range regexPatterns { + if regexPattern.MatchString(strings.TrimPrefix(urlPath, sourcePrefix)) { + return true + } + } + return false +} + +func isObjectExcluded(object *storage.Object, excludePatterns []*regexp.Regexp, includePatterns []*regexp.Regexp, prefix string) (bool, error) { + if err := object.Err; err != nil { + return true, err + } + if len(excludePatterns) > 0 && isURLMatched(excludePatterns, object.URL.Path, prefix) { + return true, nil + } + if len(includePatterns) > 0 { + return !isURLMatched(includePatterns, object.URL.Path, prefix), nil + } + return false, nil +} diff --git a/vendor/github.com/peak/s5cmd/v2/error/error.go b/vendor/github.com/peak/s5cmd/v2/error/error.go new file mode 100644 index 000000000..e6dddfd98 --- /dev/null +++ b/vendor/github.com/peak/s5cmd/v2/error/error.go @@ -0,0 +1,93 @@ +package error + +import ( + "context" + "errors" + "fmt" + + "github.com/hashicorp/go-multierror" + + "github.com/peak/s5cmd/v2/storage" + "github.com/peak/s5cmd/v2/storage/url" +) + +// Error is the type that implements error interface. +type Error struct { + // Op is the operation being performed, usually the name of the method + // being invoked (copy, move, etc.) + Op string + // Src is the source argument + Src *url.URL + // Dst is the destination argument + Dst *url.URL + // The underlying error if any + Err error +} + +// FullCommand returns the command string that occurred at. +func (e *Error) FullCommand() string { + return fmt.Sprintf("%v %v %v", e.Op, e.Src, e.Dst) +} + +// Error implements the error interface. +func (e *Error) Error() string { + return e.Err.Error() +} + +// Unwrap unwraps the error. +func (e *Error) Unwrap() error { + return e.Err +} + +// IsCancelation reports whether if given error is a cancelation error. +func IsCancelation(err error) bool { + if err == nil { + return false + } + + if errors.Is(err, context.Canceled) { + return true + } + + if storage.IsCancelationError(err) { + return true + } + + merr, ok := err.(*multierror.Error) + if !ok { + return false + } + + for _, err := range merr.Errors { + if IsCancelation(err) { + return true + } + } + + return false +} + +var ( + // ErrObjectExists indicates a specified object already exists. + ErrObjectExists = fmt.Errorf("object already exists") + + // ErrObjectIsNewer indicates a specified object is newer or same age. + ErrObjectIsNewer = fmt.Errorf("object is newer or same age") + + // ErrObjectSizesMatch indicates the sizes of objects match. + ErrObjectSizesMatch = fmt.Errorf("object size matches") + + // ErrObjectIsNewerAndSizesMatch indicates the specified object is newer or same age and sizes of objects match. 
+ ErrObjectIsNewerAndSizesMatch = fmt.Errorf("%v and %v", ErrObjectIsNewer, ErrObjectSizesMatch) +) + +// IsWarning checks if given error is either ErrObjectExists, +// ErrObjectIsNewer or ErrObjectSizesMatch. +func IsWarning(err error) bool { + switch err { + case ErrObjectExists, ErrObjectIsNewer, ErrObjectSizesMatch, ErrObjectIsNewerAndSizesMatch: + return true + } + + return false +} diff --git a/vendor/github.com/peak/s5cmd/v2/log/log.go b/vendor/github.com/peak/s5cmd/v2/log/log.go new file mode 100644 index 000000000..5ec149ac9 --- /dev/null +++ b/vendor/github.com/peak/s5cmd/v2/log/log.go @@ -0,0 +1,154 @@ +package log + +import ( + "fmt" + "os" +) + +// output is an internal container for messages to be logged. +type output struct { + std *os.File + message string +} + +// outputCh is used to synchronize writes to standard output. Multi-line +// logging is not possible if all workers print logs at the same time. +var outputCh = make(chan output, 10000) + +var global *Logger + +// Init inits global logger. +func Init(level string, json bool) { + global = New(level, json) +} + +// Trace prints message in trace mode. +func Trace(msg Message) { + global.printf(LevelTrace, msg, os.Stdout) +} + +// Debug prints message in debug mode. +func Debug(msg Message) { + global.printf(LevelDebug, msg, os.Stdout) +} + +// Info prints message in info mode. +func Info(msg Message) { + global.printf(LevelInfo, msg, os.Stdout) +} + +// Stat prints stat message regardless of the log level with info print formatting. +// It uses printfHelper instead of printf to ignore the log level condition. +func Stat(msg Message) { + global.printfHelper(LevelInfo, msg, os.Stdout) +} + +// Error prints message in error mode. +func Error(msg Message) { + global.printf(LevelError, msg, os.Stderr) +} + +// Close closes logger and its channel. +func Close() { + if global != nil { + close(outputCh) + <-global.donech + } +} + +// Logger is a structure for logging messages. +type Logger struct { + donech chan struct{} + json bool + level LogLevel +} + +// New creates new logger. +func New(level string, json bool) *Logger { + logLevel := LevelFromString(level) + logger := &Logger{ + donech: make(chan struct{}), + json: json, + level: logLevel, + } + go logger.out() + return logger +} + +// printf prints message according to the given level, message and std mode. +func (l *Logger) printf(level LogLevel, message Message, std *os.File) { + if level < l.level { + return + } + l.printfHelper(level, message, std) +} + +func (l *Logger) printfHelper(level LogLevel, message Message, std *os.File) { + if l.json { + outputCh <- output{ + message: message.JSON(), + std: std, + } + } else { + outputCh <- output{ + message: fmt.Sprintf("%v%v", level, message.String()), + std: std, + } + } +} + +// out listens for outputCh and logs messages. +func (l *Logger) out() { + defer close(l.donech) + + for output := range outputCh { + _, _ = fmt.Fprintln(output.std, output.message) + } +} + +// LogLevel is the level of Logger. +type LogLevel int + +const ( + LevelTrace LogLevel = iota + LevelDebug + LevelInfo + LevelError +) + +// String returns the string representation of logLevel. +func (l LogLevel) String() string { + switch l { + case LevelInfo: + return "" + case LevelError: + return "ERROR " + case LevelDebug: + return "DEBUG " + case LevelTrace: + // levelTrace is used for printing aws sdk logs and + // aws-sdk-go already adds "DEBUG" prefix to logs. + // So do not add another prefix to log which makes it + // look weird. 
+ return "" + default: + return "UNKNOWN " + } +} + +// LevelFromString returns logLevel for given string. It +// return `levelInfo` as a default. +func LevelFromString(s string) LogLevel { + switch s { + case "debug": + return LevelDebug + case "info": + return LevelInfo + case "error": + return LevelError + case "trace": + return LevelTrace + default: + return LevelInfo + } +} diff --git a/vendor/github.com/peak/s5cmd/v2/log/message.go b/vendor/github.com/peak/s5cmd/v2/log/message.go new file mode 100644 index 000000000..df485a98b --- /dev/null +++ b/vendor/github.com/peak/s5cmd/v2/log/message.go @@ -0,0 +1,102 @@ +package log + +import ( + "fmt" + + "github.com/peak/s5cmd/v2/storage/url" + "github.com/peak/s5cmd/v2/strutil" +) + +// Message is an interface to print structured logs. +type Message interface { + fmt.Stringer + JSON() string +} + +// InfoMessage is a generic message structure for successful operations. +type InfoMessage struct { + Operation string `json:"operation"` + Success bool `json:"success"` + Source *url.URL `json:"source,omitempty"` + Destination *url.URL `json:"destination,omitempty"` + Object Message `json:"object,omitempty"` + + // the VersionID field exist only for JSON Marshall, it must not be used for + // any other purpose. + VersionID string `json:"version_id,omitempty"` +} + +// String is the string representation of InfoMessage. +func (i InfoMessage) String() string { + if i.Source != nil && i.Destination != nil { + return fmt.Sprintf("%v %v %v", i.Operation, i.Source, i.Destination) + } + if i.Source != nil && i.Source.VersionID != "" { + return fmt.Sprintf("%v %-50v %v", i.Operation, i.Source, i.Source.VersionID) + } + if i.Destination != nil { + return fmt.Sprintf("%v %v", i.Operation, i.Destination) + } + return fmt.Sprintf("%v %v", i.Operation, i.Source) +} + +// JSON is the JSON representation of InfoMessage. +func (i InfoMessage) JSON() string { + if i.Destination == nil && i.Source != nil { + i.VersionID = i.Source.VersionID + } + i.Success = true + return strutil.JSON(i) +} + +// ErrorMessage is a generic message structure for unsuccessful operations. +type ErrorMessage struct { + Operation string `json:"operation,omitempty"` + Command string `json:"command,omitempty"` + Err string `json:"error"` +} + +// String is the string representation of ErrorMessage. +func (e ErrorMessage) String() string { + if e.Command == "" { + return fmt.Sprint(e.Err) + } + return fmt.Sprintf("%q: %v", e.Command, e.Err) +} + +// JSON is the JSON representation of ErrorMessage. +func (e ErrorMessage) JSON() string { + return strutil.JSON(e) +} + +// DebugMessage is a generic message structure for unsuccessful operations. +type DebugMessage struct { + Operation string `json:"operation,omitempty"` + Command string `json:"job,omitempty"` + Err string `json:"error"` +} + +type TraceMessage struct { + Message string `json:"message"` +} + +func (t TraceMessage) String() string { + return t.Message +} + +func (t TraceMessage) JSON() string { + return strutil.JSON(t) +} + +// String is the string representation of ErrorMessage. +func (d DebugMessage) String() string { + if d.Command == "" { + return d.Err + } + return fmt.Sprintf("%q: %v", d.Command, d.Err) +} + +// JSON is the JSON representation of ErrorMessage. 
+func (d DebugMessage) JSON() string { + return strutil.JSON(d) +} diff --git a/vendor/github.com/peak/s5cmd/v2/log/stat/stat.go b/vendor/github.com/peak/s5cmd/v2/log/stat/stat.go new file mode 100644 index 000000000..5e0ed4922 --- /dev/null +++ b/vendor/github.com/peak/s5cmd/v2/log/stat/stat.go @@ -0,0 +1,112 @@ +package stat + +import ( + "bytes" + "fmt" + "strings" + "sync" + "text/tabwriter" + + "github.com/peak/s5cmd/v2/strutil" +) + +const ( + totalCount = iota + succCount +) + +var ( + enabled bool + stats statistics +) + +type statistics [2]syncMapStrInt64 + +// InitStat initializes collecting program statistics. +func InitStat() { + enabled = true + for i := range stats { + stats[i] = syncMapStrInt64{ + Mutex: sync.Mutex{}, + mapStrInt64: map[string]int64{}, + } + } +} + +// syncMapStrInt64 is a statically typed and synchronized map. +type syncMapStrInt64 struct { + sync.Mutex + mapStrInt64 map[string]int64 +} + +func (s *syncMapStrInt64) add(key string, val int64) { + s.Lock() + defer s.Unlock() + + s.mapStrInt64[key] += val +} + +// Stat is for storing a particular statistics. +type Stat struct { + Operation string `json:"operation"` + Success int64 `json:"success"` + Error int64 `json:"error"` +} + +// Collect collects function execution data. +func Collect(op string, err *error) func() { + return func() { + if !enabled { + return + } + if err == nil || *err == nil { + stats[succCount].add(op, 1) + } + stats[totalCount].add(op, 1) + } +} + +// Stats implements log.Message interface. +type Stats []Stat + +func (s Stats) String() string { + var buf bytes.Buffer + + w := tabwriter.NewWriter(&buf, 0, 8, 1, '\t', tabwriter.AlignRight) + + fmt.Fprintf(w, "\n%s\t%s\t%s\t%s\t\n", "Operation", "Total", "Error", "Success") + for _, stat := range s { + fmt.Fprintf(w, "%s\t%d\t%d\t%d\t\n", stat.Operation, stat.Error+stat.Success, stat.Error, stat.Success) + } + + w.Flush() + return buf.String() +} + +func (s Stats) JSON() string { + var builder strings.Builder + + for _, stat := range s { + builder.WriteString(strutil.JSON(stat) + "\n") + } + return builder.String() +} + +// Statistics will return statistics that has been collected so far. +func Statistics() Stats { + if !enabled { + return Stats{} + } + + var result Stats + for op, total := range stats[totalCount].mapStrInt64 { + success := stats[succCount].mapStrInt64[op] + + result = append(result, Stat{ + Operation: op, + Success: success, + Error: total - success, + }) + } + return result +} diff --git a/vendor/github.com/peak/s5cmd/v2/orderedwriter/orderedwriter.go b/vendor/github.com/peak/s5cmd/v2/orderedwriter/orderedwriter.go new file mode 100644 index 000000000..48336d46c --- /dev/null +++ b/vendor/github.com/peak/s5cmd/v2/orderedwriter/orderedwriter.go @@ -0,0 +1,113 @@ +// Package orderedwriter implements an unbounded buffer for ordering concurrent writes for +// non-seekable writers. It keeps an internal linked list that keeps the chunks in order +// and flushes buffered chunks when the expected offset is available. 
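+//
+// A minimal usage sketch, assuming os.Stdout as the non-seekable target and
+// two chunks arriving out of order (illustrative values only):
+//
+//	w := orderedwriter.New(os.Stdout)
+//	w.WriteAt([]byte("world\n"), 6) // early chunk: buffered, nothing written yet
+//	w.WriteAt([]byte("hello "), 0)  // fills the gap: both chunks flush in order
+//
+// Bytes reach the underlying writer strictly in offset order, whichever order
+// the WriteAt calls happen in.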
+package orderedwriter + +import ( + "container/list" + "io" + "sync" +) + +type chunk struct { + offset int64 + value []byte +} + +type OrderedWriterAt struct { + mu *sync.Mutex + list *list.List + w io.Writer + written int64 +} + +func New(w io.Writer) *OrderedWriterAt { + return &OrderedWriterAt{ + mu: &sync.Mutex{}, + list: list.New(), + w: w, + written: 0, + } +} + +func (w *OrderedWriterAt) WriteAt(p []byte, offset int64) (int, error) { + w.mu.Lock() + defer w.mu.Unlock() + + // If the queue is empty and the chunk is writeable, push it without queueing. + if w.list.Front() == nil && w.written == offset { + n, err := w.w.Write(p) + if err != nil { + return n, err + } + w.written += int64(n) + return len(p), nil + } + + // Copy the chunk, buffered writers can modify + // the slice before we consume them. + b := make([]byte, len(p)) + copy(b, p) + + // If there are no items in the list and we can't write + // directly push back and return early. + if w.list.Front() == nil { + w.list.PushBack(&chunk{ + offset: offset, + value: b, + }) + return len(p), nil + } + + // Traverse the list from the beginning and insert + // it to the smallest index possible. That is, + // compare the element's offset with the offset + // that you want to buffer. + var inserted bool + for e := w.list.Front(); e != nil; e = e.Next() { + v, _ := e.Value.(*chunk) + if offset < v.offset { + w.list.InsertBefore(&chunk{ + offset: offset, + value: b, + }, e) + inserted = true + break + } + } + + // If the chunk haven't been inserted, put it at + // the end of the buffer. + if !inserted { + w.list.PushBack(&chunk{ + offset: offset, + value: b, + }) + } + + // If the expected offset is buffered, + // flush the items that you can. + var removeList []*list.Element + for e := w.list.Front(); e != nil; e = e.Next() { + v, _ := e.Value.(*chunk) + if v.offset != w.written { + break + } + + n, err := w.w.Write(v.value) + if err != nil { + return n, err + } + + removeList = append(removeList, e) + w.written += int64(n) + + } + + // Remove the items that have been written. 
+ for _, e := range removeList { + w.list.Remove(e) + } + + return len(p), nil +} diff --git a/vendor/github.com/peak/s5cmd/v2/parallel/fdlimit/fdlimit_unix.go b/vendor/github.com/peak/s5cmd/v2/parallel/fdlimit/fdlimit_unix.go new file mode 100644 index 000000000..538805797 --- /dev/null +++ b/vendor/github.com/peak/s5cmd/v2/parallel/fdlimit/fdlimit_unix.go @@ -0,0 +1,32 @@ +//go:build !windows +// +build !windows + +package fdlimit + +import ( + "syscall" +) + +const ( + minOpenFilesLimit = 1024 +) + +func Raise() error { + var rLimit syscall.Rlimit + err := syscall.Getrlimit(syscall.RLIMIT_NOFILE, &rLimit) + if err != nil { + return err + } + + if rLimit.Cur >= minOpenFilesLimit { + return nil + } + + if rLimit.Max < minOpenFilesLimit { + return nil + } + + rLimit.Cur = minOpenFilesLimit + + return syscall.Setrlimit(syscall.RLIMIT_NOFILE, &rLimit) +} diff --git a/vendor/github.com/peak/s5cmd/v2/parallel/fdlimit/fdlimit_windows.go b/vendor/github.com/peak/s5cmd/v2/parallel/fdlimit/fdlimit_windows.go new file mode 100644 index 000000000..fe6ac0a81 --- /dev/null +++ b/vendor/github.com/peak/s5cmd/v2/parallel/fdlimit/fdlimit_windows.go @@ -0,0 +1,6 @@ +//go:build windows +// +build windows + +package fdlimit + +func Raise() error { return nil } diff --git a/vendor/github.com/peak/s5cmd/v2/parallel/global.go b/vendor/github.com/peak/s5cmd/v2/parallel/global.go new file mode 100644 index 000000000..61d117eff --- /dev/null +++ b/vendor/github.com/peak/s5cmd/v2/parallel/global.go @@ -0,0 +1,23 @@ +package parallel + +import "github.com/peak/s5cmd/v2/parallel/fdlimit" + +var global *Manager + +// Init tries to increase the soft limit of open files and +// creates new global ParallelManager. +func Init(workercount int) { + _ = fdlimit.Raise() + global = New(workercount) +} + +// Close waits all jobs to finish and +// closes the semaphore of global ParallelManager. +func Close() { + if global != nil { + global.Close() + } +} + +// Run runs global ParallelManager. +func Run(task Task, waiter *Waiter) { global.Run(task, waiter) } diff --git a/vendor/github.com/peak/s5cmd/v2/parallel/parallel.go b/vendor/github.com/peak/s5cmd/v2/parallel/parallel.go new file mode 100644 index 000000000..40bd07d31 --- /dev/null +++ b/vendor/github.com/peak/s5cmd/v2/parallel/parallel.go @@ -0,0 +1,93 @@ +package parallel + +import ( + "runtime" + "sync" +) + +const ( + minNumWorkers = 2 +) + +// Task is a function type for parallel manager. +type Task func() error + +// Manager is a structure for running tasks in parallel. +type Manager struct { + wg *sync.WaitGroup + semaphore chan bool +} + +// New creates a new parallel.Manager. +func New(workercount int) *Manager { + if workercount < 0 { + workercount = runtime.NumCPU() * -workercount + } + + if workercount < minNumWorkers { + workercount = minNumWorkers + } + + return &Manager{ + wg: &sync.WaitGroup{}, + semaphore: make(chan bool, workercount), + } +} + +// acquire limits concurrency by trying to acquire the semaphore. +func (p *Manager) acquire() { + p.semaphore <- true + p.wg.Add(1) +} + +// release releases the acquired semaphore to signal that a task is finished. +func (p *Manager) release() { + p.wg.Done() + <-p.semaphore +} + +// Run runs the given task while limiting the concurrency. +func (p *Manager) Run(fn Task, waiter *Waiter) { + waiter.wg.Add(1) + p.acquire() + go func() { + defer waiter.wg.Done() + defer p.release() + + if err := fn(); err != nil { + waiter.errch <- err + } + }() +} + +// Close waits all tasks to finish. 
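+//
+// A minimal sketch of the Manager/Waiter pair, assuming a slice of Task values
+// named jobs (illustrative only):
+//
+//	pm := New(8) // at most 8 tasks in flight
+//	waiter := NewWaiter()
+//	go func() {
+//		for err := range waiter.Err() {
+//			fmt.Println("task failed:", err)
+//		}
+//	}()
+//	for _, job := range jobs {
+//		pm.Run(job, waiter)
+//	}
+//	waiter.Wait() // blocks until every task returned, then closes Err()
+//	pm.Close()    // waits for in-flight work and closes the semaphore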
+func (p *Manager) Close() { + p.wg.Wait() + close(p.semaphore) +} + +// Waiter is a structure for waiting and reading +// error messages created by Manager. +type Waiter struct { + wg sync.WaitGroup + errch chan error +} + +// NewWaiter creates a new parallel.Waiter. +func NewWaiter() *Waiter { + return &Waiter{ + errch: make(chan error), + } +} + +// Wait blocks until the WaitGroup counter is zero +// and closes error channel. +func (w *Waiter) Wait() { + w.wg.Wait() + close(w.errch) +} + +// Err returns read-only error channel. +func (w *Waiter) Err() <-chan error { + return w.errch +} diff --git a/vendor/github.com/peak/s5cmd/v2/progressbar/progressbar.go b/vendor/github.com/peak/s5cmd/v2/progressbar/progressbar.go new file mode 100644 index 000000000..d8752d47d --- /dev/null +++ b/vendor/github.com/peak/s5cmd/v2/progressbar/progressbar.go @@ -0,0 +1,78 @@ +package progressbar + +import ( + "fmt" + "sync/atomic" + + "github.com/cheggaaa/pb/v3" +) + +type ProgressBar interface { + Start() + Finish() + IncrementCompletedObjects() + IncrementTotalObjects() + AddCompletedBytes(bytes int64) + AddTotalBytes(bytes int64) +} + +type NoOp struct{} + +func (pb *NoOp) Start() {} + +func (pb *NoOp) Finish() {} + +func (pb *NoOp) IncrementCompletedObjects() {} + +func (pb *NoOp) IncrementTotalObjects() {} + +func (pb *NoOp) AddCompletedBytes(bytes int64) {} + +func (pb *NoOp) AddTotalBytes(bytes int64) {} + +type CommandProgressBar struct { + totalObjects int64 + completedObjects int64 + progressbar *pb.ProgressBar +} + +var _ ProgressBar = (*CommandProgressBar)(nil) + +const progressbarTemplate = `{{percent . | green}} {{bar . " " "━" "━" "─" " " | green}} {{counters . | green}} {{speed . "(%s/s)" | red}} {{rtime . "%s left" | blue}} {{ string . "objects" | yellow}}` + +func New() *CommandProgressBar { + return &CommandProgressBar{ + progressbar: pb.New64(0). + Set(pb.Bytes, true). + Set(pb.SIBytesPrefix, true). + SetWidth(128). + Set("objects", fmt.Sprintf("(%d/%d)", 0, 0)). + SetTemplateString(progressbarTemplate), + } +} + +func (cp *CommandProgressBar) Start() { + cp.progressbar.Start() +} + +func (cp *CommandProgressBar) Finish() { + cp.progressbar.Finish() +} + +func (cp *CommandProgressBar) IncrementCompletedObjects() { + atomic.AddInt64(&cp.completedObjects, 1) + cp.progressbar.Set("objects", fmt.Sprintf("(%d/%d)", cp.completedObjects, cp.totalObjects)) +} + +func (cp *CommandProgressBar) IncrementTotalObjects() { + atomic.AddInt64(&cp.totalObjects, 1) + cp.progressbar.Set("objects", fmt.Sprintf("(%d/%d)", cp.completedObjects, cp.totalObjects)) +} + +func (cp *CommandProgressBar) AddCompletedBytes(bytes int64) { + cp.progressbar.Add64(bytes) +} + +func (cp *CommandProgressBar) AddTotalBytes(bytes int64) { + cp.progressbar.AddTotal(bytes) +} diff --git a/vendor/github.com/peak/s5cmd/v2/storage/fs.go b/vendor/github.com/peak/s5cmd/v2/storage/fs.go new file mode 100644 index 000000000..6c192407d --- /dev/null +++ b/vendor/github.com/peak/s5cmd/v2/storage/fs.go @@ -0,0 +1,269 @@ +package storage + +import ( + "context" + "fmt" + "os" + "path/filepath" + + "github.com/karrick/godirwalk" + "github.com/termie/go-shutil" + + "github.com/peak/s5cmd/v2/storage/url" +) + +// Filesystem is the Storage implementation of a local filesystem. +type Filesystem struct { + dryRun bool +} + +// Stat returns the Object structure describing object. 
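+//
+// A minimal sketch, assuming a local path (illustrative only):
+//
+//	fs := &Filesystem{}
+//	u, _ := url.New("/tmp/data.csv")
+//	obj, err := fs.Stat(context.Background(), u)
+//	// obj.Size and obj.ModTime come from os.Stat; a missing file yields
+//	// &ErrGivenObjectNotFound{ObjectAbsPath: u.Absolute()} as the error.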
+func (f *Filesystem) Stat(ctx context.Context, url *url.URL) (*Object, error) { + st, err := os.Stat(url.Absolute()) + if err != nil { + if os.IsNotExist(err) { + return nil, &ErrGivenObjectNotFound{ObjectAbsPath: url.Absolute()} + } + return nil, err + } + + mod := st.ModTime() + return &Object{ + URL: url, + Type: ObjectType{st.Mode()}, + Size: st.Size(), + ModTime: &mod, + Etag: "", + }, nil +} + +// List returns the objects and directories reside in given src. +func (f *Filesystem) List(ctx context.Context, src *url.URL, followSymlinks bool) <-chan *Object { + if src.IsWildcard() { + return f.expandGlob(ctx, src, followSymlinks) + } + + obj, err := f.Stat(ctx, src) + + isDir := err == nil && obj.Type.IsDir() + + if isDir { + return f.walkDir(ctx, src, followSymlinks) + } + + return f.listSingleObject(ctx, src) +} + +func (f *Filesystem) listSingleObject(ctx context.Context, src *url.URL) <-chan *Object { + ch := make(chan *Object, 1) + defer close(ch) + + object, err := f.Stat(ctx, src) + if err != nil { + object = &Object{Err: err} + } + ch <- object + return ch +} + +func (f *Filesystem) expandGlob(ctx context.Context, src *url.URL, followSymlinks bool) <-chan *Object { + ch := make(chan *Object) + + go func() { + defer close(ch) + + matchedFiles, err := filepath.Glob(src.Absolute()) + if err != nil { + sendError(ctx, err, ch) + return + } + if len(matchedFiles) == 0 { + err := fmt.Errorf("no match found for %q", src) + sendError(ctx, err, ch) + return + } + + for _, filename := range matchedFiles { + filename := filename + + fileurl, err := url.New(filename) + if err != nil { + sendError(ctx, err, ch) + return + } + + fileurl.SetRelative(src) + + obj, err := f.Stat(ctx, fileurl) + if err != nil { + sendError(ctx, err, ch) + return + } + + if !obj.Type.IsDir() { + sendObject(ctx, obj, ch) + continue + } + + walkDir(ctx, f, fileurl, followSymlinks, func(obj *Object) { + sendObject(ctx, obj, ch) + }) + } + }() + return ch +} + +func walkDir(ctx context.Context, fs *Filesystem, src *url.URL, followSymlinks bool, fn func(o *Object)) { + //skip if symlink is pointing to a dir and --no-follow-symlink + if !ShouldProcessURL(src, followSymlinks) { + return + } + err := godirwalk.Walk(src.Absolute(), &godirwalk.Options{ + Callback: func(pathname string, dirent *godirwalk.Dirent) error { + // we're interested in files + if dirent.IsDir() { + return nil + } + + fileurl, err := url.New(pathname) + if err != nil { + return err + } + + fileurl.SetRelative(src) + + //skip if symlink is pointing to a file and --no-follow-symlink + if !ShouldProcessURL(fileurl, followSymlinks) { + return nil + } + + obj, err := fs.Stat(ctx, fileurl) + if err != nil { + return err + } + + fn(obj) + return nil + }, + FollowSymbolicLinks: followSymlinks, + }) + if err != nil { + obj := &Object{Err: err} + fn(obj) + } +} + +func (f *Filesystem) walkDir(ctx context.Context, src *url.URL, followSymlinks bool) <-chan *Object { + ch := make(chan *Object) + go func() { + defer close(ch) + + walkDir(ctx, f, src, followSymlinks, func(obj *Object) { + sendObject(ctx, obj, ch) + }) + }() + return ch +} + +// Copy copies given source to destination. +func (f *Filesystem) Copy(ctx context.Context, src, dst *url.URL, _ Metadata) error { + if f.dryRun { + return nil + } + + if err := os.MkdirAll(dst.Dir(), os.ModePerm); err != nil { + return err + } + _, err := shutil.Copy(src.Absolute(), dst.Absolute(), true) + return err +} + +// Delete deletes given file. 
+func (f *Filesystem) Delete(ctx context.Context, url *url.URL) error { + if f.dryRun { + return nil + } + + return os.Remove(url.Absolute()) +} + +// MultiDelete deletes all files returned from given channel. +func (f *Filesystem) MultiDelete(ctx context.Context, urlch <-chan *url.URL) <-chan *Object { + resultch := make(chan *Object) + go func() { + defer close(resultch) + + for url := range urlch { + err := f.Delete(ctx, url) + obj := &Object{ + URL: url, + Err: err, + } + resultch <- obj + } + }() + return resultch +} + +// MkdirAll calls os.MkdirAll. +func (f *Filesystem) MkdirAll(path string) error { + if f.dryRun { + return nil + } + return os.MkdirAll(path, os.ModePerm) +} + +// Create creates a new os.File. +func (f *Filesystem) Create(path string) (*os.File, error) { + if f.dryRun { + return &os.File{}, nil + } + + return os.Create(path) +} + +// Open opens the given source. +func (f *Filesystem) Open(path string) (*os.File, error) { + file, err := os.OpenFile(path, os.O_RDONLY, 0644) + if err != nil { + return nil, err + } + + return file, nil +} + +// CreateTemp creates a new temporary file +func (f *Filesystem) CreateTemp(dir, pattern string) (*os.File, error) { + if f.dryRun { + return &os.File{}, nil + } + + file, err := os.CreateTemp(dir, pattern) + if err != nil { + return nil, err + } + + err = file.Chmod(0644) + return file, err +} + +// Rename a file +func (f *Filesystem) Rename(file *os.File, newpath string) error { + if f.dryRun { + return nil + } + + return os.Rename(file.Name(), newpath) +} + +func sendObject(ctx context.Context, obj *Object, ch chan *Object) { + select { + case <-ctx.Done(): + case ch <- obj: + } +} + +func sendError(ctx context.Context, err error, ch chan *Object) { + obj := &Object{Err: err} + sendObject(ctx, obj, ch) +} diff --git a/vendor/github.com/peak/s5cmd/v2/storage/mock_storage.go b/vendor/github.com/peak/s5cmd/v2/storage/mock_storage.go new file mode 100644 index 000000000..921beb306 --- /dev/null +++ b/vendor/github.com/peak/s5cmd/v2/storage/mock_storage.go @@ -0,0 +1,107 @@ +// Code generated by MockGen. DO NOT EDIT. +// Source: storage.go + +// Package storage is a generated GoMock package. +package storage + +import ( + context "context" + reflect "reflect" + + gomock "github.com/golang/mock/gomock" + url "github.com/peak/s5cmd/v2/storage/url" +) + +// MockStorage is a mock of Storage interface. +type MockStorage struct { + ctrl *gomock.Controller + recorder *MockStorageMockRecorder +} + +// MockStorageMockRecorder is the mock recorder for MockStorage. +type MockStorageMockRecorder struct { + mock *MockStorage +} + +// NewMockStorage creates a new mock instance. +func NewMockStorage(ctrl *gomock.Controller) *MockStorage { + mock := &MockStorage{ctrl: ctrl} + mock.recorder = &MockStorageMockRecorder{mock} + return mock +} + +// EXPECT returns an object that allows the caller to indicate expected use. +func (m *MockStorage) EXPECT() *MockStorageMockRecorder { + return m.recorder +} + +// Copy mocks base method. +func (m *MockStorage) Copy(ctx context.Context, src, dst *url.URL, metadata Metadata) error { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "Copy", ctx, src, dst, metadata) + ret0, _ := ret[0].(error) + return ret0 +} + +// Copy indicates an expected call of Copy. 
+func (mr *MockStorageMockRecorder) Copy(ctx, src, dst, metadata interface{}) *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "Copy", reflect.TypeOf((*MockStorage)(nil).Copy), ctx, src, dst, metadata) +} + +// Delete mocks base method. +func (m *MockStorage) Delete(ctx context.Context, src *url.URL) error { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "Delete", ctx, src) + ret0, _ := ret[0].(error) + return ret0 +} + +// Delete indicates an expected call of Delete. +func (mr *MockStorageMockRecorder) Delete(ctx, src interface{}) *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "Delete", reflect.TypeOf((*MockStorage)(nil).Delete), ctx, src) +} + +// List mocks base method. +func (m *MockStorage) List(ctx context.Context, src *url.URL, followSymlinks bool) <-chan *Object { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "List", ctx, src, followSymlinks) + ret0, _ := ret[0].(<-chan *Object) + return ret0 +} + +// List indicates an expected call of List. +func (mr *MockStorageMockRecorder) List(ctx, src, followSymlinks interface{}) *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "List", reflect.TypeOf((*MockStorage)(nil).List), ctx, src, followSymlinks) +} + +// MultiDelete mocks base method. +func (m *MockStorage) MultiDelete(ctx context.Context, urls <-chan *url.URL) <-chan *Object { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "MultiDelete", ctx, urls) + ret0, _ := ret[0].(<-chan *Object) + return ret0 +} + +// MultiDelete indicates an expected call of MultiDelete. +func (mr *MockStorageMockRecorder) MultiDelete(ctx, urls interface{}) *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "MultiDelete", reflect.TypeOf((*MockStorage)(nil).MultiDelete), ctx, urls) +} + +// Stat mocks base method. +func (m *MockStorage) Stat(ctx context.Context, src *url.URL) (*Object, error) { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "Stat", ctx, src) + ret0, _ := ret[0].(*Object) + ret1, _ := ret[1].(error) + return ret0, ret1 +} + +// Stat indicates an expected call of Stat. 
+func (mr *MockStorageMockRecorder) Stat(ctx, src interface{}) *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "Stat", reflect.TypeOf((*MockStorage)(nil).Stat), ctx, src) +} diff --git a/vendor/github.com/peak/s5cmd/v2/storage/s3.go b/vendor/github.com/peak/s5cmd/v2/storage/s3.go new file mode 100644 index 000000000..5676c0e57 --- /dev/null +++ b/vendor/github.com/peak/s5cmd/v2/storage/s3.go @@ -0,0 +1,1485 @@ +package storage + +import ( + "context" + "crypto/rand" + "crypto/tls" + "encoding/csv" + "encoding/json" + "errors" + "fmt" + "io" + "math" + "math/big" + "net/http" + urlpkg "net/url" + "os" + "strconv" + "strings" + "sync" + "time" + + "github.com/aws/aws-sdk-go/aws" + "github.com/aws/aws-sdk-go/aws/awserr" + "github.com/aws/aws-sdk-go/aws/client" + "github.com/aws/aws-sdk-go/aws/credentials" + "github.com/aws/aws-sdk-go/aws/endpoints" + "github.com/aws/aws-sdk-go/aws/request" + "github.com/aws/aws-sdk-go/aws/session" + "github.com/aws/aws-sdk-go/service/s3" + "github.com/aws/aws-sdk-go/service/s3/s3iface" + "github.com/aws/aws-sdk-go/service/s3/s3manager" + "github.com/aws/aws-sdk-go/service/s3/s3manager/s3manageriface" + + "github.com/peak/s5cmd/v2/log" + "github.com/peak/s5cmd/v2/storage/url" +) + +var sentinelURL = urlpkg.URL{} + +const ( + // deleteObjectsMax is the max allowed objects to be deleted on single HTTP + // request. + deleteObjectsMax = 1000 + + // Amazon Accelerated Transfer endpoint + transferAccelEndpoint = "s3-accelerate.amazonaws.com" + + // Google Cloud Storage endpoint + gcsEndpoint = "storage.googleapis.com" + + // the key of the object metadata which is used to handle retry decision on NoSuchUpload error + metadataKeyRetryID = "s5cmd-upload-retry-id" +) + +// Re-used AWS sessions dramatically improve performance. +var globalSessionCache = &SessionCache{ + sessions: map[Options]*session.Session{}, +} + +// S3 is a storage type which interacts with S3API, DownloaderAPI and +// UploaderAPI. +type S3 struct { + api s3iface.S3API + downloader s3manageriface.DownloaderAPI + uploader s3manageriface.UploaderAPI + endpointURL urlpkg.URL + dryRun bool + useListObjectsV1 bool + noSuchUploadRetryCount int + requestPayer string +} + +func (s *S3) RequestPayer() *string { + if s.requestPayer == "" { + return nil + } + return &s.requestPayer +} + +func parseEndpoint(endpoint string) (urlpkg.URL, error) { + if endpoint == "" { + return sentinelURL, nil + } + + u, err := urlpkg.Parse(endpoint) + if err != nil { + return sentinelURL, fmt.Errorf("parse endpoint %q: %v", endpoint, err) + } + + return *u, nil +} + +// NewS3Storage creates new S3 session. +func newS3Storage(ctx context.Context, opts Options) (*S3, error) { + endpointURL, err := parseEndpoint(opts.Endpoint) + if err != nil { + return nil, err + } + + awsSession, err := globalSessionCache.newSession(ctx, opts) + if err != nil { + return nil, err + } + + return &S3{ + api: s3.New(awsSession), + downloader: s3manager.NewDownloader(awsSession), + uploader: s3manager.NewUploader(awsSession), + endpointURL: endpointURL, + dryRun: opts.DryRun, + useListObjectsV1: opts.UseListObjectsV1, + requestPayer: opts.RequestPayer, + noSuchUploadRetryCount: opts.NoSuchUploadRetryCount, + }, nil +} + +// Stat retrieves metadata from S3 object without returning the object itself. 
+func (s *S3) Stat(ctx context.Context, url *url.URL) (*Object, error) { + input := &s3.HeadObjectInput{ + Bucket: aws.String(url.Bucket), + Key: aws.String(url.Path), + RequestPayer: s.RequestPayer(), + } + if url.VersionID != "" { + input.SetVersionId(url.VersionID) + } + + output, err := s.api.HeadObjectWithContext(ctx, input) + if err != nil { + if errHasCode(err, "NotFound") { + return nil, &ErrGivenObjectNotFound{ObjectAbsPath: url.Absolute()} + } + return nil, err + } + + etag := aws.StringValue(output.ETag) + mod := aws.TimeValue(output.LastModified) + + obj := &Object{ + URL: url, + Etag: strings.Trim(etag, `"`), + ModTime: &mod, + Size: aws.Int64Value(output.ContentLength), + } + + if s.noSuchUploadRetryCount > 0 { + if retryID, ok := output.Metadata[metadataKeyRetryID]; ok { + obj.retryID = *retryID + } + } + + return obj, nil +} + +// List is a non-blocking S3 list operation which paginates and filters S3 +// keys. If no object found or an error is encountered during this period, +// it sends these errors to object channel. +func (s *S3) List(ctx context.Context, url *url.URL, _ bool) <-chan *Object { + if url.VersionID != "" || url.AllVersions { + return s.listObjectVersions(ctx, url) + } + if s.useListObjectsV1 { + return s.listObjects(ctx, url) + } + + return s.listObjectsV2(ctx, url) +} + +func (s *S3) listObjectVersions(ctx context.Context, url *url.URL) <-chan *Object { + listInput := s3.ListObjectVersionsInput{ + Bucket: aws.String(url.Bucket), + Prefix: aws.String(url.Prefix), + } + + if url.Delimiter != "" { + listInput.SetDelimiter(url.Delimiter) + } + + objCh := make(chan *Object) + + go func() { + defer close(objCh) + objectFound := false + + var now time.Time + + err := s.api.ListObjectVersionsPagesWithContext(ctx, &listInput, + func(p *s3.ListObjectVersionsOutput, lastPage bool) bool { + for _, c := range p.CommonPrefixes { + prefix := aws.StringValue(c.Prefix) + if !url.Match(prefix) { + continue + } + + newurl := url.Clone() + newurl.Path = prefix + objCh <- &Object{ + URL: newurl, + Type: ObjectType{os.ModeDir}, + } + + objectFound = true + } + // track the instant object iteration began, + // so it can be used to bypass objects created after this instant + if now.IsZero() { + now = time.Now().UTC() + } + + // iterate over all versions of the objects (except the delete markers) + for _, v := range p.Versions { + key := aws.StringValue(v.Key) + if !url.Match(key) { + continue + } + if url.VersionID != "" && url.VersionID != aws.StringValue(v.VersionId) { + continue + } + + mod := aws.TimeValue(v.LastModified).UTC() + if mod.After(now) { + objectFound = true + continue + } + + var objtype os.FileMode + if strings.HasSuffix(key, "/") { + objtype = os.ModeDir + } + + newurl := url.Clone() + newurl.Path = aws.StringValue(v.Key) + newurl.VersionID = aws.StringValue(v.VersionId) + etag := aws.StringValue(v.ETag) + + objCh <- &Object{ + URL: newurl, + Etag: strings.Trim(etag, `"`), + ModTime: &mod, + Type: ObjectType{objtype}, + Size: aws.Int64Value(v.Size), + StorageClass: StorageClass(aws.StringValue(v.StorageClass)), + } + + objectFound = true + } + + // iterate over all delete marker versions of the objects + for _, d := range p.DeleteMarkers { + key := aws.StringValue(d.Key) + if !url.Match(key) { + continue + } + if url.VersionID != "" && url.VersionID != aws.StringValue(d.VersionId) { + continue + } + + mod := aws.TimeValue(d.LastModified).UTC() + if mod.After(now) { + objectFound = true + continue + } + + var objtype os.FileMode + if strings.HasSuffix(key, "/") 
{ + objtype = os.ModeDir + } + + newurl := url.Clone() + newurl.Path = aws.StringValue(d.Key) + newurl.VersionID = aws.StringValue(d.VersionId) + + objCh <- &Object{ + URL: newurl, + ModTime: &mod, + Type: ObjectType{objtype}, + Size: 0, + } + + objectFound = true + } + + return !lastPage + }) + + if err != nil { + objCh <- &Object{Err: err} + return + } + + if !objectFound { + objCh <- &Object{Err: ErrNoObjectFound} + } + }() + + return objCh +} + +func (s *S3) listObjectsV2(ctx context.Context, url *url.URL) <-chan *Object { + listInput := s3.ListObjectsV2Input{ + Bucket: aws.String(url.Bucket), + Prefix: aws.String(url.Prefix), + RequestPayer: s.RequestPayer(), + } + + if url.Delimiter != "" { + listInput.SetDelimiter(url.Delimiter) + } + + objCh := make(chan *Object) + + go func() { + defer close(objCh) + objectFound := false + + var now time.Time + + err := s.api.ListObjectsV2PagesWithContext(ctx, &listInput, func(p *s3.ListObjectsV2Output, lastPage bool) bool { + for _, c := range p.CommonPrefixes { + prefix := aws.StringValue(c.Prefix) + if !url.Match(prefix) { + continue + } + + newurl := url.Clone() + newurl.Path = prefix + objCh <- &Object{ + URL: newurl, + Type: ObjectType{os.ModeDir}, + } + + objectFound = true + } + // track the instant object iteration began, + // so it can be used to bypass objects created after this instant + if now.IsZero() { + now = time.Now().UTC() + } + + for _, c := range p.Contents { + key := aws.StringValue(c.Key) + if !url.Match(key) { + continue + } + + mod := aws.TimeValue(c.LastModified).UTC() + if mod.After(now) { + objectFound = true + continue + } + + var objtype os.FileMode + if strings.HasSuffix(key, "/") { + objtype = os.ModeDir + } + + newurl := url.Clone() + newurl.Path = aws.StringValue(c.Key) + etag := aws.StringValue(c.ETag) + + objCh <- &Object{ + URL: newurl, + Etag: strings.Trim(etag, `"`), + ModTime: &mod, + Type: ObjectType{objtype}, + Size: aws.Int64Value(c.Size), + StorageClass: StorageClass(aws.StringValue(c.StorageClass)), + } + + objectFound = true + } + + return !lastPage + }) + + if err != nil { + objCh <- &Object{Err: err} + return + } + + if !objectFound { + objCh <- &Object{Err: ErrNoObjectFound} + } + }() + + return objCh +} + +// listObjects is used for cloud services that does not support S3 +// ListObjectsV2 API. I'm looking at you GCS. 
+func (s *S3) listObjects(ctx context.Context, url *url.URL) <-chan *Object { + listInput := s3.ListObjectsInput{ + Bucket: aws.String(url.Bucket), + Prefix: aws.String(url.Prefix), + RequestPayer: s.RequestPayer(), + } + + if url.Delimiter != "" { + listInput.SetDelimiter(url.Delimiter) + } + + objCh := make(chan *Object) + + go func() { + defer close(objCh) + objectFound := false + + var now time.Time + + err := s.api.ListObjectsPagesWithContext(ctx, &listInput, func(p *s3.ListObjectsOutput, lastPage bool) bool { + for _, c := range p.CommonPrefixes { + prefix := aws.StringValue(c.Prefix) + if !url.Match(prefix) { + continue + } + + newurl := url.Clone() + newurl.Path = prefix + objCh <- &Object{ + URL: newurl, + Type: ObjectType{os.ModeDir}, + } + + objectFound = true + } + // track the instant object iteration began, + // so it can be used to bypass objects created after this instant + if now.IsZero() { + now = time.Now().UTC() + } + + for _, c := range p.Contents { + key := aws.StringValue(c.Key) + if !url.Match(key) { + continue + } + + mod := aws.TimeValue(c.LastModified).UTC() + if mod.After(now) { + objectFound = true + continue + } + + var objtype os.FileMode + if strings.HasSuffix(key, "/") { + objtype = os.ModeDir + } + + newurl := url.Clone() + newurl.Path = aws.StringValue(c.Key) + etag := aws.StringValue(c.ETag) + + objCh <- &Object{ + URL: newurl, + Etag: strings.Trim(etag, `"`), + ModTime: &mod, + Type: ObjectType{objtype}, + Size: aws.Int64Value(c.Size), + StorageClass: StorageClass(aws.StringValue(c.StorageClass)), + } + + objectFound = true + } + + return !lastPage + }) + + if err != nil { + objCh <- &Object{Err: err} + return + } + + if !objectFound { + objCh <- &Object{Err: ErrNoObjectFound} + } + }() + + return objCh +} + +// Copy is a single-object copy operation which copies objects to S3 +// destination from another S3 source. +func (s *S3) Copy(ctx context.Context, from, to *url.URL, metadata Metadata) error { + if s.dryRun { + return nil + } + + // SDK expects CopySource like "bucket[/key]" + copySource := from.EscapedPath() + + input := &s3.CopyObjectInput{ + Bucket: aws.String(to.Bucket), + Key: aws.String(to.Path), + CopySource: aws.String(copySource), + RequestPayer: s.RequestPayer(), + } + if from.VersionID != "" { + // Unlike many other *Input and *Output types version ID is not a field, + // but rather something that must be appended to CopySource string. 
+ // This is same in both v1 and v2 SDKs: + // https://pkg.go.dev/github.com/aws/aws-sdk-go/service/s3#CopyObjectInput + // https://pkg.go.dev/github.com/aws/aws-sdk-go-v2/service/s3#CopyObjectInput + input.CopySource = aws.String(copySource + "?versionId=" + from.VersionID) + } + + storageClass := metadata.StorageClass + if storageClass != "" { + input.StorageClass = aws.String(storageClass) + } + + acl := metadata.ACL + if acl != "" { + input.ACL = aws.String(acl) + } + + cacheControl := metadata.CacheControl + if cacheControl != "" { + input.CacheControl = aws.String(cacheControl) + } + + expires := metadata.Expires + if expires != "" { + t, err := time.Parse(time.RFC3339, expires) + if err != nil { + return err + } + input.Expires = aws.Time(t) + } + + sseEncryption := metadata.EncryptionMethod + if sseEncryption != "" { + input.ServerSideEncryption = aws.String(sseEncryption) + sseKmsKeyID := metadata.EncryptionKeyID + if sseKmsKeyID != "" { + input.SSEKMSKeyId = aws.String(sseKmsKeyID) + } + } + + contentEncoding := metadata.ContentEncoding + if contentEncoding != "" { + input.ContentEncoding = aws.String(contentEncoding) + } + + contentDisposition := metadata.ContentDisposition + if contentDisposition != "" { + input.ContentDisposition = aws.String(contentDisposition) + } + + // add retry ID to the object metadata + if s.noSuchUploadRetryCount > 0 { + input.Metadata[metadataKeyRetryID] = generateRetryID() + } + + if len(metadata.UserDefined) != 0 { + m := make(map[string]*string) + for k, v := range metadata.UserDefined { + m[k] = aws.String(v) + } + input.Metadata = m + } + + _, err := s.api.CopyObject(input) + return err +} + +// Read fetches the remote object and returns its contents as an io.ReadCloser. +func (s *S3) Read(ctx context.Context, src *url.URL) (io.ReadCloser, error) { + input := &s3.GetObjectInput{ + Bucket: aws.String(src.Bucket), + Key: aws.String(src.Path), + RequestPayer: s.RequestPayer(), + } + if src.VersionID != "" { + input.SetVersionId(src.VersionID) + } + + resp, err := s.api.GetObjectWithContext(ctx, input) + + if err != nil { + return nil, err + } + return resp.Body, nil +} + +func (s *S3) Presign(ctx context.Context, from *url.URL, expire time.Duration) (string, error) { + input := &s3.GetObjectInput{ + Bucket: aws.String(from.Bucket), + Key: aws.String(from.Path), + RequestPayer: s.RequestPayer(), + } + + req, _ := s.api.GetObjectRequest(input) + + return req.Presign(expire) +} + +// Get is a multipart download operation which downloads S3 objects into any +// destination that implements io.WriterAt interface. +// Makes a single 'GetObject' call if 'concurrency' is 1 and ignores 'partSize'. 
+func (s *S3) Get( + ctx context.Context, + from *url.URL, + to io.WriterAt, + concurrency int, + partSize int64, +) (int64, error) { + if s.dryRun { + return 0, nil + } + + input := &s3.GetObjectInput{ + Bucket: aws.String(from.Bucket), + Key: aws.String(from.Path), + RequestPayer: s.RequestPayer(), + } + if from.VersionID != "" { + input.VersionId = aws.String(from.VersionID) + } + + return s.downloader.DownloadWithContext(ctx, to, input, func(u *s3manager.Downloader) { + u.PartSize = partSize + u.Concurrency = concurrency + }) +} + +type SelectQuery struct { + InputFormat string + InputContentStructure string + FileHeaderInfo string + OutputFormat string + ExpressionType string + Expression string + CompressionType string +} + +type eventType string + +const ( + jsonType eventType = "json" + csvType eventType = "csv" + parquetType eventType = "parquet" +) + +func parseInputSerialization(e eventType, c string, delimiter string, headerInfo string) (*s3.InputSerialization, error) { + var s *s3.InputSerialization + + switch e { + case jsonType: + s = &s3.InputSerialization{ + JSON: &s3.JSONInput{ + Type: aws.String(delimiter), + }, + } + if c != "" { + s.CompressionType = aws.String(c) + } + case csvType: + s = &s3.InputSerialization{ + CSV: &s3.CSVInput{ + FieldDelimiter: aws.String(delimiter), + FileHeaderInfo: aws.String(headerInfo), + }, + } + if c != "" { + s.CompressionType = aws.String(c) + } + case parquetType: + s = &s3.InputSerialization{ + Parquet: &s3.ParquetInput{}, + } + default: + return nil, fmt.Errorf("input format is not valid") + } + + return s, nil +} + +func parseOutputSerialization(e eventType, delimiter string, reader io.Reader) (*s3.OutputSerialization, EventStreamDecoder, error) { + var s *s3.OutputSerialization + var decoder EventStreamDecoder + + switch e { + case jsonType: + s = &s3.OutputSerialization{ + JSON: &s3.JSONOutput{}, + } + decoder = NewJSONDecoder(reader) + case csvType: + s = &s3.OutputSerialization{ + CSV: &s3.CSVOutput{ + FieldDelimiter: aws.String(delimiter), + }, + } + decoder = NewCsvDecoder(reader) + default: + return nil, nil, fmt.Errorf("output serialization is not valid") + } + return s, decoder, nil +} + +func (s *S3) Select(ctx context.Context, url *url.URL, query *SelectQuery, resultCh chan<- json.RawMessage) error { + if s.dryRun { + return nil + } + + var ( + inputFormat *s3.InputSerialization + outputFormat *s3.OutputSerialization + decoder EventStreamDecoder + ) + reader, writer := io.Pipe() + + inputFormat, err := parseInputSerialization( + eventType(query.InputFormat), + query.CompressionType, + query.InputContentStructure, + query.FileHeaderInfo, + ) + + if err != nil { + return err + } + + // set the delimiter to ','. Otherwise, delimiter is set to "lines" or "document" + // for json queries. 
+ if query.InputFormat == string(jsonType) && query.OutputFormat == string(csvType) { + query.InputContentStructure = "," + } + + outputFormat, decoder, err = parseOutputSerialization( + eventType(query.OutputFormat), + query.InputContentStructure, + reader, + ) + + if err != nil { + return err + } + + input := &s3.SelectObjectContentInput{ + Bucket: aws.String(url.Bucket), + Key: aws.String(url.Path), + ExpressionType: aws.String(query.ExpressionType), + Expression: aws.String(query.Expression), + InputSerialization: inputFormat, + OutputSerialization: outputFormat, + } + + resp, err := s.api.SelectObjectContentWithContext(ctx, input) + if err != nil { + return err + } + + go func() { + defer writer.Close() + + eventch := resp.EventStream.Reader.Events() + defer resp.EventStream.Close() + + for { + select { + case <-ctx.Done(): + return + case event, ok := <-eventch: + if !ok { + return + } + + switch e := event.(type) { + case *s3.RecordsEvent: + writer.Write(e.Payload) + } + } + } + }() + for { + val, err := decoder.Decode() + if err == io.EOF { + break + } + if err != nil { + return err + } + resultCh <- val + } + + return resp.EventStream.Reader.Err() +} + +// Put is a multipart upload operation to upload resources, which implements +// io.Reader interface, into S3 destination. +func (s *S3) Put( + ctx context.Context, + reader io.Reader, + to *url.URL, + metadata Metadata, + concurrency int, + partSize int64, +) error { + if s.dryRun { + return nil + } + + contentType := metadata.ContentType + if contentType == "" { + contentType = "application/octet-stream" + } + + input := &s3manager.UploadInput{ + Bucket: aws.String(to.Bucket), + Key: aws.String(to.Path), + Body: reader, + ContentType: aws.String(contentType), + Metadata: make(map[string]*string), + RequestPayer: s.RequestPayer(), + } + + storageClass := metadata.StorageClass + if storageClass != "" { + input.StorageClass = aws.String(storageClass) + } + + acl := metadata.ACL + if acl != "" { + input.ACL = aws.String(acl) + } + + cacheControl := metadata.CacheControl + if cacheControl != "" { + input.CacheControl = aws.String(cacheControl) + } + + expires := metadata.Expires + if expires != "" { + t, err := time.Parse(time.RFC3339, expires) + if err != nil { + return err + } + input.Expires = aws.Time(t) + } + + sseEncryption := metadata.EncryptionMethod + if sseEncryption != "" { + input.ServerSideEncryption = aws.String(sseEncryption) + sseKmsKeyID := metadata.EncryptionKeyID + if sseKmsKeyID != "" { + input.SSEKMSKeyId = aws.String(sseKmsKeyID) + } + } + + contentEncoding := metadata.ContentEncoding + if contentEncoding != "" { + input.ContentEncoding = aws.String(contentEncoding) + } + + contentDisposition := metadata.ContentDisposition + if contentDisposition != "" { + input.ContentDisposition = aws.String(contentDisposition) + } + + // add retry ID to the object metadata + if s.noSuchUploadRetryCount > 0 { + input.Metadata[metadataKeyRetryID] = generateRetryID() + } + + if len(metadata.UserDefined) != 0 { + m := make(map[string]*string) + for k, v := range metadata.UserDefined { + m[k] = aws.String(v) + } + input.Metadata = m + } + + uploaderOptsFn := func(u *s3manager.Uploader) { + u.PartSize = partSize + u.Concurrency = concurrency + } + _, err := s.uploader.UploadWithContext(ctx, input, uploaderOptsFn) + + if errHasCode(err, s3.ErrCodeNoSuchUpload) && s.noSuchUploadRetryCount > 0 { + return s.retryOnNoSuchUpload(ctx, to, input, err, uploaderOptsFn) + } + + return err +} + +func (s *S3) retryOnNoSuchUpload(ctx aws.Context, 
to *url.URL, input *s3manager.UploadInput, + err error, uploaderOpts ...func(*s3manager.Uploader)) error { + + var expectedRetryID string + if ID, ok := input.Metadata[metadataKeyRetryID]; ok { + expectedRetryID = *ID + } + + attempts := 0 + for ; errHasCode(err, s3.ErrCodeNoSuchUpload) && attempts < s.noSuchUploadRetryCount; attempts++ { + // check if object exists and has the retry ID we provided, if it does + // then it means that one of previous uploads was succesfull despite the received error. + obj, sErr := s.Stat(ctx, to) + if sErr == nil && obj.retryID == expectedRetryID { + err = nil + break + } + + msg := log.DebugMessage{Err: fmt.Sprintf("Retrying to upload %v upon error: %q", to, err.Error())} + log.Debug(msg) + + _, err = s.uploader.UploadWithContext(ctx, input, uploaderOpts...) + } + + if errHasCode(err, s3.ErrCodeNoSuchUpload) && s.noSuchUploadRetryCount > 0 { + err = awserr.New(s3.ErrCodeNoSuchUpload, fmt.Sprintf( + "RetryOnNoSuchUpload: %v attempts to retry resulted in %v", attempts, + s3.ErrCodeNoSuchUpload), err) + } + return err +} + +// chunk is an object identifier container which is used on MultiDelete +// operations. Since DeleteObjects API allows deleting objects up to 1000, +// splitting keys into multiple chunks is required. +type chunk struct { + Bucket string + Keys []*s3.ObjectIdentifier +} + +// calculateChunks calculates chunks for given URL channel and returns +// read-only chunk channel. +func (s *S3) calculateChunks(ch <-chan *url.URL) <-chan chunk { + chunkch := make(chan chunk) + + chunkSize := deleteObjectsMax + // delete each object individually if using gcs. + if IsGoogleEndpoint(s.endpointURL) { + chunkSize = 1 + } + + go func() { + defer close(chunkch) + + var keys []*s3.ObjectIdentifier + initKeys := func() { + keys = make([]*s3.ObjectIdentifier, 0) + } + + var bucket string + for url := range ch { + bucket = url.Bucket + + objid := &s3.ObjectIdentifier{Key: aws.String(url.Path)} + if url.VersionID != "" { + objid.VersionId = &url.VersionID + } + + keys = append(keys, objid) + if len(keys) == chunkSize { + chunkch <- chunk{ + Bucket: bucket, + Keys: keys, + } + initKeys() + } + } + + if len(keys) > 0 { + chunkch <- chunk{ + Bucket: bucket, + Keys: keys, + } + } + }() + + return chunkch +} + +// Delete is a single object delete operation. +func (s *S3) Delete(ctx context.Context, url *url.URL) error { + chunk := chunk{ + Bucket: url.Bucket, + Keys: []*s3.ObjectIdentifier{ + {Key: aws.String(url.Path)}, + }, + } + + resultch := make(chan *Object, 1) + defer close(resultch) + + s.doDelete(ctx, chunk, resultch) + obj := <-resultch + return obj.Err +} + +// doDelete deletes the given keys given by chunk. Results are piggybacked via +// the Object container. +func (s *S3) doDelete(ctx context.Context, chunk chunk, resultch chan *Object) { + if s.dryRun { + for _, k := range chunk.Keys { + key := fmt.Sprintf("s3://%v/%v", chunk.Bucket, aws.StringValue(k.Key)) + url, _ := url.New(key) + url.VersionID = aws.StringValue(k.VersionId) + resultch <- &Object{URL: url} + } + return + } + + // GCS does not support multi delete. 
+ if IsGoogleEndpoint(s.endpointURL) { + for _, k := range chunk.Keys { + _, err := s.api.DeleteObjectWithContext(ctx, &s3.DeleteObjectInput{ + Bucket: aws.String(chunk.Bucket), + Key: k.Key, + RequestPayer: s.RequestPayer(), + }) + if err != nil { + resultch <- &Object{Err: err} + return + } + key := fmt.Sprintf("s3://%v/%v", chunk.Bucket, aws.StringValue(k.Key)) + url, _ := url.New(key) + resultch <- &Object{URL: url} + } + return + } + + bucket := chunk.Bucket + o, err := s.api.DeleteObjectsWithContext(ctx, &s3.DeleteObjectsInput{ + Bucket: aws.String(bucket), + Delete: &s3.Delete{Objects: chunk.Keys}, + RequestPayer: s.RequestPayer(), + }) + if err != nil { + resultch <- &Object{Err: err} + return + } + + for _, d := range o.Deleted { + key := fmt.Sprintf("s3://%v/%v", bucket, aws.StringValue(d.Key)) + url, _ := url.New(key) + url.VersionID = aws.StringValue(d.VersionId) + resultch <- &Object{URL: url} + } + + for _, e := range o.Errors { + key := fmt.Sprintf("s3://%v/%v", bucket, aws.StringValue(e.Key)) + url, _ := url.New(key) + url.VersionID = aws.StringValue(e.VersionId) + + resultch <- &Object{ + URL: url, + Err: fmt.Errorf(aws.StringValue(e.Message)), + } + } +} + +// MultiDelete is a asynchronous removal operation for multiple objects. +// It reads given url channel, creates multiple chunks and run these +// chunks in parallel. Each chunk may have at most 1000 objects since DeleteObjects +// API has a limitation. +// See: https://docs.aws.amazon.com/AmazonS3/latest/API/API_DeleteObjects.html. +func (s *S3) MultiDelete(ctx context.Context, urlch <-chan *url.URL) <-chan *Object { + resultch := make(chan *Object) + + go func() { + sem := make(chan bool, 10) + defer close(sem) + defer close(resultch) + + chunks := s.calculateChunks(urlch) + + var wg sync.WaitGroup + for chunk := range chunks { + chunk := chunk + + wg.Add(1) + sem <- true + + go func() { + defer wg.Done() + s.doDelete(ctx, chunk, resultch) + <-sem + }() + } + + wg.Wait() + }() + + return resultch +} + +// ListBuckets is a blocking list-operation which gets bucket list and returns +// the buckets that match with given prefix. +func (s *S3) ListBuckets(ctx context.Context, prefix string) ([]Bucket, error) { + o, err := s.api.ListBucketsWithContext(ctx, &s3.ListBucketsInput{}) + if err != nil { + return nil, err + } + + var buckets []Bucket + for _, b := range o.Buckets { + bucketName := aws.StringValue(b.Name) + if prefix == "" || strings.HasPrefix(bucketName, prefix) { + buckets = append(buckets, Bucket{ + CreationDate: aws.TimeValue(b.CreationDate), + Name: bucketName, + }) + } + } + return buckets, nil +} + +// MakeBucket creates an S3 bucket with the given name. +func (s *S3) MakeBucket(ctx context.Context, name string) error { + if s.dryRun { + return nil + } + + _, err := s.api.CreateBucketWithContext(ctx, &s3.CreateBucketInput{ + Bucket: aws.String(name), + }) + return err +} + +// RemoveBucket removes an S3 bucket with the given name. 
+func (s *S3) RemoveBucket(ctx context.Context, name string) error { + if s.dryRun { + return nil + } + + _, err := s.api.DeleteBucketWithContext(ctx, &s3.DeleteBucketInput{ + Bucket: aws.String(name), + }) + return err +} + +// SetBucketVersioning sets the versioning property of the bucket +func (s *S3) SetBucketVersioning(ctx context.Context, versioningStatus, bucket string) error { + if s.dryRun { + return nil + } + + _, err := s.api.PutBucketVersioningWithContext(ctx, &s3.PutBucketVersioningInput{ + Bucket: aws.String(bucket), + VersioningConfiguration: &s3.VersioningConfiguration{ + Status: aws.String(versioningStatus), + }, + }) + return err +} + +// GetBucketVersioning returnsversioning property of the bucket +func (s *S3) GetBucketVersioning(ctx context.Context, bucket string) (string, error) { + output, err := s.api.GetBucketVersioningWithContext(ctx, &s3.GetBucketVersioningInput{ + Bucket: aws.String(bucket), + }) + if err != nil || output.Status == nil { + return "", err + } + + return *output.Status, nil + +} + +type sdkLogger struct{} + +func (l sdkLogger) Log(args ...interface{}) { + msg := log.TraceMessage{ + Message: fmt.Sprint(args...), + } + log.Trace(msg) +} + +// SessionCache holds session.Session according to s3Opts and it synchronizes +// access/modification. +type SessionCache struct { + sync.Mutex + sessions map[Options]*session.Session +} + +// newSession initializes a new AWS session with region fallback and custom +// options. +func (sc *SessionCache) newSession(ctx context.Context, opts Options) (*session.Session, error) { + sc.Lock() + defer sc.Unlock() + + if sess, ok := sc.sessions[opts]; ok { + return sess, nil + } + + awsCfg := aws.NewConfig() + + if opts.NoSignRequest { + // do not sign requests when making service API calls + awsCfg = awsCfg.WithCredentials(credentials.AnonymousCredentials) + } else if opts.CredentialFile != "" || opts.Profile != "" { + awsCfg = awsCfg.WithCredentials( + credentials.NewSharedCredentials(opts.CredentialFile, opts.Profile), + ) + } + + endpointURL, err := parseEndpoint(opts.Endpoint) + if err != nil { + return nil, err + } + + // use virtual-host-style if the endpoint is known to support it, + // otherwise use the path-style approach. + isVirtualHostStyle := isVirtualHostStyle(endpointURL) + + useAccelerate := supportsTransferAcceleration(endpointURL) + // AWS SDK handles transfer acceleration automatically. Setting the + // Endpoint to a transfer acceleration endpoint would cause bucket + // operations fail. + if useAccelerate { + endpointURL = sentinelURL + } + + var httpClient *http.Client + if opts.NoVerifySSL { + httpClient = insecureHTTPClient + } + awsCfg = awsCfg. + WithEndpoint(endpointURL.String()). + WithS3ForcePathStyle(!isVirtualHostStyle). + WithS3UseAccelerate(useAccelerate). + WithHTTPClient(httpClient). + // TODO WithLowerCaseHeaderMaps and WithDisableRestProtocolURICleaning options + // are going to be unnecessary and unsupported in AWS-SDK version 2. + // They should be removed during migration. + WithLowerCaseHeaderMaps(true). + // Disable URI cleaning to allow adjacent slashes to be used in S3 object keys. + WithDisableRestProtocolURICleaning(true) + + if opts.LogLevel == log.LevelTrace { + awsCfg = awsCfg.WithLogLevel(aws.LogDebug). 
+ WithLogger(sdkLogger{}) + } + + awsCfg.Retryer = newCustomRetryer(sc, opts.MaxRetries) + + useSharedConfig := session.SharedConfigDisable + { + // Reverse of what the SDK does: if AWS_SDK_LOAD_CONFIG is 0 (or a + // falsy value) disable shared configs + loadCfg := os.Getenv("AWS_SDK_LOAD_CONFIG") + if loadCfg != "" { + if enable, _ := strconv.ParseBool(loadCfg); !enable { + useSharedConfig = session.SharedConfigDisable + } + } + } + + sess, err := session.NewSessionWithOptions( + session.Options{ + Config: *awsCfg, + SharedConfigState: useSharedConfig, + }, + ) + if err != nil { + return nil, err + } + + // get region of the bucket and create session accordingly. if the region + // is not provided, it means we want region-independent session + // for operations such as listing buckets, making a new bucket etc. + // only get bucket region when it is not specified. + if opts.region != "" { + sess.Config.Region = aws.String(opts.region) + } else { + if err := setSessionRegion(ctx, sess, opts.bucket); err != nil { + return nil, err + } + } + + sc.sessions[opts] = sess + + return sess, nil +} + +func (sc *SessionCache) Clear() { + sc.Lock() + defer sc.Unlock() + sc.sessions = map[Options]*session.Session{} +} + +func setSessionRegion(ctx context.Context, sess *session.Session, bucket string) error { + region := aws.StringValue(sess.Config.Region) + + if region != "" { + return nil + } + + // set default region + sess.Config.Region = aws.String(endpoints.UsEast1RegionID) + + if bucket == "" { + return nil + } + + // auto-detection + region, err := s3manager.GetBucketRegion(ctx, sess, bucket, "", func(r *request.Request) { + // s3manager.GetBucketRegion uses Path style addressing and + // AnonymousCredentials by default, updating Request's Config to match + // the session config. + r.Config.S3ForcePathStyle = sess.Config.S3ForcePathStyle + r.Config.Credentials = sess.Config.Credentials + }) + if err != nil { + if errHasCode(err, "NotFound") { + return err + } + // don't deny any request to the service if region auto-fetching + // receives an error. Delegate error handling to command execution. + err = fmt.Errorf("session: fetching region failed: %v", err) + msg := log.ErrorMessage{Err: err.Error()} + log.Error(msg) + } else { + sess.Config.Region = aws.String(region) + } + + return nil +} + +// customRetryer wraps the SDK's built in DefaultRetryer adding additional +// error codes. Such as, retry for S3 InternalError code. +type customRetryer struct { + client.DefaultRetryer + sc *SessionCache +} + +func newCustomRetryer(sc *SessionCache, maxRetries int) *customRetryer { + return &customRetryer{ + sc: sc, + DefaultRetryer: client.DefaultRetryer{ + NumMaxRetries: maxRetries, + }, + } +} + +// ShouldRetry overrides SDK's built in DefaultRetryer, adding custom retry +// logics that are not included in the SDK. 
+func (c *customRetryer) ShouldRetry(req *request.Request) bool { + log.Error(log.ErrorMessage{ + Command: "retrier", + Err: req.Error.Error(), + }) + + shouldRetry := errHasCode(req.Error, "InternalError") || errHasCode(req.Error, "RequestTimeTooSkewed") || errHasCode(req.Error, "SlowDown") || strings.Contains(req.Error.Error(), "connection reset") || strings.Contains(req.Error.Error(), "connection timed out") || errHasCode(req.Error, "ExpiredToken") || errHasCode(req.Error, "ExpiredTokenException") + + if errHasCode(req.Error, "ExpiredToken") || errHasCode(req.Error, "ExpiredTokenException") { + log.Debug(log.DebugMessage{ + Err: "Clearing the token", + }) + + c.sc.Clear() + } + + if !shouldRetry { + shouldRetry = c.DefaultRetryer.ShouldRetry(req) + } + + // Errors related to tokens + if errHasCode(req.Error, "InvalidToken") { + return false + } + + if shouldRetry && req.Error != nil { + err := fmt.Errorf("retryable error: %v", req.Error) + msg := log.DebugMessage{Err: err.Error()} + log.Debug(msg) + } + + return shouldRetry +} + +var insecureHTTPClient = &http.Client{ + Transport: &http.Transport{ + TLSClientConfig: &tls.Config{InsecureSkipVerify: true}, + Proxy: http.ProxyFromEnvironment, + }, +} + +func supportsTransferAcceleration(endpoint urlpkg.URL) bool { + return endpoint.Hostname() == transferAccelEndpoint +} + +func IsGoogleEndpoint(endpoint urlpkg.URL) bool { + return endpoint.Hostname() == gcsEndpoint +} + +// isVirtualHostStyle reports whether the given endpoint supports S3 virtual +// host style bucket name resolving. If a custom S3 API compatible endpoint is +// given, resolve the bucketname from the URL path. +func isVirtualHostStyle(endpoint urlpkg.URL) bool { + return endpoint == sentinelURL || supportsTransferAcceleration(endpoint) || IsGoogleEndpoint(endpoint) +} + +func errHasCode(err error, code string) bool { + if err == nil || code == "" { + return false + } + + var awsErr awserr.Error + if errors.As(err, &awsErr) { + if awsErr.Code() == code { + return true + } + } + + var multiUploadErr s3manager.MultiUploadFailure + if errors.As(err, &multiUploadErr) { + return errHasCode(multiUploadErr.OrigErr(), code) + } + + return false + +} + +// IsCancelationError reports whether given error is a storage related +// cancelation error. +func IsCancelationError(err error) bool { + return errHasCode(err, request.CanceledErrorCode) +} + +// generate a retry ID for this upload attempt +func generateRetryID() *string { + num, _ := rand.Int(rand.Reader, big.NewInt(math.MaxInt64)) + return aws.String(num.String()) +} + +// EventStreamDecoder decodes a s3.Event with +// the given decoder. 
+type EventStreamDecoder interface { + Decode() ([]byte, error) +} + +type JSONDecoder struct { + decoder *json.Decoder +} + +func NewJSONDecoder(reader io.Reader) EventStreamDecoder { + return &JSONDecoder{ + decoder: json.NewDecoder(reader), + } +} + +func (jd *JSONDecoder) Decode() ([]byte, error) { + var val json.RawMessage + err := jd.decoder.Decode(&val) + if err != nil { + return nil, err + } + return val, nil +} + +type CsvDecoder struct { + decoder *csv.Reader + delimiter string +} + +func NewCsvDecoder(reader io.Reader) EventStreamDecoder { + csvDecoder := &CsvDecoder{ + decoder: csv.NewReader(reader), + delimiter: ",", + } + // returned values from AWS has double quotes in it + // so we enable lazy quotes + csvDecoder.decoder.LazyQuotes = true + return csvDecoder +} + +func (cd *CsvDecoder) Decode() ([]byte, error) { + res, err := cd.decoder.Read() + if err != nil { + return nil, err + } + + result := []byte{} + for i, str := range res { + if i != len(res)-1 { + str = fmt.Sprintf("%s%s", str, cd.delimiter) + } + result = append(result, []byte(str)...) + } + return result, nil +} diff --git a/vendor/github.com/peak/s5cmd/v2/storage/storage.go b/vendor/github.com/peak/s5cmd/v2/storage/storage.go new file mode 100644 index 000000000..982341662 --- /dev/null +++ b/vendor/github.com/peak/s5cmd/v2/storage/storage.go @@ -0,0 +1,269 @@ +//go:generate mockgen -source=$GOFILE -destination=mock_$GOFILE -package=$GOPACKAGE Storage + +// Package storage implements operations for s3 and fs. +package storage + +import ( + "bytes" + "context" + "encoding/gob" + "encoding/json" + "fmt" + "os" + "time" + + "github.com/lanrat/extsort" + "github.com/peak/s5cmd/v2/log" + "github.com/peak/s5cmd/v2/storage/url" + "github.com/peak/s5cmd/v2/strutil" +) + +var ( + + // ErrNoObjectFound indicates there are no objects found from a given directory. + ErrNoObjectFound = fmt.Errorf("no object found") +) + +// ErrGivenObjectNotFound indicates a specified object is not found. +type ErrGivenObjectNotFound struct { + ObjectAbsPath string +} + +func (e *ErrGivenObjectNotFound) Error() string { + return fmt.Sprintf("given object %v not found", e.ObjectAbsPath) +} + +// Storage is an interface for storage operations that is common +// to local filesystem and remote object storage. +type Storage interface { + // Stat returns the Object structure describing object. If src is not + // found, ErrGivenObjectNotFound is returned. + Stat(ctx context.Context, src *url.URL) (*Object, error) + + // List the objects and directories/prefixes in the src. + List(ctx context.Context, src *url.URL, followSymlinks bool) <-chan *Object + + // Delete deletes the given src. + Delete(ctx context.Context, src *url.URL) error + + // MultiDelete deletes all items returned from given urls in batches. + MultiDelete(ctx context.Context, urls <-chan *url.URL) <-chan *Object + + // Copy src to dst, optionally setting the given metadata. Src and dst + // arguments are of the same type. If src is a remote type, server side + // copying will be used. 
+ Copy(ctx context.Context, src, dst *url.URL, metadata Metadata) error +} + +func NewLocalClient(opts Options) *Filesystem { + return &Filesystem{dryRun: opts.DryRun} +} + +func NewRemoteClient(ctx context.Context, url *url.URL, opts Options) (*S3, error) { + newOpts := Options{ + MaxRetries: opts.MaxRetries, + NoSuchUploadRetryCount: opts.NoSuchUploadRetryCount, + Endpoint: opts.Endpoint, + NoVerifySSL: opts.NoVerifySSL, + DryRun: opts.DryRun, + NoSignRequest: opts.NoSignRequest, + UseListObjectsV1: opts.UseListObjectsV1, + RequestPayer: opts.RequestPayer, + Profile: opts.Profile, + CredentialFile: opts.CredentialFile, + LogLevel: opts.LogLevel, + bucket: url.Bucket, + region: opts.region, + } + return newS3Storage(ctx, newOpts) +} + +func NewClient(ctx context.Context, url *url.URL, opts Options) (Storage, error) { + if url.IsRemote() { + return NewRemoteClient(ctx, url, opts) + } + return NewLocalClient(opts), nil +} + +// Options stores configuration for storage. +type Options struct { + MaxRetries int + NoSuchUploadRetryCount int + Endpoint string + NoVerifySSL bool + DryRun bool + NoSignRequest bool + UseListObjectsV1 bool + LogLevel log.LogLevel + RequestPayer string + Profile string + CredentialFile string + bucket string + region string +} + +func (o *Options) SetRegion(region string) { + o.region = region +} + +// Object is a generic type which contains metadata for storage items. +type Object struct { + URL *url.URL `json:"key,omitempty"` + Etag string `json:"etag,omitempty"` + ModTime *time.Time `json:"last_modified,omitempty"` + Type ObjectType `json:"type,omitempty"` + Size int64 `json:"size,omitempty"` + StorageClass StorageClass `json:"storage_class,omitempty"` + Err error `json:"error,omitempty"` + retryID string + + // the VersionID field exist only for JSON Marshall, it must not be used for + // any other purpose. URL.VersionID must be used instead. + VersionID string `json:"version_id,omitempty"` +} + +// String returns the string representation of Object. +func (o *Object) String() string { + return o.URL.String() +} + +// JSON returns the JSON representation of Object. +func (o *Object) JSON() string { + if o.URL != nil { + o.VersionID = o.URL.VersionID + } + return strutil.JSON(o) +} + +// ObjectType is the type of Object. +type ObjectType struct { + mode os.FileMode +} + +// String returns the string representation of ObjectType. +func (o ObjectType) String() string { + switch mode := o.mode; { + case mode.IsRegular(): + return "file" + case mode.IsDir(): + return "directory" + case mode&os.ModeSymlink != 0: + return "symlink" + } + return "" +} + +// MarshalJSON returns the stringer of ObjectType as a marshalled json. +func (o ObjectType) MarshalJSON() ([]byte, error) { + return json.Marshal(o.String()) +} + +// IsDir checks if the object is a directory. +func (o ObjectType) IsDir() bool { + return o.mode.IsDir() +} + +// IsSymlink checks if the object is a symbolic link. +func (o ObjectType) IsSymlink() bool { + return o.mode&os.ModeSymlink != 0 +} + +// IsRegular checks if the object is a regular file. +func (o ObjectType) IsRegular() bool { + return o.mode.IsRegular() +} + +// ShouldProcessURL returns true if follow symlinks is enabled. +// If follow symlinks is disabled we should not process the url. 
+// (this check is needed only for local files) +func ShouldProcessURL(url *url.URL, followSymlinks bool) bool { + if followSymlinks { + return true + } + + if url.IsRemote() { + return true + } + fi, err := os.Lstat(url.Absolute()) + if err != nil { + return false + } + + // do not process symlinks + return fi.Mode()&os.ModeSymlink == 0 +} + +// dateFormat is a constant time template for the bucket. +const dateFormat = "2006/01/02 15:04:05" + +// Bucket is a container for storage objects. +type Bucket struct { + CreationDate time.Time `json:"created_at"` + Name string `json:"name"` +} + +// String returns the string representation of Bucket. +func (b Bucket) String() string { + return fmt.Sprintf("%s s3://%s", b.CreationDate.Format(dateFormat), b.Name) +} + +// JSON returns the JSON representation of Bucket. +func (b Bucket) JSON() string { + return strutil.JSON(b) +} + +// StorageClass represents the storage used to store an object. +type StorageClass string + +func (s StorageClass) IsGlacier() bool { + return s == "GLACIER" +} + +type Metadata struct { + ACL string + CacheControl string + Expires string + StorageClass string + ContentType string + ContentEncoding string + ContentDisposition string + EncryptionMethod string + EncryptionKeyID string + + UserDefined map[string]string +} + +func (o Object) ToBytes() []byte { + buf := bytes.NewBuffer(make([]byte, 0, 200)) + enc := gob.NewEncoder(buf) + enc.Encode(o.URL.ToBytes()) + enc.Encode(o.ModTime.Format(time.RFC3339Nano)) + enc.Encode(o.Type.mode) + enc.Encode(o.Size) + + return buf.Bytes() +} + +func FromBytes(data []byte) extsort.SortType { + dec := gob.NewDecoder(bytes.NewBuffer(data)) + var gobURL []byte + dec.Decode(&gobURL) + u := url.FromBytes(gobURL).(*url.URL) + o := Object{ + URL: u, + } + str := "" + dec.Decode(&str) + tmp, _ := time.Parse(time.RFC3339Nano, str) + o.ModTime = &tmp + dec.Decode(&o.Type.mode) + dec.Decode(&o.Size) + return o +} + +// Less returns if relative path of storage.Object a's URL comes before the one +// of b's in the lexicographic order. +// It assumes that both a, and b are the instances of Object +func Less(a, b extsort.SortType) bool { + return a.(Object).URL.Relative() < b.(Object).URL.Relative() +} diff --git a/vendor/github.com/peak/s5cmd/v2/storage/url/url.go b/vendor/github.com/peak/s5cmd/v2/storage/url/url.go new file mode 100644 index 000000000..35290f5da --- /dev/null +++ b/vendor/github.com/peak/s5cmd/v2/storage/url/url.go @@ -0,0 +1,480 @@ +// Package url abstracts local and remote file URLs. +package url + +import ( + "bytes" + "encoding/gob" + "encoding/json" + "fmt" + "net/url" + "path" + "path/filepath" + "regexp" + "runtime" + "strings" + + "github.com/lanrat/extsort" + "github.com/peak/s5cmd/v2/strutil" +) + +const ( + globCharacters string = "?*" + + // s3Scheme is the schema used on s3 URLs + s3Scheme string = "s3://" + + // s3Separator is the path separator for s3 URLs + s3Separator string = "/" + + // matchAllRe is the regex to match everything + matchAllRe string = ".*" +) + +type urlType int + +const ( + remoteObject urlType = iota + localObject +) + +// URL is the canonical representation of an object, either on local or remote +// storage. 
+type URL struct { + Type urlType + Scheme string + Bucket string + Path string + Delimiter string + Prefix string + VersionID string + AllVersions bool + + relativePath string + filter string + filterRegex *regexp.Regexp + raw bool +} + +type Option func(u *URL) + +func WithRaw(mode bool) Option { + return func(u *URL) { + u.raw = mode + } +} + +func WithVersion(versionID string) Option { + return func(u *URL) { + u.VersionID = versionID + } +} + +func WithAllVersions(isAllVersions bool) Option { + return func(u *URL) { + u.AllVersions = isAllVersions + } +} + +// New creates a new URL from given path string. +func New(s string, opts ...Option) (*URL, error) { + scheme, rest, isFound := strings.Cut(s, "://") + if !isFound { + url := &URL{ + Type: localObject, + Scheme: "", + Path: s, + } + + for _, opt := range opts { + opt(url) + } + + if err := url.setPrefixAndFilter(); err != nil { + return nil, err + } + + if runtime.GOOS == "windows" { + url.Path = filepath.ToSlash(url.Path) + } + return url, nil + } + + if scheme != "s3" { + return nil, fmt.Errorf("s3 url should start with %q", s3Scheme) + } + + parts := strings.SplitN(rest, s3Separator, 2) + + key := "" + bucket := parts[0] + if len(parts) == 2 { + key = parts[1] + } + + if bucket == "" { + return nil, fmt.Errorf("s3 url should have a bucket") + } + + if hasGlobCharacter(bucket) { + return nil, fmt.Errorf("bucket name cannot contain wildcards") + } + + url := &URL{ + Type: remoteObject, + Scheme: "s3", + Bucket: bucket, + Path: key, + } + + for _, opt := range opts { + opt(url) + } + + if err := url.setPrefixAndFilter(); err != nil { + return nil, err + } + return url, nil +} + +// IsRemote reports whether the object is stored on a remote storage system. +func (u *URL) IsRemote() bool { + return u.Type == remoteObject +} + +// IsPrefix reports whether the remote object is an S3 prefix, and does not +// look like an object. +func (u *URL) IsPrefix() bool { + return u.IsRemote() && strings.HasSuffix(u.Path, "/") +} + +// IsBucket returns true if the object url contains only bucket name +func (u *URL) IsBucket() bool { + return u.IsRemote() && u.Path == "" +} + +// IsVersioned returns true if the URL has versioning related values +func (u *URL) IsVersioned() bool { + return u.AllVersions || u.VersionID != "" +} + +// Absolute returns the absolute URL format of the object. +func (u *URL) Absolute() string { + if !u.IsRemote() { + return u.Path + } + + return u.remoteURL() +} + +// Relative returns a URI reference based on the calculated prefix. +func (u *URL) Relative() string { + if u.relativePath != "" { + return u.relativePath + } + return u.Absolute() +} + +// Base returns the last element of object path. +func (u *URL) Base() string { + basefn := filepath.Base + if u.IsRemote() { + basefn = path.Base + } + + return basefn(u.Path) +} + +// Dir returns all but the last element of path, typically the path's +// directory. +func (u *URL) Dir() string { + basefn := filepath.Dir + if u.IsRemote() { + basefn = path.Dir + } + + return basefn(u.Path) +} + +// Join joins string and returns new URL. +func (u *URL) Join(s string) *URL { + if runtime.GOOS == "windows" { + s = filepath.ToSlash(s) + } + + clone := u.Clone() + if !clone.IsRemote() { + // URL is local, thus clean the path by using path.Join which + // removes adjacent slashes. 
+ clone.Path = path.Join(clone.Path, s) + return clone + } + // URL is remote, keep them as it is and join using string.Join which + // allows to use adjacent slashes + clone.Path = strings.Join([]string{clone.Path, s}, "") + return clone +} + +func (u *URL) remoteURL() string { + s := u.Scheme + "://" + if u.Bucket != "" { + s += u.Bucket + } + + if u.Path != "" { + s += "/" + u.Path + } + + return s +} + +// setPrefixAndFilter creates url metadata for both wildcard and non-wildcard +// operations. +// +// It converts wildcard strings to regex format +// and pre-compiles it for later usage. It is default to +// ".*" to match every key on S3. +// +// filter is the part that comes after the wildcard string. +// prefix is the part that comes before the wildcard string. +// +// Example: +// +// key: a/b/test?/c/*.tsv +// prefix: a/b/test +// filter: ?/c/* +// regex: ^a/b/test./c/.*?\\.tsv$ +// delimiter: "" +// +// It prepares delimiter, prefix and regex for regular strings. +// These are used in S3 listing operations. +// See: https://docs.aws.amazon.com/AmazonS3/latest/dev/ListingKeysHierarchy.html +// +// Example: +// +// key: a/b/c +// prefix: a/b/c +// filter: "" +// regex: ^a/b/c.*$ +// delimiter: "/" +func (u *URL) setPrefixAndFilter() error { + if u.raw { + return nil + } + + if loc := strings.IndexAny(u.Path, globCharacters); loc < 0 { + u.Delimiter = s3Separator + u.Prefix = u.Path + } else { + u.Prefix = u.Path[:loc] + u.filter = u.Path[loc:] + } + + filterRegex := matchAllRe + if u.filter != "" { + filterRegex = strutil.WildCardToRegexp(u.filter) + } + filterRegex = regexp.QuoteMeta(u.Prefix) + filterRegex + filterRegex = strutil.MatchFromStartToEnd(filterRegex) + filterRegex = strutil.AddNewLineFlag(filterRegex) + r, err := regexp.Compile(filterRegex) + if err != nil { + return err + } + u.filterRegex = r + return nil +} + +// Clone creates a copy of the receiver. +func (u *URL) Clone() *URL { + return &URL{ + Type: u.Type, + Scheme: u.Scheme, + Bucket: u.Bucket, + Path: u.Path, + Delimiter: u.Delimiter, + Prefix: u.Prefix, + VersionID: u.VersionID, + AllVersions: u.AllVersions, + + relativePath: u.relativePath, + filter: u.filter, + filterRegex: u.filterRegex, + raw: u.raw, + } +} + +// SetRelative explicitly sets the relative path of u against given base value. +// If the base path contains `globCharacters` then, the relative path is +// determined with respect to the parent directory of the so called wildcarded +// object. +func (u *URL) SetRelative(base *URL) { + basePath := base.Absolute() + if base.IsWildcard() { + // When the basePath includes a wildcard character (globCharacters) + // replace basePath with its substring up to the + // index of the first instance of a wildcard character. + // + // If we don't handle this, the filepath.Dir() + // will assume those wildcards as a part of the name. + // Consequently the filepath.Rel() will determine + // the relative path incorrectly since the wildcarded + // path string won't match with the actual name of the + // object. + // e.g. base.Absolute(): "/a/*/n" + // u.Absolute() : "/a/b/n" + // + // if we don't trim substring from globCharacters on + // filepath.Dir() will give: "/a/*" + // consequently the + // filepath.Rel() will give: "../b/c" rather than "b/c" + // since "b" and "*" are not the same. 
+ loc := strings.IndexAny(basePath, globCharacters) + if loc >= 0 { + basePath = basePath[:loc] + } + } + baseDir := filepath.Dir(basePath) + u.relativePath, _ = filepath.Rel(baseDir, u.Absolute()) +} + +// Match reports whether if given key matches with the object. +func (u *URL) Match(key string) bool { + if u.filterRegex == nil { + return false + } + + if !u.filterRegex.MatchString(key) { + return false + } + + isBatch := u.filter != "" + if isBatch { + v := parseBatch(u.Prefix, key) + u.relativePath = v + return true + } + + v := parseNonBatch(u.Prefix, key) + u.relativePath = v + return true +} + +// String is the fmt.Stringer implementation of URL. +func (u *URL) String() string { + return u.Absolute() +} + +// MarshalJSON is the json.Marshaler implementation of URL. +func (u *URL) MarshalJSON() ([]byte, error) { + return json.Marshal(u.String()) +} + +func (u URL) ToBytes() []byte { + buf := bytes.NewBuffer(make([]byte, 0)) + enc := gob.NewEncoder(buf) + enc.Encode(u.Absolute()) + enc.Encode(u.relativePath) + enc.Encode(u.raw) + return buf.Bytes() +} + +func FromBytes(data []byte) extsort.SortType { + buf := bytes.NewBuffer(data) + dec := gob.NewDecoder(buf) + var ( + abs, rel string + raw bool + ) + dec.Decode(&abs) + dec.Decode(&rel) + dec.Decode(&raw) + + url, _ := New(abs, WithRaw(raw)) + url.relativePath = rel + return url +} + +// IsWildcard reports whether if a string contains any wildcard chars. +func (u *URL) IsWildcard() bool { + return !u.raw && hasGlobCharacter(u.Path) +} + +// parseBatch parses keys for wildcard operations. +// It cuts the key starting from first directory before the +// wildcard part (filter) +// +// Example: +// +// key: a/b/test2/c/example_file.tsv +// prefix: a/b/ +// output: test2/c/example_file.tsv +func parseBatch(prefix string, key string) string { + index := strings.LastIndex(prefix, s3Separator) + if index < 0 || !strings.HasPrefix(key, prefix) { + return key + } + trimmedKey := key[index:] + trimmedKey = strings.TrimPrefix(trimmedKey, s3Separator) + return trimmedKey +} + +// parseNonBatch parses keys for non-wildcard operations. +// It subtracts prefix part from the key and gets first +// path. +// +// Example: +// +// key: a/b/c/d +// prefix: a/b +// output: c/ +func parseNonBatch(prefix string, key string) string { + if key == prefix || !strings.HasPrefix(key, prefix) { + return key + } + parsedKey := strings.TrimSuffix(key, s3Separator) + if loc := strings.LastIndex(parsedKey, s3Separator); loc < len(prefix) { + if loc < 0 { + return key + } + parsedKey = key[loc:] + return strings.TrimPrefix(parsedKey, s3Separator) + } + parsedKey = strings.TrimPrefix(key, prefix) + parsedKey = strings.TrimPrefix(parsedKey, s3Separator) + index := strings.Index(parsedKey, s3Separator) + 1 + if index <= 0 || index >= len(parsedKey) { + return parsedKey + } + trimmedKey := parsedKey[:index] + return trimmedKey +} + +// hasGlobCharacter reports whether if a string contains any wildcard chars. 
+func hasGlobCharacter(s string) bool { + return strings.ContainsAny(s, globCharacters) +} + +func (u *URL) EscapedPath() string { + sourceKey := strings.TrimPrefix(u.String(), "s3://") + sourceKeyElements := strings.Split(sourceKey, "/") + for i, element := range sourceKeyElements { + sourceKeyElements[i] = url.QueryEscape(element) + } + return strings.Join(sourceKeyElements, "/") +} + +// check if all fields of URL equal +func (u *URL) deepEqual(url *URL) bool { + if url.Absolute() != u.Absolute() || + url.Type != u.Type || + url.Scheme != u.Scheme || + url.Bucket != u.Bucket || + url.Delimiter != u.Delimiter || + url.Path != u.Path || + url.Prefix != u.Prefix || + url.relativePath != u.relativePath || + url.filter != u.filter { + return false + } + return true +} diff --git a/vendor/github.com/peak/s5cmd/v2/strutil/strutil.go b/vendor/github.com/peak/s5cmd/v2/strutil/strutil.go new file mode 100644 index 000000000..49c66183d --- /dev/null +++ b/vendor/github.com/peak/s5cmd/v2/strutil/strutil.go @@ -0,0 +1,73 @@ +package strutil + +import ( + "encoding/json" + "fmt" + "regexp" + "strconv" + "strings" +) + +var humanDivisors = [...]struct { + suffix string + div int64 +}{ + {"K", 1 << 10}, + {"M", 1 << 20}, + {"G", 1 << 30}, + {"T", 1 << 40}, +} + +// HumanizeBytes takes a byte-size and returns a human-readable string +func HumanizeBytes(b int64) string { + var ( + suffix string + div int64 + ) + for _, f := range humanDivisors { + if b > f.div { + suffix = f.suffix + div = f.div + } + } + if suffix == "" { + return strconv.FormatInt(b, 10) + } + + return fmt.Sprintf("%.1f%s", float64(b)/float64(div), suffix) +} + +// JSON is a helper function for creating JSON-encoded strings. +func JSON(v interface{}) string { + bytes, _ := json.Marshal(v) + return string(bytes) +} + +// CapitalizeFirstRune converts first rune to uppercase, and converts rest of +// the string to lower case. +func CapitalizeFirstRune(str string) string { + if str == "" { + return str + } + runes := []rune(str) + first, rest := runes[0], runes[1:] + return strings.ToUpper(string(first)) + strings.ToLower(string(rest)) +} + +// AddNewLineFlag adds a flag that allows . to match new line character "\n". +// It assumes that the pattern does not have any flags. +func AddNewLineFlag(pattern string) string { + return "(?s)" + pattern +} + +// WildCardToRegexp converts a wildcarded expresiion to equivalent regular expression +func WildCardToRegexp(pattern string) string { + patternRegex := regexp.QuoteMeta(pattern) + patternRegex = strings.Replace(patternRegex, "\\?", ".", -1) + return strings.Replace(patternRegex, "\\*", ".*", -1) +} + +// MatchFromStartToEnd enforces that the regex will match the full string +func MatchFromStartToEnd(pattern string) string { + return "^" + pattern + "$" +} diff --git a/vendor/github.com/peak/s5cmd/v2/version/version.go b/vendor/github.com/peak/s5cmd/v2/version/version.go new file mode 100644 index 000000000..f538f3a44 --- /dev/null +++ b/vendor/github.com/peak/s5cmd/v2/version/version.go @@ -0,0 +1,21 @@ +package version + +import "strings" + +var ( + // Version represents the git tag of a particular release. + Version = "v0.0.0" + + // GitCommit represents git commit hash of a particular release. + GitCommit = "dev" +) + +// GetHumanVersion returns human readable version information. 
+func GetHumanVersion() string { + version := Version + if !strings.HasPrefix(version, "v") { + version = "v" + Version + } + + return version + "-" + GitCommit +} diff --git a/vendor/modules.txt b/vendor/modules.txt index bb1512904..267c21f3d 100644 --- a/vendor/modules.txt +++ b/vendor/modules.txt @@ -63,8 +63,6 @@ github.com/cheggaaa/pb/v3/termutil # github.com/cpuguy83/go-md2man/v2 v2.0.2 ## explicit; go 1.11 github.com/cpuguy83/go-md2man/v2/md2man -# github.com/davecgh/go-spew v1.1.1 -## explicit # github.com/fatih/color v1.15.0 ## explicit; go 1.17 github.com/fatih/color @@ -104,8 +102,8 @@ github.com/karrick/godirwalk # github.com/kballard/go-shellquote v0.0.0-20180428030007-95032a82bc51 ## explicit github.com/kballard/go-shellquote -# github.com/kr/pretty v0.3.0 -## explicit; go 1.12 +# github.com/kr/text v0.2.0 +## explicit # github.com/lanrat/extsort v1.0.0 ## explicit; go 1.13 github.com/lanrat/extsort @@ -120,12 +118,28 @@ github.com/mattn/go-isatty # github.com/mattn/go-runewidth v0.0.14 ## explicit; go 1.9 github.com/mattn/go-runewidth +# github.com/peak/s5cmd/v2 v2.0.0-00010101000000-000000000000 => ./ +## explicit; go 1.19 +github.com/peak/s5cmd/v2/command +github.com/peak/s5cmd/v2/error +github.com/peak/s5cmd/v2/log +github.com/peak/s5cmd/v2/log/stat +github.com/peak/s5cmd/v2/orderedwriter +github.com/peak/s5cmd/v2/parallel +github.com/peak/s5cmd/v2/parallel/fdlimit +github.com/peak/s5cmd/v2/progressbar +github.com/peak/s5cmd/v2/storage +github.com/peak/s5cmd/v2/storage/url +github.com/peak/s5cmd/v2/strutil +github.com/peak/s5cmd/v2/version # github.com/pkg/errors v0.9.1 ## explicit github.com/pkg/errors # github.com/rivo/uniseg v0.2.0 ## explicit; go 1.12 github.com/rivo/uniseg +# github.com/rogpeppe/go-internal v1.6.1 +## explicit; go 1.11 # github.com/russross/blackfriday/v2 v2.1.0 ## explicit github.com/russross/blackfriday/v2 @@ -158,8 +172,6 @@ golang.org/x/sys/windows # golang.org/x/tools v0.8.0 ## explicit; go 1.18 golang.org/x/tools/cover -# gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127 -## explicit # gopkg.in/mgo.v2 v2.0.0-20180705113604-9856a29383ce ## explicit gopkg.in/mgo.v2/bson @@ -176,3 +188,4 @@ gotest.tools/v3/internal/cleanup gotest.tools/v3/internal/difflib gotest.tools/v3/internal/format gotest.tools/v3/internal/source +# github.com/peak/s5cmd/v2 => ./
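A minimal sketch of how the vendored storage API exposed above can be driven from the forked module; the bucket name, key pattern, and retry count are illustrative assumptions, and credentials are resolved through the default AWS chain:

package main

import (
	"context"
	"fmt"

	"github.com/peak/s5cmd/v2/storage"
	"github.com/peak/s5cmd/v2/storage/url"
)

func main() {
	ctx := context.Background()

	// Parse a wildcard S3 URL; bucket names themselves may not contain wildcards.
	src, err := url.New("s3://example-bucket/logs/2024/*.json")
	if err != nil {
		panic(err)
	}

	// NewClient returns the S3 backend for remote URLs and the local
	// Filesystem backend otherwise.
	client, err := storage.NewClient(ctx, src, storage.Options{MaxRetries: 3})
	if err != nil {
		panic(err)
	}

	// List is non-blocking: matching objects (or a single error object,
	// e.g. wrapping ErrNoObjectFound) arrive on the returned channel.
	for obj := range client.List(ctx, src, false) {
		if obj.Err != nil {
			fmt.Println("list error:", obj.Err)
			continue
		}
		fmt.Printf("%s\t%d bytes\n", obj.URL, obj.Size)
	}
}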