diff --git a/ChangeLog.md b/ChangeLog.md index f968ced2d..c4d779137 100644 --- a/ChangeLog.md +++ b/ChangeLog.md @@ -1,6 +1,30 @@ # Change Log +## Version 10.3.3 + +### New features + +1. `azcopy list` is now supported on Azure Files and ADLS Gen 2, in addition to Blob Storage. +1. The `--exclude-path` flag is now supported in the `sync` command. +1. Added new environment variable `AZCOPY_USER_AGENT_PREFIX` to allow a prefix to be prepended to the user agent string. +
+### Bug fixes
+
+1. Content properties (such as Content-Encoding and Cache-Control) are now included when syncing Blob -> Blob and Azure + Files -> Azure Files +1. Custom metadata is now included when syncing Blob -> Blob and Azure Files -> Azure Files +1. The `azcopy list` command no longer repeats parts of its output. (Previously it would sometimes repeat itself and show the same blob multiple times in the output.) +1. The `--aad-endpoint` parameter is now visible, instead of hidden. It allows use of Azure Active Directory + authentication in national clouds (e.g. Azure China). +1. On Windows, AzCopy now caches information about which proxy server should be used, instead of looking it up every + time. This significantly reduces CPU + usage when transferring many small files. It also solves a rare bug where transfers got permanently "stuck" with + one uncompleted file. +1. When uploading to a write-only destination, there is now a clearer error message when the built-in file length check + fails. The message says how to fix the problem using `--check-length=false`. +1. Size checks on managed disk imports are now clearer, and all run at the start of the import process instead of the end. + ## Version 10.3.2 ### Bug fixes diff --git a/cmd/copyEnumeratorInit.go b/cmd/copyEnumeratorInit.go index 55986d73b..48c816532 100644 --- a/cmd/copyEnumeratorInit.go +++ b/cmd/copyEnumeratorInit.go @@ -230,20 +230,11 @@ func (cca *cookedCopyCmdArgs) initEnumerator(jobPartOrder common.CopyJobPartOrde srcRelPath := cca.makeEscapedRelativePath(true, isDestDir, object) dstRelPath := cca.makeEscapedRelativePath(false, isDestDir, object) - transfer := common.NewCopyTransfer( + transfer := object.ToNewCopyTransfer( cca.autoDecompress && cca.fromTo.IsDownload(), srcRelPath, dstRelPath, - object.lastModifiedTime, - object.size, - object.contentType, object.contentEncoding, object.contentDisposition, object.contentLanguage, object.cacheControl, - object.md5, - object.Metadata, - object.blobType, - azblob.AccessTierNone) // access tier is assigned conditionally - - if cca.s2sPreserveAccessTier { - transfer.BlobTier = object.blobAccessTier - } + cca.s2sPreserveAccessTier, + ) return addTransfer(&jobPartOrder, transfer, cca) } diff --git a/cmd/credentialUtil.go b/cmd/credentialUtil.go index 6d2b9b06c..1e30202db 100644 --- a/cmd/credentialUtil.go +++ b/cmd/credentialUtil.go @@ -367,7 +367,7 @@ func createBlobPipeline(ctx context.Context, credInfo common.CredentialInfo) (pi credential, azblob.PipelineOptions{ Telemetry: azblob.TelemetryOptions{ - Value: common.UserAgent, + Value: glcm.AddUserAgentPrefix(common.UserAgent), }, }, ste.XferRetryOptions{ @@ -402,7 +402,7 @@ func createBlobFSPipeline(ctx context.Context, credInfo common.CredentialInfo) ( MaxRetryDelay: ste.UploadMaxRetryDelay, }, Telemetry: azbfs.TelemetryOptions{ - Value: common.UserAgent, + Value: glcm.AddUserAgentPrefix(common.UserAgent), }, }), nil } @@ -420,7 +420,7 @@ func createFilePipeline(ctx context.Context, credInfo common.CredentialInfo) (pi MaxRetryDelay: ste.UploadMaxRetryDelay, },
Telemetry: azfile.TelemetryOptions{ - Value: common.UserAgent, + Value: glcm.AddUserAgentPrefix(common.UserAgent), }, }), nil } diff --git a/cmd/helpMessages.go b/cmd/helpMessages.go index 4bba9e2f9..a560d1d9a 100644 --- a/cmd/helpMessages.go +++ b/cmd/helpMessages.go @@ -198,7 +198,7 @@ const cleanJobsCmdExample = " azcopy jobs clean --with-status=completed" // ===================================== LIST COMMAND ===================================== // const listCmdShortDescription = "List the entities in a given resource" -const listCmdLongDescription = `List the entities in a given resource. In the current release, only Blob containers are supported.` +const listCmdLongDescription = `List the entities in a given resource. Blob, Files, and ADLS Gen 2 containers, folders, and accounts are supported.` const listCmdExample = "azcopy list [containerURL]" diff --git a/cmd/list.go b/cmd/list.go index 6e1c1774d..b9701d75d 100644 --- a/cmd/list.go +++ b/cmd/list.go @@ -24,14 +24,12 @@ import ( "context" "errors" "fmt" - "net/url" "strconv" - "strings" + + "github.com/spf13/cobra" "github.com/Azure/azure-storage-azcopy/common" "github.com/Azure/azure-storage-azcopy/ste" - "github.com/Azure/azure-storage-blob-go/azblob" - "github.com/spf13/cobra" ) func init() { @@ -59,11 +57,12 @@ func init() { // the expected argument in input is the container sas / or path of virtual directory in the container. // verifying the location type location := inferArgumentLocation(sourcePath) - if location != location.Blob() { + // Only support listing for Azure locations + if location != location.Blob() && location != location.File() && location != location.BlobFS() { glcm.Error("invalid path passed for listing. given source is of type " + location.String() + " while expect is container / container path ") } - err := HandleListContainerCommand(sourcePath) + err := HandleListContainerCommand(sourcePath, location) if err == nil { glcm.Exit(nil, common.EExitCode.Success()) } else { @@ -89,17 +88,29 @@ type ListParameters struct { var parameters = ListParameters{} // HandleListContainerCommand handles the list container command -func HandleListContainerCommand(source string) (err error) { +func HandleListContainerCommand(source string, location common.Location) (err error) { // TODO: Temporarily use context.TODO(), this should be replaced with a root context from main. ctx := context.WithValue(context.TODO(), ste.ServiceAPIVersionOverride, ste.DefaultServiceApiVersion) credentialInfo := common.CredentialInfo{} - // Use source as resource URL, and it can be public access resource URL. - if credentialInfo.CredentialType, _, err = getBlobCredentialType(ctx, source, true, false); err != nil { + + base, token, err := SplitAuthTokenFromResource(source, location) + if err != nil { + return err + } + + level, err := determineLocationLevel(source, location, true) + + if err != nil { return err + } + + // Treat our check as a destination because the isSource flag was designed for S2S transfers. 
+ if credentialInfo, _, err = getCredentialInfoForLocation(ctx, location, base, token, false); err != nil { + return fmt.Errorf("failed to obtain credential info: %s", err.Error()) + } else if location == location.File() && token == "" { + return errors.New("azure files requires a SAS token for authentication") } else if credentialInfo.CredentialType == common.ECredentialType.OAuthToken() { - // Message user that they are using Oauth token for authentication, - // in case of silently using cached token without consciousness。 glcm.Info("List is using OAuth token for authentication.") uotm := GetUserOAuthTokenManagerInstance() @@ -110,85 +121,56 @@ func HandleListContainerCommand(source string) (err error) { } } - // Create Pipeline which will be used further in the blob operations. - p, err := createBlobPipeline(ctx, credentialInfo) - if err != nil { - return err - } + traverser, err := initResourceTraverser(source, location, &ctx, &credentialInfo, nil, nil, true, false, func() {}) - // attempt to parse the source url - sourceURL, err := url.Parse(source) if err != nil { - return errors.New("cannot parse source URL") + return fmt.Errorf("failed to initialize traverser: %s", err.Error()) } - util := copyHandlerUtil{} // TODO: util could be further refactored - // get the container url to be used for listing - literalContainerURL := util.getContainerURLFromString(*sourceURL) - containerURL := azblob.NewContainerURL(literalContainerURL, p) + var fileCount int64 = 0 + var sizeCount int64 = 0 - // get the search prefix to query the service - searchPrefix := "" - // if the source is container url, then searchPrefix is empty - if !util.urlIsContainerOrVirtualDirectory(sourceURL) { - searchPrefix = util.getBlobNameFromURL(sourceURL.Path) - } - if len(searchPrefix) > 0 { - // if the user did not specify / at the end of the virtual directory, add it before doing the prefix search - if strings.LastIndex(searchPrefix, "/") != len(searchPrefix)-1 { - searchPrefix += "/" - } - } + processor := func(object storedObject) error { + objectSummary := object.relativePath + "; Content Length: " - summary := common.ListContainerResponse{} + if level == level.Service() { + objectSummary = object.containerName + "/" + objectSummary + } - fileCount := 0 - sizeCount := 0 + if parameters.MachineReadable { + objectSummary += strconv.Itoa(int(object.size)) + } else { + objectSummary += byteSizeToString(object.size) + } - // perform a list blob - for marker := (azblob.Marker{}); marker.NotDone(); { - // look for all blobs that start with the prefix - listBlob, err := containerURL.ListBlobsFlatSegment(ctx, marker, - azblob.ListBlobsSegmentOptions{Prefix: searchPrefix}) - if err != nil { - return fmt.Errorf("cannot list blobs for download. Failed with error %s", err.Error()) + if parameters.RunningTally { + fileCount++ + sizeCount += object.size } - // Process the blobs returned in this result segment (if the segment is empty, the loop body won't execute) - for _, blobInfo := range listBlob.Segment.BlobItems { - blobName := blobInfo.Name + "; Content Size: " + glcm.Info(objectSummary) - if parameters.MachineReadable { - blobName += strconv.Itoa(int(*blobInfo.Properties.ContentLength)) - } else { - blobName += byteSizeToString(*blobInfo.Properties.ContentLength) - } + // No need to strip away from the name as the traverser has already done so. 
+ return nil + } - if parameters.RunningTally { - fileCount++ - sizeCount += int(*blobInfo.Properties.ContentLength) - } + err = traverser.traverse(nil, processor, nil) - if len(searchPrefix) > 0 { - // strip away search prefix from the blob name. - blobName = strings.Replace(blobName, searchPrefix, "", 1) - } - summary.Blobs = append(summary.Blobs, blobName) - } - marker = listBlob.NextMarker - printListContainerResponse(&summary) + if err != nil { + return fmt.Errorf("failed to traverse container: %s", err.Error()) + } - if parameters.RunningTally { - glcm.Info("") - glcm.Info("File count: " + strconv.Itoa(fileCount)) + if parameters.RunningTally { + glcm.Info("") + glcm.Info("File count: " + strconv.Itoa(int(fileCount))) - if parameters.MachineReadable { - glcm.Info("Total file size: " + strconv.Itoa(sizeCount)) - } else { - glcm.Info("Total file size: " + byteSizeToString(int64(sizeCount))) - } + if parameters.MachineReadable { + glcm.Info("Total file size: " + strconv.Itoa(int(sizeCount))) + } else { + glcm.Info("Total file size: " + byteSizeToString(sizeCount)) } } + return nil } @@ -222,7 +204,7 @@ func byteSizeToString(size int64) string { "GiB", "TiB", "PiB", - "EiB", //Let's face it, a file probably won't be more than 1000 exabytes in YEARS. (and int64 literally isn't large enough to handle too many exbibytes. 128 bit processors when) + "EiB", // Let's face it, a file, account, or container probably won't be more than 1000 exabytes in YEARS. (and int64 literally isn't large enough to handle too many exbibytes. 128 bit processors when) } unit := 0 floatSize := float64(size) diff --git a/cmd/login.go b/cmd/login.go index 2ff980795..75f9cc63a 100644 --- a/cmd/login.go +++ b/cmd/login.go @@ -63,7 +63,7 @@ func init() { rootCmd.AddCommand(lgCmd) lgCmd.PersistentFlags().StringVar(&loginCmdArgs.tenantID, "tenant-id", "", "The Azure Active Directory tenant ID to use for OAuth device interactive login.") - lgCmd.PersistentFlags().StringVar(&loginCmdArgs.aadEndpoint, "aad-endpoint", "", "The Azure Active Directory endpoint to use for OAuth user interactive login.") + lgCmd.PersistentFlags().StringVar(&loginCmdArgs.aadEndpoint, "aad-endpoint", "", "The Azure Active Directory endpoint to use. The default ("+common.DefaultActiveDirectoryEndpoint+") is correct for the public Azure cloud. Set this parameter when authenticating in a national cloud. Not needed for Managed Service Identity") // Use identity which aligns to Azure powershell and CLI. lgCmd.PersistentFlags().BoolVar(&loginCmdArgs.identity, "identity", false, "Log in using virtual machine's identity, also known as managed service identity (MSI).") // Use SPN certificate to log in. @@ -78,10 +78,6 @@ func init() { //login with SPN lgCmd.PersistentFlags().StringVar(&loginCmdArgs.applicationID, "application-id", "", "Application ID of user-assigned identity. Required for service principal auth.") lgCmd.PersistentFlags().StringVar(&loginCmdArgs.certPath, "certificate-path", "", "Path to certificate for SPN authentication. Required for certificate-based service principal auth.") - - // hide flags - // temporaily hide aad-endpoint and support Production environment only. 
- lgCmd.PersistentFlags().MarkHidden("aad-endpoint") } type loginCmdArgs struct { diff --git a/cmd/pathUtils.go b/cmd/pathUtils.go index 281ec0569..5409138b6 100644 --- a/cmd/pathUtils.go +++ b/cmd/pathUtils.go @@ -59,47 +59,24 @@ func determineLocationLevel(location string, locationType common.Location, sourc case common.ELocation.Blob(), common.ELocation.File(), - common.ELocation.BlobFS(): + common.ELocation.BlobFS(), + common.ELocation.S3(): URL, err := url.Parse(location) if err != nil { return ELocationLevel.Service(), err } - // blobURLParts is the same format and doesn't care about endpoint - bURL := azblob.NewBlobURLParts(*URL) + // GenericURLParts determines the correct resource URL parts to make use of + bURL := common.NewGenericResourceURLParts(*URL, locationType) - if strings.Contains(bURL.ContainerName, "*") && bURL.BlobName != "" { + if strings.Contains(bURL.GetContainerName(), "*") && bURL.GetObjectName() != "" { return ELocationLevel.Service(), errors.New("can't use a wildcarded container name and specific blob name in combination") } - if bURL.BlobName != "" { - return ELocationLevel.Object(), nil - } else if bURL.ContainerName != "" && !strings.Contains(bURL.ContainerName, "*") { - return ELocationLevel.Container(), nil - } else { - return ELocationLevel.Service(), nil - } - case common.ELocation.S3(): - URL, err := url.Parse(location) - - if err != nil { - return ELocationLevel.Service(), nil - } - - s3URL, err := common.NewS3URLParts(*URL) - - if err != nil { - return ELocationLevel.Service(), nil - } - - if strings.Contains(s3URL.BucketName, "*") && s3URL.ObjectKey != "" { - return ELocationLevel.Service(), errors.New("can't use a wildcarded container name and specific object name in combination") - } - - if s3URL.ObjectKey != "" { + if bURL.GetObjectName() != "" { return ELocationLevel.Object(), nil - } else if s3URL.BucketName != "" && !strings.Contains(s3URL.BucketName, "*") { + } else if bURL.GetContainerName() != "" && !strings.Contains(bURL.GetContainerName(), "*") { return ELocationLevel.Container(), nil } else { return ELocationLevel.Service(), nil diff --git a/cmd/removeProcessor.go b/cmd/removeProcessor.go index 8e4f9341c..5e19544b0 100644 --- a/cmd/removeProcessor.go +++ b/cmd/removeProcessor.go @@ -53,5 +53,5 @@ func newRemoveTransferProcessor(cca *cookedCopyCmdArgs, numOfTransfersPerPart in // note that the source and destination, along with the template are given to the generic processor's constructor // this means that given an object with a relative path, this processor already knows how to schedule the right kind of transfers return newCopyTransferProcessor(copyJobTemplate, numOfTransfersPerPart, cca.source, cca.destination, - shouldEncodeSource, false, reportFirstPart, reportFinalPart) + shouldEncodeSource, false, reportFirstPart, reportFinalPart, false) } diff --git a/cmd/sync.go b/cmd/sync.go index b0de51b40..f4fe4fc6c 100644 --- a/cmd/sync.go +++ b/cmd/sync.go @@ -45,6 +45,7 @@ type rawSyncCmdArgs struct { logVerbosity string include string exclude string + excludePath string includeFileAttributes string excludeFileAttributes string legacyInclude string // for warning messages only @@ -57,6 +58,8 @@ type rawSyncCmdArgs struct { // which do not exists at source. With this flag turned on/off, users will not be asked for permission. 
// otherwise the user is prompted to make a decision deleteDestination string + + s2sPreserveAccessTier bool } func (raw *rawSyncCmdArgs) parsePatterns(pattern string) (cookedPatterns []string) { @@ -160,8 +163,9 @@ func (raw *rawSyncCmdArgs) cook() (cookedSyncCmdArgs, error) { } // parse the filter patterns - cooked.include = raw.parsePatterns(raw.include) - cooked.exclude = raw.parsePatterns(raw.exclude) + cooked.includePatterns = raw.parsePatterns(raw.include) + cooked.excludePatterns = raw.parsePatterns(raw.exclude) + cooked.excludePaths = raw.parsePatterns(raw.excludePath) // parse the attribute filter patterns cooked.includeFileAttributes = raw.parsePatterns(raw.includeFileAttributes) @@ -185,6 +189,10 @@ func (raw *rawSyncCmdArgs) cook() (cookedSyncCmdArgs, error) { return cooked, err } + if cooked.fromTo.IsS2S() { + cooked.preserveAccessTier = raw.s2sPreserveAccessTier + } + return cooked, nil } @@ -216,8 +224,9 @@ type cookedSyncCmdArgs struct { // filters recursive bool followSymlinks bool - include []string - exclude []string + includePatterns []string + excludePatterns []string + excludePaths []string includeFileAttributes []string excludeFileAttributes []string @@ -252,6 +261,8 @@ type cookedSyncCmdArgs struct { // which do not exists at source. With this flag turned on/off, users will not be asked for permission. // otherwise the user is prompted to make a decision deleteDestination common.DeleteDestination + + preserveAccessTier bool } func (cca *cookedSyncCmdArgs) incrementDeletionCount() { @@ -554,6 +565,8 @@ func init() { syncCmd.PersistentFlags().Float64Var(&raw.blockSizeMB, "block-size-mb", 0, "Use this block size (specified in MiB) when uploading to Azure Storage or downloading from Azure Storage. Default is automatically calculated based on file size. Decimal fractions are allowed (For example: 0.25).") syncCmd.PersistentFlags().StringVar(&raw.include, "include-pattern", "", "Include only files where the name matches the pattern list. For example: *.jpg;*.pdf;exactName") syncCmd.PersistentFlags().StringVar(&raw.exclude, "exclude-pattern", "", "Exclude files where the name matches the pattern list. For example: *.jpg;*.pdf;exactName") + syncCmd.PersistentFlags().StringVar(&raw.excludePath, "exclude-path", "", "Exclude these paths when comparing the source against the destination. "+ + "This option does not support wildcard characters (*). Checks relative path prefix(For example: myFolder;myFolder/subDirName/file.pdf).") syncCmd.PersistentFlags().StringVar(&raw.includeFileAttributes, "include-attributes", "", "(Windows only) Include only files whose attributes match the attribute list. For example: A;S;R") syncCmd.PersistentFlags().StringVar(&raw.excludeFileAttributes, "exclude-attributes", "", "(Windows only) Exclude files whose attributes match the attribute list. For example: A;S;R") syncCmd.PersistentFlags().StringVar(&raw.logVerbosity, "log-level", "INFO", "Define the log verbosity for the log file, available levels: INFO(all requests and responses), WARNING(slow responses), ERROR(only failed requests), and NONE(no output logs). (default INFO).") @@ -561,6 +574,9 @@ func init() { "If set to prompt, the user will be asked a question before scheduling files and blobs for deletion. (default 'false').") syncCmd.PersistentFlags().BoolVar(&raw.putMd5, "put-md5", false, "Create an MD5 hash of each file, and save the hash as the Content-MD5 property of the destination blob or file. (By default the hash is NOT created.) 
Only available when uploading.") syncCmd.PersistentFlags().StringVar(&raw.md5ValidationOption, "check-md5", common.DefaultHashValidationOption.String(), "Specifies how strictly MD5 hashes should be validated when downloading. This option is only available when downloading. Available values include: NoCheck, LogOnly, FailIfDifferent, FailIfDifferentOrMissing. (default 'FailIfDifferent').") + syncCmd.PersistentFlags().BoolVar(&raw.s2sPreserveAccessTier, "s2s-preserve-access-tier", true, "Preserve access tier during service to service copy. "+ "Please refer to [Azure Blob storage: hot, cool, and archive access tiers](https://docs.microsoft.com/azure/storage/blobs/storage-blob-storage-tiers) to ensure the destination storage account supports setting the access tier. "+ "In cases where setting the access tier is not supported, use --s2s-preserve-access-tier=false to skip copying the access tier. (default true).") // temp, to assist users with change in param names, by providing a clearer message when these obsolete ones are accidentally used syncCmd.PersistentFlags().StringVar(&raw.legacyInclude, "include", "", "Legacy include param. DO NOT USE") @@ -571,5 +587,5 @@ func init() { // TODO follow sym link is not implemented, clarify behavior first //syncCmd.PersistentFlags().BoolVar(&raw.followSymlinks, "follow-symlinks", false, "follow symbolic links when performing sync from local file system.") - // TODO sync does not support any BlobAttributes, this functionality should be added + // TODO sync does not support all BlobAttributes on the command line; this functionality should be added } diff --git a/cmd/syncEnumerator.go b/cmd/syncEnumerator.go index c130cd4d2..4674b3d62 100644 --- a/cmd/syncEnumerator.go +++ b/cmd/syncEnumerator.go @@ -76,13 +76,14 @@ func (cca *cookedSyncCmdArgs) initEnumerator(ctx context.Context) (enumerator *s // Note: includeFilters and includeAttrFilters are ANDed // They must both pass to get the file included // Same rule applies to excludeFilters and excludeAttrFilters - filters := buildIncludeFilters(cca.include) + filters := buildIncludeFilters(cca.includePatterns) if cca.fromTo.From() == common.ELocation.Local() { includeAttrFilters := buildAttrFilters(cca.includeFileAttributes, src, true) filters = append(filters, includeAttrFilters...) } - filters = append(filters, buildExcludeFilters(cca.exclude, false)...) + filters = append(filters, buildExcludeFilters(cca.excludePatterns, false)...) + filters = append(filters, buildExcludeFilters(cca.excludePaths, true)...) if cca.fromTo.From() == common.ELocation.Local() { excludeAttrFilters := buildAttrFilters(cca.excludeFileAttributes, src, false) filters = append(filters, excludeAttrFilters...)
diff --git a/cmd/syncProcessor.go b/cmd/syncProcessor.go index ead6f7119..76183aa4e 100644 --- a/cmd/syncProcessor.go +++ b/cmd/syncProcessor.go @@ -72,7 +72,7 @@ func newSyncTransferProcessor(cca *cookedSyncCmdArgs, numOfTransfersPerPart int) // note that the source and destination, along with the template are given to the generic processor's constructor // this means that given an object with a relative path, this processor already knows how to schedule the right kind of transfers return newCopyTransferProcessor(copyJobTemplate, numOfTransfersPerPart, cca.source, cca.destination, - shouldEncodeSource, shouldEncodeDestination, reportFirstPart, reportFinalPart) + shouldEncodeSource, shouldEncodeDestination, reportFirstPart, reportFinalPart, cca.preserveAccessTier) } // base for delete processors targeting different resources diff --git a/cmd/zc_enumerator.go b/cmd/zc_enumerator.go index 3a9065f63..7358593cc 100644 --- a/cmd/zc_enumerator.go +++ b/cmd/zc_enumerator.go @@ -81,8 +81,60 @@ const ( blobTypeNA = azblob.BlobNone // some things, e.g. local files, aren't blobs so they don't have their own blob type so we use this "not applicable" constant ) -func (storedObject *storedObject) isMoreRecentThan(storedObject2 storedObject) bool { - return storedObject.lastModifiedTime.After(storedObject2.lastModifiedTime) +func (s *storedObject) isMoreRecentThan(storedObject2 storedObject) bool { + return s.lastModifiedTime.After(storedObject2.lastModifiedTime) +} + +func (s *storedObject) ToNewCopyTransfer( + steWillAutoDecompress bool, + Source string, + Destination string, + preserveBlobTier bool) common.CopyTransfer { + + if steWillAutoDecompress { + Destination = stripCompressionExtension(Destination, s.contentEncoding) + } + + t := common.CopyTransfer{ + Source: Source, + Destination: Destination, + LastModifiedTime: s.lastModifiedTime, + SourceSize: s.size, + ContentType: s.contentType, + ContentEncoding: s.contentEncoding, + ContentDisposition: s.contentDisposition, + ContentLanguage: s.contentLanguage, + CacheControl: s.cacheControl, + ContentMD5: s.md5, + Metadata: s.Metadata, + BlobType: s.blobType, + // set this below, conditionally: BlobTier + } + + if preserveBlobTier { + t.BlobTier = s.blobAccessTier + } + + return t +} + +// stripCompressionExtension strips any file extension that corresponds to the +// compression indicated by the encoding type. +// Why remove this extension here, at enumeration time, instead of just doing it +// in the STE when we are about to save the file? +// Because by doing it here we get the accurate name in things that +// directly read the Plan files, like the jobs show command +func stripCompressionExtension(dest string, contentEncoding string) string { + // Ignore error getting compression type. We can't easily report it now, and we don't need to know about the error + // cases here when deciding renaming. 
STE will log error on the error cases + ct, _ := common.GetCompressionType(contentEncoding) + ext := strings.ToLower(filepath.Ext(dest)) + stripGzip := ct == common.ECompressionType.GZip() && (ext == ".gz" || ext == ".gzip") + stripZlib := ct == common.ECompressionType.ZLib() && ext == ".zz" // "standard" extension for zlib-wrapped files, according to pigz doc and Stack Overflow + if stripGzip || stripZlib { + return strings.TrimSuffix(dest, filepath.Ext(dest)) + } + return dest } // a constructor is used so that in case the storedObject has to change, the callers would get a compilation error diff --git a/cmd/zc_filter.go b/cmd/zc_filter.go index 92a42b3f0..370f3d555 100644 --- a/cmd/zc_filter.go +++ b/cmd/zc_filter.go @@ -54,7 +54,7 @@ func (f *excludeBlobTypeFilter) doesPass(object storedObject) bool { type excludeFilter struct { pattern string - targetsPath bool // TODO: include targetsPath in sync + targetsPath bool } func (f *excludeFilter) doesSupportThisOS() (msg string, supported bool) { diff --git a/cmd/zc_processor.go b/cmd/zc_processor.go index e269b25d0..344fea110 100644 --- a/cmd/zc_processor.go +++ b/cmd/zc_processor.go @@ -42,11 +42,13 @@ type copyTransferProcessor struct { // handles for progress tracking reportFirstPartDispatched func(jobStarted bool) reportFinalPartDispatched func() + + preserveAccessTier bool } func newCopyTransferProcessor(copyJobTemplate *common.CopyJobPartOrderRequest, numOfTransfersPerPart int, source string, destination string, shouldEscapeSourceObjectName bool, shouldEscapeDestinationObjectName bool, - reportFirstPartDispatched func(bool), reportFinalPartDispatched func()) *copyTransferProcessor { + reportFirstPartDispatched func(bool), reportFinalPartDispatched func(), preserveAccessTier bool) *copyTransferProcessor { return ©TransferProcessor{ numOfTransfersPerPart: numOfTransfersPerPart, copyJobTemplate: copyJobTemplate, @@ -56,6 +58,7 @@ func newCopyTransferProcessor(copyJobTemplate *common.CopyJobPartOrderRequest, n shouldEscapeDestinationObjectName: shouldEscapeDestinationObjectName, reportFirstPartDispatched: reportFirstPartDispatched, reportFinalPartDispatched: reportFinalPartDispatched, + preserveAccessTier: preserveAccessTier, } } @@ -75,15 +78,13 @@ func (s *copyTransferProcessor) scheduleCopyTransfer(storedObject storedObject) // only append the transfer after we've checked and dispatched a part // so that there is at least one transfer for the final part - s.copyJobTemplate.Transfers = append(s.copyJobTemplate.Transfers, common.CopyTransfer{ - Source: s.escapeIfNecessary(storedObject.relativePath, s.shouldEscapeSourceObjectName), - Destination: s.escapeIfNecessary(storedObject.relativePath, s.shouldEscapeDestinationObjectName), - SourceSize: storedObject.size, - LastModifiedTime: storedObject.lastModifiedTime, - ContentMD5: storedObject.md5, - BlobType: storedObject.blobType, - ContentEncoding: storedObject.contentEncoding, - }) + s.copyJobTemplate.Transfers = append(s.copyJobTemplate.Transfers, storedObject.ToNewCopyTransfer( + false, // sync has no --decompress option + s.escapeIfNecessary(storedObject.relativePath, s.shouldEscapeSourceObjectName), + s.escapeIfNecessary(storedObject.relativePath, s.shouldEscapeDestinationObjectName), + s.preserveAccessTier, + )) + return nil } diff --git a/cmd/zt_generic_processor_test.go b/cmd/zt_generic_processor_test.go index c676eee1f..2cb87d999 100644 --- a/cmd/zt_generic_processor_test.go +++ b/cmd/zt_generic_processor_test.go @@ -78,7 +78,7 @@ func (s *genericProcessorSuite) 
TestCopyTransferProcessorMultipleFiles(c *chk.C) for _, numOfParts := range []int{1, 3} { numOfTransfersPerPart := len(sampleObjects) / numOfParts copyProcessor := newCopyTransferProcessor(processorTestSuiteHelper{}.getCopyJobTemplate(), numOfTransfersPerPart, - containerURL.String(), dstDirName, false, false, nil, nil) + containerURL.String(), dstDirName, false, false, nil, nil, false) // go through the objects and make sure they are processed without error for _, storedObject := range sampleObjects { @@ -125,7 +125,7 @@ func (s *genericProcessorSuite) TestCopyTransferProcessorSingleFile(c *chk.C) { // set up the processor blobURL := containerURL.NewBlockBlobURL(blobList[0]).String() copyProcessor := newCopyTransferProcessor(processorTestSuiteHelper{}.getCopyJobTemplate(), 2, - blobURL, filepath.Join(dstDirName, dstFileName), false, false, nil, nil) + blobURL, filepath.Join(dstDirName, dstFileName), false, false, nil, nil, false) // exercise the copy transfer processor storedObject := newStoredObject(noPreProccessor, blobList[0], "", time.Now(), 0, nil, blobTypeNA, "") diff --git a/cmd/zt_interceptors_for_test.go b/cmd/zt_interceptors_for_test.go index 391988d18..b777e79cd 100644 --- a/cmd/zt_interceptors_for_test.go +++ b/cmd/zt_interceptors_for_test.go @@ -125,6 +125,9 @@ func (*mockedLifecycleManager) GetEnvironmentVariable(env common.EnvironmentVari func (*mockedLifecycleManager) SetOutputFormat(common.OutputFormat) {} func (*mockedLifecycleManager) EnableInputWatcher() {} func (*mockedLifecycleManager) EnableCancelFromStdIn() {} +func (*mockedLifecycleManager) AddUserAgentPrefix(userAgent string) string { + return userAgent +} type dummyProcessor struct { record []storedObject diff --git a/cmd/zt_sync_blob_blob_test.go b/cmd/zt_sync_blob_blob_test.go index 1b6250e43..9b308ddd7 100644 --- a/cmd/zt_sync_blob_blob_test.go +++ b/cmd/zt_sync_blob_blob_test.go @@ -216,7 +216,7 @@ func (s *cmdIntegrationSuite) TestSyncS2SWithMismatchedDestination(c *chk.C) { } // include flag limits the scope of source/destination comparison -func (s *cmdIntegrationSuite) TestSyncS2SWithIncludeFlag(c *chk.C) { +func (s *cmdIntegrationSuite) TestSyncS2SWithIncludePatternFlag(c *chk.C) { bsu := getBSU() srcContainerURL, srcContainerName := createNewContainer(c, bsu) dstContainerURL, dstContainerName := createNewContainer(c, bsu) @@ -251,7 +251,7 @@ func (s *cmdIntegrationSuite) TestSyncS2SWithIncludeFlag(c *chk.C) { } // exclude flag limits the scope of source/destination comparison -func (s *cmdIntegrationSuite) TestSyncS2SWithExcludeFlag(c *chk.C) { +func (s *cmdIntegrationSuite) TestSyncS2SWithExcludePatternFlag(c *chk.C) { bsu := getBSU() srcContainerURL, srcContainerName := createNewContainer(c, bsu) dstContainerURL, dstContainerName := createNewContainer(c, bsu) @@ -286,7 +286,7 @@ func (s *cmdIntegrationSuite) TestSyncS2SWithExcludeFlag(c *chk.C) { } // include and exclude flag can work together to limit the scope of source/destination comparison -func (s *cmdIntegrationSuite) TestSyncS2SWithIncludeAndExcludeFlag(c *chk.C) { +func (s *cmdIntegrationSuite) TestSyncS2SWithIncludeAndExcludePatternFlag(c *chk.C) { bsu := getBSU() srcContainerURL, srcContainerName := createNewContainer(c, bsu) dstContainerURL, dstContainerName := createNewContainer(c, bsu) @@ -327,6 +327,59 @@ func (s *cmdIntegrationSuite) TestSyncS2SWithIncludeAndExcludeFlag(c *chk.C) { }) } +// a specific path is avoided in the comparison +func (s *cmdIntegrationSuite) TestSyncS2SWithExcludePathFlag(c *chk.C) { + bsu := getBSU() + 
srcContainerURL, srcContainerName := createNewContainer(c, bsu) + dstContainerURL, dstContainerName := createNewContainer(c, bsu) + defer deleteContainer(c, srcContainerURL) + defer deleteContainer(c, dstContainerURL) + + // set up the source container with numerous blobs + blobList := scenarioHelper{}.generateCommonRemoteScenarioForBlob(c, srcContainerURL, "") + c.Assert(len(blobList), chk.Not(chk.Equals), 0) + + // add special blobs that we wish to exclude + blobsToExclude := []string{"excludeSub/notGood.pdf", "excludeSub/lame.jpeg", "exactName"} + scenarioHelper{}.generateBlobsFromList(c, srcContainerURL, blobsToExclude, blockBlobDefaultData) + excludeString := "excludeSub;exactName" + + // set up interceptor + mockedRPC := interceptor{} + Rpc = mockedRPC.intercept + mockedRPC.init() + + // construct the raw input to simulate user input + srcContainerURLWithSAS := scenarioHelper{}.getRawContainerURLWithSAS(c, srcContainerName) + dstContainerURLWithSAS := scenarioHelper{}.getRawContainerURLWithSAS(c, dstContainerName) + raw := getDefaultSyncRawInput(srcContainerURLWithSAS.String(), dstContainerURLWithSAS.String()) + raw.excludePath = excludeString + + // make sure the list doesn't include the blobs specified by the exclude flag + runSyncAndVerify(c, raw, func(err error) { + c.Assert(err, chk.IsNil) + validateS2SSyncTransfersAreScheduled(c, "", "", blobList, mockedRPC) + }) + + // now set up the destination with the blobs to be excluded, and make sure they are not touched + scenarioHelper{}.generateBlobsFromList(c, dstContainerURL, blobsToExclude, blockBlobDefaultData) + + // re-create the ones at the source so that their lmts are newer + scenarioHelper{}.generateBlobsFromList(c, srcContainerURL, blobsToExclude, blockBlobDefaultData) + + mockedRPC.reset() + runSyncAndVerify(c, raw, func(err error) { + c.Assert(err, chk.IsNil) + validateS2SSyncTransfersAreScheduled(c, "", "", blobList, mockedRPC) + + // make sure the extra blobs were not touched + for _, blobName := range blobsToExclude { + exists := scenarioHelper{}.blobExists(dstContainerURL.NewBlobURL(blobName)) + c.Assert(exists, chk.Equals, true) + } + }) +} + // validate the bug fix for this scenario func (s *cmdIntegrationSuite) TestSyncS2SWithMissingDestination(c *chk.C) { bsu := getBSU() diff --git a/cmd/zt_sync_blob_local_test.go b/cmd/zt_sync_blob_local_test.go index 9cb108809..cb1acc8f8 100644 --- a/cmd/zt_sync_blob_local_test.go +++ b/cmd/zt_sync_blob_local_test.go @@ -219,7 +219,7 @@ func (s *cmdIntegrationSuite) TestSyncDownloadWithMismatchedDestination(c *chk.C } // include flag limits the scope of source/destination comparison -func (s *cmdIntegrationSuite) TestSyncDownloadWithIncludeFlag(c *chk.C) { +func (s *cmdIntegrationSuite) TestSyncDownloadWithIncludePatternFlag(c *chk.C) { bsu := getBSU() // set up the container with numerous blobs @@ -255,7 +255,7 @@ func (s *cmdIntegrationSuite) TestSyncDownloadWithIncludeFlag(c *chk.C) { } // exclude flag limits the scope of source/destination comparison -func (s *cmdIntegrationSuite) TestSyncDownloadWithExcludeFlag(c *chk.C) { +func (s *cmdIntegrationSuite) TestSyncDownloadWithExcludePatternFlag(c *chk.C) { bsu := getBSU() // set up the container with numerous blobs @@ -291,7 +291,7 @@ func (s *cmdIntegrationSuite) TestSyncDownloadWithExcludeFlag(c *chk.C) { } // include and exclude flag can work together to limit the scope of source/destination comparison -func (s *cmdIntegrationSuite) TestSyncDownloadWithIncludeAndExcludeFlag(c *chk.C) { +func (s *cmdIntegrationSuite) 
TestSyncDownloadWithIncludeAndExcludePatternFlag(c *chk.C) { bsu := getBSU() // set up the container with numerous blobs @@ -333,6 +333,60 @@ func (s *cmdIntegrationSuite) TestSyncDownloadWithIncludeAndExcludeFlag(c *chk.C }) } +// a specific path is avoided in the comparison +func (s *cmdIntegrationSuite) TestSyncDownloadWithExcludePathFlag(c *chk.C) { + bsu := getBSU() + + // set up the container with numerous blobs + containerURL, containerName := createNewContainer(c, bsu) + blobList := scenarioHelper{}.generateCommonRemoteScenarioForBlob(c, containerURL, "") + defer deleteContainer(c, containerURL) + c.Assert(containerURL, chk.NotNil) + c.Assert(len(blobList), chk.Not(chk.Equals), 0) + + // add special blobs that we wish to exclude + blobsToExclude := []string{"excludeSub/notGood.pdf", "excludeSub/lame.jpeg", "exactName"} + scenarioHelper{}.generateBlobsFromList(c, containerURL, blobsToExclude, blockBlobDefaultData) + excludeString := "excludeSub;exactName" + + // set up the destination with an empty folder + dstDirName := scenarioHelper{}.generateLocalDirectory(c) + defer os.RemoveAll(dstDirName) + + // set up interceptor + mockedRPC := interceptor{} + Rpc = mockedRPC.intercept + mockedRPC.init() + + // construct the raw input to simulate user input + rawContainerURLWithSAS := scenarioHelper{}.getRawContainerURLWithSAS(c, containerName) + raw := getDefaultSyncRawInput(rawContainerURLWithSAS.String(), dstDirName) + raw.excludePath = excludeString + + runSyncAndVerify(c, raw, func(err error) { + c.Assert(err, chk.IsNil) + validateDownloadTransfersAreScheduled(c, "", "", blobList, mockedRPC) + }) + + // now set up the destination with the files to be excluded, and make sure they are not touched + scenarioHelper{}.generateLocalFilesFromList(c, dstDirName, blobsToExclude) + + // re-create the ones at the source so that their lmts are newer + scenarioHelper{}.generateBlobsFromList(c, containerURL, blobsToExclude, blockBlobDefaultData) + + mockedRPC.reset() + runSyncAndVerify(c, raw, func(err error) { + c.Assert(err, chk.IsNil) + validateDownloadTransfersAreScheduled(c, "", "", blobList, mockedRPC) + + // make sure the extra files were not touched + for _, blobName := range blobsToExclude { + _, err := os.Stat(filepath.Join(dstDirName, blobName)) + c.Assert(err, chk.IsNil) + } + }) +} + // validate the bug fix for this scenario func (s *cmdIntegrationSuite) TestSyncDownloadWithMissingDestination(c *chk.C) { bsu := getBSU() diff --git a/cmd/zt_sync_local_blob_test.go b/cmd/zt_sync_local_blob_test.go index ce19c4ce0..8c917360e 100644 --- a/cmd/zt_sync_local_blob_test.go +++ b/cmd/zt_sync_local_blob_test.go @@ -208,7 +208,7 @@ func (s *cmdIntegrationSuite) TestSyncUploadWithMismatchedDestination(c *chk.C) } // include flag limits the scope of source/destination comparison -func (s *cmdIntegrationSuite) TestSyncUploadWithIncludeFlag(c *chk.C) { +func (s *cmdIntegrationSuite) TestSyncUploadWithIncludePatternFlag(c *chk.C) { bsu := getBSU() // set up the source with numerous files @@ -242,7 +242,7 @@ func (s *cmdIntegrationSuite) TestSyncUploadWithIncludeFlag(c *chk.C) { } // exclude flag limits the scope of source/destination comparison -func (s *cmdIntegrationSuite) TestSyncUploadWithExcludeFlag(c *chk.C) { +func (s *cmdIntegrationSuite) TestSyncUploadWithExcludePatternFlag(c *chk.C) { bsu := getBSU() // set up the source with numerous files @@ -276,7 +276,7 @@ func (s *cmdIntegrationSuite) TestSyncUploadWithExcludeFlag(c *chk.C) { } // include and exclude flag can work together to limit the 
scope of source/destination comparison -func (s *cmdIntegrationSuite) TestSyncUploadWithIncludeAndExcludeFlag(c *chk.C) { +func (s *cmdIntegrationSuite) TestSyncUploadWithIncludeAndExcludePatternFlag(c *chk.C) { bsu := getBSU() // set up the source with numerous files @@ -316,6 +316,58 @@ func (s *cmdIntegrationSuite) TestSyncUploadWithIncludeAndExcludeFlag(c *chk.C) }) } +// a specific path is avoided in the comparison +func (s *cmdIntegrationSuite) TestSyncUploadWithExcludePathFlag(c *chk.C) { + bsu := getBSU() + + // set up the source with numerous files + srcDirName := scenarioHelper{}.generateLocalDirectory(c) + defer os.RemoveAll(srcDirName) + fileList := scenarioHelper{}.generateCommonRemoteScenarioForLocal(c, srcDirName, "") + + // add special files that we wish to exclude + filesToExclude := []string{"excludeSub/notGood.pdf", "excludeSub/lame.jpeg", "exactName"} + scenarioHelper{}.generateLocalFilesFromList(c, srcDirName, filesToExclude) + excludeString := "excludeSub;exactName" + + // set up the destination as an empty container + containerURL, containerName := createNewContainer(c, bsu) + defer deleteContainer(c, containerURL) + + // set up interceptor + mockedRPC := interceptor{} + Rpc = mockedRPC.intercept + mockedRPC.init() + + // construct the raw input to simulate user input + rawContainerURLWithSAS := scenarioHelper{}.getRawContainerURLWithSAS(c, containerName) + raw := getDefaultSyncRawInput(srcDirName, rawContainerURLWithSAS.String()) + raw.excludePath = excludeString + + runSyncAndVerify(c, raw, func(err error) { + c.Assert(err, chk.IsNil) + validateUploadTransfersAreScheduled(c, "", "", fileList, mockedRPC) + }) + + // now set up the destination with the blobs to be excluded, and make sure they are not touched + scenarioHelper{}.generateBlobsFromList(c, containerURL, filesToExclude, blockBlobDefaultData) + + // re-create the ones at the source so that their lmts are newer + scenarioHelper{}.generateLocalFilesFromList(c, srcDirName, filesToExclude) + + mockedRPC.reset() + runSyncAndVerify(c, raw, func(err error) { + c.Assert(err, chk.IsNil) + validateUploadTransfersAreScheduled(c, "", "", fileList, mockedRPC) + + // make sure the extra blobs were not touched + for _, blobName := range filesToExclude { + exists := scenarioHelper{}.blobExists(containerURL.NewBlobURL(blobName)) + c.Assert(exists, chk.Equals, true) + } + }) +} + // validate the bug fix for this scenario func (s *cmdIntegrationSuite) TestSyncUploadWithMissingDestination(c *chk.C) { bsu := getBSU() diff --git a/common/ProxyLookupCache.go b/common/ProxyLookupCache.go new file mode 100644 index 000000000..160150a64 --- /dev/null +++ b/common/ProxyLookupCache.go @@ -0,0 +1,166 @@ +// Copyright © Microsoft +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +package common + +import ( + "errors" + "github.com/mattn/go-ieproxy" + "net/http" + "net/url" + "strings" + "sync" + "time" +) + +type ProxyLookupFunc func(req *http.Request) (*url.URL, error) // signature of normal Transport.Proxy lookup + +var GlobalProxyLookup ProxyLookupFunc + +func init() { + c := &proxyLookupCache{ + m: &sync.Map{}, + refreshInterval: time.Minute * 5, // this is plenty, given the usual retry policies in AzCopy span a much longer time period in the total retry sequence + lookupTimeout: time.Minute, // equals the documented max allowable execution time for WinHttpGetProxyForUrl + lookupLock: &sync.Mutex{}, + lookupMethod: ieproxy.GetProxyFunc(), + } + + ev := GetLifecycleMgr().GetEnvironmentVariable(EEnvironmentVariable.CacheProxyLookup()) + if strings.ToLower(ev) == "true" { + GlobalProxyLookup = c.getProxy + } else { + // Use full URL in the lookup, and don't cache the result + // In theory, WinHttpGetProxyForUrl can take the path portion of the URL into account, + // to give a different proxy server depending on the path. That's only possible if + // there's a lookup done for each request. + // In practice, we expect very few users will need this. + GlobalProxyLookup = func(req *http.Request) (*url.URL, error) { + v := c.getProxyNoCache(req) + return v.url, v.err + } + } +} + +var ProxyLookupTimeoutError = errors.New("proxy lookup timed out") + +type proxyLookupResult struct { + url *url.URL + err error +} + +// proxyLookupCache caches the result of proxy lookups +// It's here, rather than contributed to mattn.go-ieproxy because it's not general-purpose. +// In particular, it assumes that there will not be lots and lots of different hosts in the cache, +// and so it has no ability to clear them or reduce the size of the cache, and it runs one +// permanent GR per cache entry. That assumption makes sense in AzCopy, but is not correct in general (e.g. +// if used in something with usage patterns like a web browser). +// TODO: should we one day find a better solution, so it can be contributed to mattn.go-ieproxy instead of done here? +// or maybe just the code in getProxyNoCache could be contributed there? +// TODO: consider that one consequence of the current lack of integration with mattn.go-ieproxy is that pipelines created by +// pipeline.NewPipeline don't use proxyLookupCache at all. However, that only affects the enumeration portion of our code, +// for Azure Files and ADLS Gen2. The issues that proxyLookupCache solves have not been reported there. The issues matter in +// the STE, where request counts are much higher (and there, we always do use this cache, because we make our own pipelines). 
+type proxyLookupCache struct { + m *sync.Map // is optimized for caches that only grow (as is the case here) + refreshInterval time.Duration + lookupTimeout time.Duration + lookupLock *sync.Mutex + lookupMethod ProxyLookupFunc +} + +func (c *proxyLookupCache) getProxyNoCache(req *http.Request) proxyLookupResult { + // Look up in background, so we can set timeout + // We do this since we have observed cases that don't return on Windows 10, presumably due to OS issue as described here: + // https://developercommunity.visualstudio.com/content/problem/282756/intermittent-and-indefinite-wcf-hang-blocking-requ.html + ch := make(chan proxyLookupResult) + go func() { + u, err := c.lookupMethod(req) // typically configured to call the result of ieproxy.GetProxyFunc + ch <- proxyLookupResult{u, err} + }() + + select { + case v := <-ch: + return v + case <-time.After(c.lookupTimeout): + return proxyLookupResult{nil, ProxyLookupTimeoutError} + // Note: in testing the real app, this code path wasn't triggered. Not sure if it's just luck that in many Win10 test runs, + // with hundreds of thousands of files each, this didn't trigger - even though the underlying issue did trigger + // on about 25% of similar test runs prior to this code being added. Maybe just luck, or maybe something about spinning up + // the separate goroutine here has "magically" prevented the issue. + } +} + +// getProxy returns the cached proxy, or looks it up if it's not already cached +func (c *proxyLookupCache) getProxy(req *http.Request) (*url.URL, error) { + var value proxyLookupResult + var ok bool + + // key is the scheme+host portion of the URL + key := url.URL{ + Scheme: req.URL.Scheme, + User: req.URL.User, + Host: req.URL.Host, + } + + // if we've got it return it + if value, ok = c.mapLoad(key); ok { + return value.url, value.err + } + + // else, look it up with the (potentially expensive) lookup function + // Because the function is potentially expensive(ish) and because we only want to kick off one refresh GR per key, + // we use a lock here + c.lookupLock.Lock() + defer c.lookupLock.Unlock() + + if value, ok = c.mapLoad(key); !ok { + value = c.getProxyNoCache(req) + c.m.Store(key, value) + go c.endlessTimedRefresh(key, req) + } + + return value.url, value.err +} + +func (c *proxyLookupCache) mapLoad(key url.URL) (proxyLookupResult, bool) { + value, ok := c.m.Load(key) + if ok { + return value.(proxyLookupResult), true + } else { + return proxyLookupResult{}, false + } +} + +// endlessTimedRefresh runs an endless loop, refreshing the given key +// on a coarse-grained interval. This is in case something changes in the user's network +// configuration. E.g. they switch between wifi and wired if on a laptop. Shouldn't be common +// with AzCopy, but could happen so we may as well cater for it.
+func (c *proxyLookupCache) endlessTimedRefresh(key url.URL, representativeFullRequest *http.Request) { + if c.refreshInterval == 0 { + return + } + + for { + time.Sleep(c.refreshInterval) + value := c.getProxyNoCache(representativeFullRequest) + c.m.Store(key, value) + } +} diff --git a/common/environment.go b/common/environment.go index a2897f97e..0db2b6bfa 100644 --- a/common/environment.go +++ b/common/environment.go @@ -46,6 +46,8 @@ var VisibleEnvironmentVariables = []EnvironmentVariable{ EEnvironmentVariable.ClientSecret(), EEnvironmentVariable.CertificatePassword(), EEnvironmentVariable.AutoTuneToCpu(), + EEnvironmentVariable.CacheProxyLookup(), + EEnvironmentVariable.UserAgentPrefix(), } var EEnvironmentVariable = EnvironmentVariable{} @@ -103,6 +105,14 @@ func (EnvironmentVariable) OptimizeSparsePageBlobTransfers() EnvironmentVariable } } +func (EnvironmentVariable) CacheProxyLookup() EnvironmentVariable { + return EnvironmentVariable{ + Name: "AZCOPY_CACHE_PROXY_LOOKUP", + Description: "By default AzCopy on Windows will cache proxy server lookups at hostname level (not taking URL path into account). Set to any other value than 'true' to disable the cache.", + DefaultValue: "true", + } +} + func (EnvironmentVariable) LogLocation() EnvironmentVariable { return EnvironmentVariable{ Name: "AZCOPY_LOG_LOCATION", @@ -194,3 +204,10 @@ func (EnvironmentVariable) DefaultServiceApiVersion() EnvironmentVariable { Description: "Overrides the service API version so that AzCopy could accommodate custom environments such as Azure Stack.", } } + +func (EnvironmentVariable) UserAgentPrefix() EnvironmentVariable { + return EnvironmentVariable{ + Name: "AZCOPY_USER_AGENT_PREFIX", + Description: "Add a prefix to the default AzCopy User Agent, which is used for telemetry purposes. A space is automatically inserted.", + } +} diff --git a/common/fe-ste-models.go b/common/fe-ste-models.go index d2b588938..bab5dd666 100644 --- a/common/fe-ste-models.go +++ b/common/fe-ste-models.go @@ -24,7 +24,6 @@ import ( "bytes" "encoding/json" "math" - "path/filepath" "reflect" "regexp" "strings" @@ -851,6 +850,7 @@ const ( ) // This struct represent a single transfer entry with source and destination details +// ** DO NOT construct directly. Use cmd.storedObject.ToNewCopyTransfer ** type CopyTransfer struct { Source string Destination string @@ -871,57 +871,6 @@ type CopyTransfer struct { BlobTier azblob.AccessTierType } -func NewCopyTransfer( - steWillAutoDecompress bool, - Source, Destination string, - LMT time.Time, - ContentSize int64, - ContentType, ContentEncoding, ContentDisposition, ContentLanguage, CacheControl string, - ContentMD5 []byte, - Metadata Metadata, - BlobType azblob.BlobType, - BlobTier azblob.AccessTierType) CopyTransfer { - - if steWillAutoDecompress { - Destination = stripCompressionExtension(Destination, ContentEncoding) - } - - return CopyTransfer{ - Source: Source, - Destination: Destination, - LastModifiedTime: LMT, - SourceSize: ContentSize, - ContentType: ContentType, - ContentEncoding: ContentEncoding, - ContentDisposition: ContentDisposition, - ContentLanguage: ContentLanguage, - CacheControl: CacheControl, - ContentMD5: ContentMD5, - Metadata: Metadata, - BlobType: BlobType, - BlobTier: BlobTier, - } -} - -// stringCompressionExtension strips any file extension that corresponds to the -// compression indicated by the encoding type. -// Why remove this extension here, at enumeration time, instead of just doing it -// in the STE when we are about to save the file? 
-// Because by doing it here we get the accurate name in things that -// directly read the Plan files, like the jobs show command -func stripCompressionExtension(dest string, contentEncoding string) string { - // Ignore error getting compression type. We can't easily report it now, and we don't need to know about the error - // cases here when deciding renaming. STE will log error on the error cases - ct, _ := GetCompressionType(contentEncoding) - ext := strings.ToLower(filepath.Ext(dest)) - stripGzip := ct == ECompressionType.GZip() && (ext == ".gz" || ext == ".gzip") - stripZlib := ct == ECompressionType.ZLib() && ext == ".zz" // "standard" extension for zlib-wrapped files, according to pigz doc and Stack Overflow - if stripGzip || stripZlib { - return strings.TrimSuffix(dest, filepath.Ext(dest)) - } - return dest -} - //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// // Metadata used in AzCopy. diff --git a/common/lifecyleMgr.go b/common/lifecyleMgr.go index a3f23a2f3..349c3e00c 100644 --- a/common/lifecyleMgr.go +++ b/common/lifecyleMgr.go @@ -56,6 +56,7 @@ type LifecycleMgr interface { SetOutputFormat(OutputFormat) // change the output format of the entire application EnableInputWatcher() // depending on the command, we may allow user to give input through Stdin EnableCancelFromStdIn() // allow user to send in `cancel` to stop the job + AddUserAgentPrefix(string) string // append the global user agent prefix, if applicable } func GetLifecycleMgr() LifecycleMgr { @@ -466,6 +467,15 @@ func (lcm *lifecycleMgr) GetEnvironmentVariable(env EnvironmentVariable) string return value } +func (lcm *lifecycleMgr) AddUserAgentPrefix(userAgent string) string { + prefix := lcm.GetEnvironmentVariable(EEnvironmentVariable.UserAgentPrefix()) + if len(prefix) > 0 { + userAgent = prefix + " " + userAgent + } + + return userAgent +} + // captures the common logic of exiting if there's an expected error func PanicIfErr(err error) { if err != nil { diff --git a/common/oauthTokenManager.go b/common/oauthTokenManager.go index 7c920293f..bf919e57e 100644 --- a/common/oauthTokenManager.go +++ b/common/oauthTokenManager.go @@ -39,8 +39,6 @@ import ( "strings" "time" - "github.com/mattn/go-ieproxy" - "golang.org/x/crypto/pkcs12" "github.com/Azure/go-autorest/autorest/adal" @@ -79,7 +77,7 @@ func NewUserOAuthTokenManagerInstance(credCacheOptions CredCacheOptions) *UserOA func newAzcopyHTTPClient() *http.Client { return &http.Client{ Transport: &http.Transport{ - Proxy: ieproxy.GetProxyFunc(), + Proxy: GlobalProxyLookup, // We use Dial instead of DialContext as DialContext has been reported to cause slower performance. 
Dial /*Context*/ : (&net.Dialer{ Timeout: 30 * time.Second, diff --git a/common/version.go b/common/version.go index 909f93b21..e9150b260 100644 --- a/common/version.go +++ b/common/version.go @@ -1,6 +1,6 @@ package common -const AzcopyVersion = "10.3.2" +const AzcopyVersion = "10.3.3" const UserAgent = "AzCopy/" + AzcopyVersion const S3ImportUserAgent = "S3Import " + UserAgent const BenchmarkUserAgent = "Benchmark " + UserAgent diff --git a/common/zt_ProxyLookupCache_test.go b/common/zt_ProxyLookupCache_test.go new file mode 100644 index 000000000..efddc5dd2 --- /dev/null +++ b/common/zt_ProxyLookupCache_test.go @@ -0,0 +1,151 @@ +// Copyright © Microsoft +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +package common + +import ( + chk "gopkg.in/check.v1" + "net/http" + "net/url" + "sync" + "time" +) + +type proxyLookupCacheSuite struct{} + +var _ = chk.Suite(&proxyLookupCacheSuite{}) + +func (s *proxyLookupCacheSuite) TestCacheIsUsed(c *chk.C) { + fakeMu := &sync.Mutex{} // avoids race condition in test code + var fakeResult *url.URL + var fakeError error + + pc := &proxyLookupCache{ + m: &sync.Map{}, + lookupTimeout: time.Minute, + lookupLock: &sync.Mutex{}, + lookupMethod: func(req *http.Request) (*url.URL, error) { + fakeMu.Lock() + defer fakeMu.Unlock() + return fakeResult, fakeError + }, + } + + // fill the cache with 3 entries, one of which has an error + fakeMu.Lock() + fakeResult, fakeError = url.Parse("http://fooproxy") + fakeMu.Unlock() + fooRequest, _ := http.NewRequest("GET", "http://foo.com/a", nil) + fooResult1, err := pc.getProxy(fooRequest) + c.Check(err, chk.IsNil) + c.Check(fooResult1.String(), chk.Equals, "http://fooproxy") + + fakeMu.Lock() + fakeResult, fakeError = url.Parse("http://barproxy") + fakeMu.Unlock() + barRequest, _ := http.NewRequest("GET", "http://bar.com/a", nil) + barResult1, err := pc.getProxy(barRequest) + c.Check(err, chk.IsNil) + c.Check(barResult1.String(), chk.Equals, "http://barproxy") + + fakeMu.Lock() + fakeResult, fakeError = url.Parse("http://this will give a parsing error") + fakeMu.Unlock() + erroringRequest, _ := http.NewRequest("GET", "http://willerror.com/a", nil) + _, expectedErr := pc.getProxy(erroringRequest) + c.Check(expectedErr, chk.NotNil) + + // set dummy values for next lookup, so we can be sure that lookups don't happen (i.e. 
we don't get these values, so we know we hit the cache) + fakeMu.Lock() + fakeResult, _ = url.Parse("http://thisShouldNeverBeReturnedBecauseResultsAreAlreadyCached") + fakeError = nil + fakeMu.Unlock() + + // lookup URLs with same host portion, but different paths. Expect cache hits. + fooRequest, _ = http.NewRequest("GET", "http://foo.com/differentPathFromBefore", nil) + fooResult2, err := pc.getProxy(fooRequest) + c.Check(err, chk.IsNil) + c.Check(fooResult2.String(), chk.Equals, fooResult1.String()) + + barRequest, _ = http.NewRequest("GET", "http://bar.com/differentPathFromBefore", nil) + barResult2, err := pc.getProxy(barRequest) + c.Check(err, chk.IsNil) + c.Check(barResult2.String(), chk.Equals, barResult1.String()) + + erroringRequest, _ = http.NewRequest("GET", "http://willerror.com/differentPathFromBefore", nil) + _, expectedErr = pc.getProxy(erroringRequest) + c.Check(expectedErr, chk.NotNil) +} + +func (s *proxyLookupCacheSuite) TestCacheEntriesGetRefreshed(c *chk.C) { + fakeMu := &sync.Mutex{} // avoids race condition in test code + var fakeResult *url.URL + var fakeError error + + pc := &proxyLookupCache{ + m: &sync.Map{}, + lookupLock: &sync.Mutex{}, + refreshInterval: time.Second, // much shorter than normal, for testing + lookupTimeout: time.Minute, + lookupMethod: func(req *http.Request) (*url.URL, error) { + fakeMu.Lock() + defer fakeMu.Unlock() + return fakeResult, fakeError + }, + } + + // load the cache + fakeMu.Lock() + fakeResult, fakeError = url.Parse("http://fooproxy") + fakeMu.Unlock() + fooRequest, _ := http.NewRequest("GET", "http://foo.com/a", nil) + fooResult1, err := pc.getProxy(fooRequest) + c.Check(err, chk.IsNil) + c.Check(fooResult1.String(), chk.Equals, "http://fooproxy") + + // prime the refresh to actually produce a change + fakeMu.Lock() + fakeResult, fakeError = url.Parse("http://updatedFooProxy") + fakeMu.Unlock() + + // wait while refresh runs + time.Sleep(time.Second * 2) + + // read from cache, and check we get the updated result + fooResult2, err := pc.getProxy(fooRequest) + c.Check(err, chk.IsNil) + c.Check(fooResult2.String(), chk.Equals, "http://updatedFooProxy") +} + +func (s *proxyLookupCacheSuite) TestUseOfLookupMethodHasTimeout(c *chk.C) { + pc := &proxyLookupCache{ + m: &sync.Map{}, + lookupLock: &sync.Mutex{}, + lookupTimeout: time.Second, // very short, since this is the timeout we are testing in this test + lookupMethod: func(req *http.Request) (*url.URL, error) { + time.Sleep(time.Hour * 24) // "never" return, since we want the timeout to take effect + return nil, nil + }, + } + + fooRequest, _ := http.NewRequest("GET", "http://foo.com/a", nil) + tuple := pc.getProxyNoCache(fooRequest) + c.Check(tuple.err, chk.Equals, ProxyLookupTimeoutError) +} diff --git a/main_windows.go b/main_windows.go index 13811eb19..9f12077c1 100644 --- a/main_windows.go +++ b/main_windows.go @@ -21,7 +21,6 @@ package main import ( - "github.com/mattn/go-ieproxy" "math" "net/http" "os" @@ -68,5 +67,5 @@ func GetAzCopyAppPath() string { func init() { //Catch everything that uses http.DefaultTransport with ieproxy.GetProxyFunc() - http.DefaultTransport.(*http.Transport).Proxy = ieproxy.GetProxyFunc() + http.DefaultTransport.(*http.Transport).Proxy = common.GlobalProxyLookup } diff --git a/ste/JobPartPlanFileName.go b/ste/JobPartPlanFileName.go index 0aefefdd1..8158f30e0 100644 --- a/ste/JobPartPlanFileName.go +++ b/ste/JobPartPlanFileName.go @@ -5,6 +5,7 @@ import ( "errors" "fmt" "io" + "math" "os" "reflect" "strings" @@ -215,6 +216,10 @@ func (jpfn
JobPartPlanFileName) Create(order common.CopyJobPartOrderRequest) { // Write each transfer to the Job Part Plan file (except for the src/dst strings; those come later) for t := range order.Transfers { + if len(order.Transfers[t].Source) > math.MaxInt16 || len(order.Transfers[t].Destination) > math.MaxInt16 { + panic(fmt.Sprintf("The file %s exceeds azcopy's current maximum path length on either the source or the destination.", order.Transfers[t].Source)) + } + // Prepare info for JobPartPlanTransfer // Sending Metadata type to Transfer could ensure strong type validation. // TODO: discuss the performance drop of marshaling metadata twice @@ -226,6 +231,9 @@ func (jpfn JobPartPlanFileName) Create(order common.CopyJobPartOrderRequest) { } srcMetadataLength = len(metadataStr) } + if srcMetadataLength > math.MaxInt16 { + panic(fmt.Sprintf("The metadata on source file %s exceeds azcopy's current maximum metadata length, and cannot be processed.", order.Transfers[t].Source)) + } // Create & initialize this transfer's Job Part Plan Transfer jppt := JobPartPlanTransfer{ SrcOffset: currentSrcStringOffset, // SrcOffset of the src string @@ -263,14 +271,14 @@ func (jpfn JobPartPlanFileName) Create(order common.CopyJobPartOrderRequest) { for t := range order.Transfers { // Sanity check: Verify that we are where we think we are and that no bug has occurred if eof != srcDstStringsOffset[t] { - panic(errors.New("error writing src/dst strings to job part plan file")) + panic(errors.New("job plan file's EOF and the transfer's offset didn't line up; filename: " + order.Transfers[t].Source)) } // Write the src & dst strings to the job part plan file bytesWritten, err := file.WriteString(order.Transfers[t].Source) common.PanicIfErr(err) - // write the destination string in memory map file eof += int64(bytesWritten) + // write the destination string in memory map file bytesWritten, err = file.WriteString(order.Transfers[t].Destination) common.PanicIfErr(err) eof += int64(bytesWritten) diff --git a/ste/mgr-JobPartMgr.go b/ste/mgr-JobPartMgr.go index b834a7f2c..4ee4f6122 100644 --- a/ste/mgr-JobPartMgr.go +++ b/ste/mgr-JobPartMgr.go @@ -12,8 +12,6 @@ import ( "sync/atomic" "time" - autoProxy "github.com/mattn/go-ieproxy" - "github.com/Azure/azure-pipeline-go/pipeline" "github.com/Azure/azure-storage-azcopy/azbfs" "github.com/Azure/azure-storage-azcopy/common" @@ -86,7 +84,7 @@ func NewVersionPolicyFactory() pipeline.Factory { func NewAzcopyHTTPClient(maxIdleConns int) *http.Client { return &http.Client{ Transport: &http.Transport{ - Proxy: autoProxy.GetProxyFunc(), + Proxy: common.GlobalProxyLookup, DialContext: newDialRateLimiter(&net.Dialer{ Timeout: 30 * time.Second, KeepAlive: 30 * time.Second, @@ -436,6 +434,7 @@ func (jpm *jobPartMgr) createPipelines(ctx context.Context) { } else if fromTo.From() == common.ELocation.Benchmark() || fromTo.To() == common.ELocation.Benchmark() { userAgent = common.BenchmarkUserAgent } + userAgent = common.GetLifecycleMgr().AddUserAgentPrefix(userAgent) credOption := common.CredentialOpOptions{ LogInfo: func(str string) { jpm.Log(pipeline.LogInfo, str) }, diff --git a/ste/sender-pageBlob.go b/ste/sender-pageBlob.go index a7fa5b6d0..4208c0e66 100644 --- a/ste/sender-pageBlob.go +++ b/ste/sender-pageBlob.go @@ -165,13 +165,26 @@ func (s *pageBlobSenderBase) Prologue(ps common.PrologueState) (destinationModif // Check its length, since it already has a size, and the upload will fail at the end if what you // upload to it is bigger than its existing size.
(And, for big files, it may be hours until you discover that // difference if we don't check here). + // + // We use an equality check (rather than ensuring sourceSize <= dest), because the customer should have declared the correct exact size when + // making the disk in Azure. (And if we don't check equality here, by default we do check it after upload for all blobs, as of version 10.3) + // + // Note re types and sizes: + // Currently (2019) only VHDs are supported for Azure managed disk upload. VHDXs (which have a different footer size) are not. + // Azure requires VHD size to be a multiple of 1MB plus 512 bytes for the VHD footer. And the VHD must be fixed size. + // E.g. these are the values reported by PowerShell's Get-VHD for a valid 1 GB VHD: + // VhdFormat : VHD + // VhdType : Fixed + // FileSize : 1073742336 (equals our s.srcSize, i.e. the size of the disk file) + // Size : 1073741824 + p, err := s.destPageBlobURL.GetProperties(s.jptm.Context(), azblob.BlobAccessConditions{}) if err != nil { s.jptm.FailActiveSend("Checking size of managed disk blob", err) return } - if s.srcSize > p.ContentLength() { - sizeErr := errors.New(fmt.Sprintf("source file is too big for the destination page blob. Source size is %d bytes but destination size is %d bytes", + if s.srcSize != p.ContentLength() { + sizeErr := errors.New(fmt.Sprintf("source file is not the same size as the destination page blob. Source size is %d bytes but destination size is %d bytes. Re-create the destination with exactly the right size. E.g. see parameter UploadSizeInBytes in PowerShell's New-AzDiskConfig. Ensure the source is a fixed-size VHD", s.srcSize, p.ContentLength())) s.jptm.FailActiveSend("Checking size of managed disk blob", sizeErr) return diff --git a/ste/xfer-anyToRemote.go b/ste/xfer-anyToRemote.go index 8d8903a3e..4ae635818 100644 --- a/ste/xfer-anyToRemote.go +++ b/ste/xfer-anyToRemote.go @@ -331,7 +331,8 @@ func epilogueWithCleanupSendToRemote(jptm IJobPartTransferMgr, s ISenderBase, si destLength, err := s.GetDestinationLength() if err != nil { - jptm.FailActiveSend(common.IffString(isS2SCopier, "S2S ", "Upload ")+"Length check: Get destination length", err) + wrapped := fmt.Errorf("could not read destination length. If destination is write-only, use --check-length=false on the AzCopy command line. %w", err) + jptm.FailActiveSend(common.IffString(isS2SCopier, "S2S ", "Upload ")+"Length check: Get destination length", wrapped) } if destLength != jptm.Info().SourceSize { diff --git a/testSuite/scripts/run.py b/testSuite/scripts/run.py index ffb6267c6..805b01821 100644 --- a/testSuite/scripts/run.py +++ b/testSuite/scripts/run.py @@ -21,6 +21,7 @@ import platform import sys import unittest +import re def parse_config_file_set_env(): @@ -103,6 +104,10 @@ def check_env_not_exist(key): return True return False +def get_env_logged(key): + value = os.environ.get(key) + print(key + " = " + re.sub("(?i)(sig[ \t]*[:=][ \t]*)([^& ,;\t\n\r]+)", "sig=REDACTED", value)) + return value def init(): # Check the environment variables. @@ -122,43 +127,56 @@ def init(): # Get the environment variables value # test_dir_path is the location where test_data folder will be created and test files will be created further. - test_dir_path = os.environ.get('TEST_DIRECTORY_PATH') + test_dir_path = get_env_logged('TEST_DIRECTORY_PATH') # azcopy_exec_location is the location of the azcopy executable # azcopy executable will be copied to test data folder.
- azcopy_exec_location = os.environ.get('AZCOPY_EXECUTABLE_PATH') + azcopy_exec_location = get_env_logged('AZCOPY_EXECUTABLE_PATH') # test_suite_exec_location is the location of the test suite executable # test suite executable will be copied to test data folder. - test_suite_exec_location = os.environ.get('TEST_SUITE_EXECUTABLE_LOCATION') + test_suite_exec_location = get_env_logged('TEST_SUITE_EXECUTABLE_LOCATION') # container_sas is the shared access signature of the container # where test data will be uploaded to and downloaded from. - container_sas = os.environ.get('CONTAINER_SAS_URL') + container_sas = get_env_logged('CONTAINER_SAS_URL') # container_oauth is container for oauth testing. - container_oauth = os.environ.get('CONTAINER_OAUTH_URL') + container_oauth = get_env_logged('CONTAINER_OAUTH_URL') # container_oauth_validate is the URL with SAS for oauth validation. - container_oauth_validate = os.environ.get('CONTAINER_OAUTH_VALIDATE_SAS_URL') + container_oauth_validate = get_env_logged('CONTAINER_OAUTH_VALIDATE_SAS_URL') # share_sas_url is the URL with SAS of the share where test data will be uploaded to and downloaded from. - share_sas_url = os.environ.get('SHARE_SAS_URL') + share_sas_url = get_env_logged('SHARE_SAS_URL') # container sas of the premium storage account. - premium_container_sas = os.environ.get('PREMIUM_CONTAINER_SAS_URL') + premium_container_sas = get_env_logged('PREMIUM_CONTAINER_SAS_URL') # get the filesystem url - filesystem_url = os.environ.get('FILESYSTEM_URL') - filesystem_sas_url = os.environ.get('FILESYSTEM_SAS_URL') + filesystem_url = get_env_logged('FILESYSTEM_URL') + filesystem_sas_url = get_env_logged('FILESYSTEM_SAS_URL') # get the s2s copy src URLs - s2s_src_blob_account_url = os.environ.get('S2S_SRC_BLOB_ACCOUNT_SAS_URL') - s2s_src_file_account_url = os.environ.get('S2S_SRC_FILE_ACCOUNT_SAS_URL') - s2s_src_s3_service_url = os.environ.get('S2S_SRC_S3_SERVICE_URL') + s2s_src_blob_account_url = get_env_logged('S2S_SRC_BLOB_ACCOUNT_SAS_URL') + s2s_src_file_account_url = get_env_logged('S2S_SRC_FILE_ACCOUNT_SAS_URL') + s2s_src_s3_service_url = get_env_logged('S2S_SRC_S3_SERVICE_URL') # get the s2s copy dest account URLs - s2s_dst_blob_account_url = os.environ.get('S2S_DST_BLOB_ACCOUNT_SAS_URL') + s2s_dst_blob_account_url = get_env_logged('S2S_DST_BLOB_ACCOUNT_SAS_URL') + + get_env_logged("ACCOUNT_NAME") + # do NOT log ACCOUNT_KEY + + # don't log, it will just get redacted by DevOps logging system: get_env_logged("AWS_ACCESS_KEY_ID") + # do NOT log AWS_SECRET_ACCESS_KEY + + get_env_logged("OAUTH_AAD_ENDPOINT") + # don't log, it will just get redacted by DevOps logging system: get_env_logged("OAUTH_TENANT_ID") + # do not log AZCOPY_OAUTH_TOKEN_INFO + + get_env_logged("S3_TESTS_OFF") + # deleting the log files. cleanup()
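
Note on the proxy cache under test: the new `common/zt_ProxyLookupCache_test.go` above exercises a `proxyLookupCache` type and a `common.GlobalProxyLookup` function whose implementation file is not included in this excerpt. The following is a minimal sketch, for orientation only, of a cache with the surface those tests assume (`m`, `lookupLock`, `lookupTimeout`, `refreshInterval`, `lookupMethod`, `getProxy`, `getProxyNoCache`, `ProxyLookupTimeoutError`, `GlobalProxyLookup`); the per-key refresh and the stand-in `lookupMethod` are assumptions, not the shipped code.

```go
// Sketch only; not the shipped implementation.
package common

import (
	"errors"
	"net/http"
	"net/url"
	"sync"
	"time"
)

// ProxyLookupTimeoutError is reported when the underlying lookup does not answer
// within lookupTimeout (name taken from the tests above).
var ProxyLookupTimeoutError = errors.New("proxy lookup timed out")

// proxyLookupResult pairs a proxy URL with the error from looking it up.
type proxyLookupResult struct {
	url *url.URL
	err error
}

// proxyLookupCache caches one lookup result per scheme+host, so the (potentially
// slow) OS proxy query runs once per host instead of once per request.
type proxyLookupCache struct {
	m               *sync.Map     // scheme+host -> proxyLookupResult
	lookupLock      *sync.Mutex   // serializes calls to lookupMethod
	lookupTimeout   time.Duration // how long one lookup may take
	refreshInterval time.Duration // 0 = never refresh cached entries
	lookupMethod    func(req *http.Request) (*url.URL, error)
}

// getProxyNoCache performs a lookup with a timeout, bypassing the cache.
func (c *proxyLookupCache) getProxyNoCache(req *http.Request) proxyLookupResult {
	ch := make(chan proxyLookupResult, 1) // buffered so a late lookup can't block
	go func() {
		c.lookupLock.Lock()
		defer c.lookupLock.Unlock()
		u, err := c.lookupMethod(req)
		ch <- proxyLookupResult{url: u, err: err}
	}()
	select {
	case r := <-ch:
		return r
	case <-time.After(c.lookupTimeout):
		return proxyLookupResult{err: ProxyLookupTimeoutError}
	}
}

// getProxy returns the cached result for the request's scheme+host, looking it up
// (and optionally scheduling a refresh) only on the first call for that key.
func (c *proxyLookupCache) getProxy(req *http.Request) (*url.URL, error) {
	key := req.URL.Scheme + "://" + req.URL.Host
	if v, ok := c.m.Load(key); ok {
		r := v.(proxyLookupResult)
		return r.url, r.err
	}
	r := c.getProxyNoCache(req)
	c.m.Store(key, r)
	if c.refreshInterval > 0 {
		time.AfterFunc(c.refreshInterval, func() {
			c.m.Store(key, c.getProxyNoCache(req))
		})
	}
	return r.url, r.err
}

// GlobalProxyLookup is the func the diff plugs into http.Transport.Proxy; here it
// simply delegates to a package-level cache. http.ProxyFromEnvironment is a
// stand-in; the real code uses an OS-aware lookup on Windows.
var globalCache = &proxyLookupCache{
	m:             &sync.Map{},
	lookupLock:    &sync.Mutex{},
	lookupTimeout: 30 * time.Second,
	lookupMethod:  http.ProxyFromEnvironment,
}

func GlobalProxyLookup(req *http.Request) (*url.URL, error) {
	return globalCache.getProxy(req)
}
```

The property the tests rely on is that results, including lookup errors, are cached per scheme+host, so requests that differ only by path never trigger another lookup, and a hung lookup is cut off by `lookupTimeout` rather than stalling transfers.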
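
The managed-disk size check in `ste/sender-pageBlob.go` above quotes Get-VHD output for a valid fixed-size VHD. A tiny stand-alone check of that arithmetic (not AzCopy code), assuming the 512-byte fixed-VHD footer described in the comment:

```go
package main

import "fmt"

func main() {
	const mib = 1024 * 1024
	const vhdFooterBytes = 512  // footer appended to a fixed-size VHD
	const diskSize = 1024 * mib // "Size" reported by Get-VHD for the 1 GB example disk

	fileSize := diskSize + vhdFooterBytes // "FileSize", i.e. what AzCopy sees as srcSize
	fmt.Println(fileSize)                 // 1073742336, matching the value quoted in the comment
	fmt.Println(diskSize%mib == 0)        // true: the disk portion is a whole number of MiB
}
```

This is why a source that is not an exact fixed-size VHD of the declared size now fails the equality check up front, instead of failing at the end of the upload.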