From 79e4b23fd04f8483cbf4196ca93ab0ebe9083659 Mon Sep 17 00:00:00 2001 From: sviatahorau Date: Fri, 8 Dec 2023 11:09:51 +0100 Subject: [PATCH 1/8] Add option to configure different endpoint suffix for Azure Blob Storage --- .../storage/azure/AzureAccountConfig.java | 24 ++++++++++++++++--- .../storage/azure/AzureDataSegmentPuller.java | 9 +++++-- .../storage/azure/AzureDataSegmentPusher.java | 2 +- .../azure/AzureStorageDruidModule.java | 4 ++-- .../druid/storage/azure/AzureUtils.java | 23 ++++++++++++++---- .../azure/AzureDataSegmentPullerTest.java | 14 +++++++---- .../azure/AzureStorageDruidModuleTest.java | 19 +++++++++++++++ .../druid/storage/azure/AzureUtilsTest.java | 8 ++++--- 8 files changed, 83 insertions(+), 20 deletions(-) diff --git a/extensions-core/azure-extensions/src/main/java/org/apache/druid/storage/azure/AzureAccountConfig.java b/extensions-core/azure-extensions/src/main/java/org/apache/druid/storage/azure/AzureAccountConfig.java index 235ae6f3c609..be54fa6bb0a7 100644 --- a/extensions-core/azure-extensions/src/main/java/org/apache/druid/storage/azure/AzureAccountConfig.java +++ b/extensions-core/azure-extensions/src/main/java/org/apache/druid/storage/azure/AzureAccountConfig.java @@ -46,6 +46,9 @@ public class AzureAccountConfig @JsonProperty private String sharedAccessStorageToken; + @JsonProperty + private String endpointSuffix = AzureUtils.DEFAULT_AZURE_ENDPOINT_SUFFIX; + @SuppressWarnings("unused") // Used by Jackson deserialization? public void setProtocol(String protocol) { @@ -69,6 +72,17 @@ public void setKey(String key) this.key = key; } + public void setSharedAccessStorageToken(String sharedAccessStorageToken) + { + this.sharedAccessStorageToken = sharedAccessStorageToken; + } + + @SuppressWarnings("unused") // Used by Jackson deserialization? 
+ public void setEndpointSuffix(String endpointSuffix) + { + this.endpointSuffix = endpointSuffix; + } + public String getProtocol() { return protocol; @@ -94,9 +108,13 @@ public String getSharedAccessStorageToken() return sharedAccessStorageToken; } - @SuppressWarnings("unused") // Used by Jackson deserialization? - public void setSharedAccessStorageToken(String sharedAccessStorageToken) + public String getEndpointSuffix() { - this.sharedAccessStorageToken = sharedAccessStorageToken; + return endpointSuffix; + } + + public String getBlobStorageEndpointSuffix() + { + return "blob." + endpointSuffix; } } diff --git a/extensions-core/azure-extensions/src/main/java/org/apache/druid/storage/azure/AzureDataSegmentPuller.java b/extensions-core/azure-extensions/src/main/java/org/apache/druid/storage/azure/AzureDataSegmentPuller.java index 571ecc683509..f2ddea959902 100644 --- a/extensions-core/azure-extensions/src/main/java/org/apache/druid/storage/azure/AzureDataSegmentPuller.java +++ b/extensions-core/azure-extensions/src/main/java/org/apache/druid/storage/azure/AzureDataSegmentPuller.java @@ -38,11 +38,16 @@ public class AzureDataSegmentPuller private final AzureByteSourceFactory byteSourceFactory; + private final AzureUtils azureUtils; + @Inject public AzureDataSegmentPuller( - AzureByteSourceFactory byteSourceFactory) + AzureByteSourceFactory byteSourceFactory, + AzureUtils azureUtils + ) { this.byteSourceFactory = byteSourceFactory; + this.azureUtils = azureUtils; } FileUtils.FileCopyResult getSegmentFiles( @@ -59,7 +64,7 @@ FileUtils.FileCopyResult getSegmentFiles( "Loading container: [%s], with blobPath: [%s] and outDir: [%s]", containerName, blobPath, outDir ); - final String actualBlobPath = AzureUtils.maybeRemoveAzurePathPrefix(blobPath); + final String actualBlobPath = azureUtils.maybeRemoveAzurePathPrefix(blobPath); final ByteSource byteSource = byteSourceFactory.create(containerName, actualBlobPath); final FileUtils.FileCopyResult result = 
CompressionUtils.unzip( diff --git a/extensions-core/azure-extensions/src/main/java/org/apache/druid/storage/azure/AzureDataSegmentPusher.java b/extensions-core/azure-extensions/src/main/java/org/apache/druid/storage/azure/AzureDataSegmentPusher.java index 9f97256b1da8..ba83123e2161 100644 --- a/extensions-core/azure-extensions/src/main/java/org/apache/druid/storage/azure/AzureDataSegmentPusher.java +++ b/extensions-core/azure-extensions/src/main/java/org/apache/druid/storage/azure/AzureDataSegmentPusher.java @@ -79,7 +79,7 @@ public String getPathForHadoop() AzureUtils.AZURE_STORAGE_HADOOP_PROTOCOL, segmentConfig.getContainer(), accountConfig.getAccount(), - AzureUtils.AZURE_STORAGE_HOST_ADDRESS, + accountConfig.getBlobStorageEndpointSuffix(), prefixIsNullOrEmpty ? "" : StringUtils.maybeRemoveTrailingSlash(prefix) + '/' ); diff --git a/extensions-core/azure-extensions/src/main/java/org/apache/druid/storage/azure/AzureStorageDruidModule.java b/extensions-core/azure-extensions/src/main/java/org/apache/druid/storage/azure/AzureStorageDruidModule.java index 674e451de51a..62473aff0ae8 100644 --- a/extensions-core/azure-extensions/src/main/java/org/apache/druid/storage/azure/AzureStorageDruidModule.java +++ b/extensions-core/azure-extensions/src/main/java/org/apache/druid/storage/azure/AzureStorageDruidModule.java @@ -53,9 +53,9 @@ public class AzureStorageDruidModule implements DruidModule public static final String SCHEME = "azure"; public static final String - STORAGE_CONNECTION_STRING_WITH_KEY = "DefaultEndpointsProtocol=%s;AccountName=%s;AccountKey=%s"; + STORAGE_CONNECTION_STRING_WITH_KEY = "DefaultEndpointsProtocol=%s;AccountName=%s;AccountKey=%s;EndpointSuffix=%s;"; public static final String - STORAGE_CONNECTION_STRING_WITH_TOKEN = "DefaultEndpointsProtocol=%s;AccountName=%s;SharedAccessSignature=%s"; + STORAGE_CONNECTION_STRING_WITH_TOKEN = "DefaultEndpointsProtocol=%s;AccountName=%s;SharedAccessSignature=%s;EndpointSuffix=%s;"; public static final String 
INDEX_ZIP_FILE_NAME = "index.zip"; @Override diff --git a/extensions-core/azure-extensions/src/main/java/org/apache/druid/storage/azure/AzureUtils.java b/extensions-core/azure-extensions/src/main/java/org/apache/druid/storage/azure/AzureUtils.java index 63322404f08b..34c38028b367 100644 --- a/extensions-core/azure-extensions/src/main/java/org/apache/druid/storage/azure/AzureUtils.java +++ b/extensions-core/azure-extensions/src/main/java/org/apache/druid/storage/azure/AzureUtils.java @@ -40,11 +40,26 @@ public class AzureUtils { + public static final String DEFAULT_AZURE_ENDPOINT_SUFFIX = "core.windows.net"; @VisibleForTesting static final String AZURE_STORAGE_HOST_ADDRESS = "blob.core.windows.net"; + public static final String DEFAULT_AZURE_BLOB_STORAGE_ENDPOINT_SUFFIX = "blob." + DEFAULT_AZURE_ENDPOINT_SUFFIX; + private final String blobStorageEndpointSuffix; + + /** + * Creates an AzureUtils object with the blob storage endpoint suffix. + * + * @param blobStorageEndpointSuffix the blob storage endpoint, like "blob.core.windows.net", + * "blob.core.chinacloudapi.cn" or + * "blob.core.usgovcloudapi.net" + */ + public AzureUtils(String blobStorageEndpointSuffix) + { + this.blobStorageEndpointSuffix = blobStorageEndpointSuffix; + } // The azure storage hadoop access pattern is: - // wasb[s]://@.blob.core.windows.net/ + // wasb[s]://@./ // (from https://docs.microsoft.com/en-us/azure/hdinsight/hdinsight-hadoop-use-blob-storage) static final String AZURE_STORAGE_HADOOP_PROTOCOL = "wasbs"; @@ -87,14 +102,14 @@ public static String extractAzureKey(URI uri) * @return a String representing the blob path component of the uri with any leading 'blob.core.windows.net/' string * removed characters removed. 
*/ - public static String maybeRemoveAzurePathPrefix(String blobPath) + public String maybeRemoveAzurePathPrefix(String blobPath) { - boolean blobPathIsHadoop = blobPath.contains(AZURE_STORAGE_HOST_ADDRESS); + boolean blobPathIsHadoop = blobPath.contains(blobStorageEndpointSuffix); if (blobPathIsHadoop) { // Remove azure's hadoop prefix to match realtime ingestion path return blobPath.substring( - blobPath.indexOf(AZURE_STORAGE_HOST_ADDRESS) + AZURE_STORAGE_HOST_ADDRESS.length() + 1); + blobPath.indexOf(blobStorageEndpointSuffix) + blobStorageEndpointSuffix.length() + 1); } else { return blobPath; } diff --git a/extensions-core/azure-extensions/src/test/java/org/apache/druid/storage/azure/AzureDataSegmentPullerTest.java b/extensions-core/azure-extensions/src/test/java/org/apache/druid/storage/azure/AzureDataSegmentPullerTest.java index 13820072cb7b..69d3b620abe7 100644 --- a/extensions-core/azure-extensions/src/test/java/org/apache/druid/storage/azure/AzureDataSegmentPullerTest.java +++ b/extensions-core/azure-extensions/src/test/java/org/apache/druid/storage/azure/AzureDataSegmentPullerTest.java @@ -40,7 +40,7 @@ public class AzureDataSegmentPullerTest extends EasyMockSupport private static final String SEGMENT_FILE_NAME = "segment"; private static final String CONTAINER_NAME = "container"; private static final String BLOB_PATH = "path/to/storage/index.zip"; - private static final String BLOB_PATH_HADOOP = AzureUtils.AZURE_STORAGE_HOST_ADDRESS + "/path/to/storage/index.zip"; + private static final String BLOB_PATH_HADOOP = AzureUtils.DEFAULT_AZURE_BLOB_STORAGE_ENDPOINT_SUFFIX + "/path/to/storage/index.zip"; private AzureStorage azureStorage; private AzureByteSourceFactory byteSourceFactory; @@ -60,13 +60,14 @@ public void test_getSegmentFiles_success() final File toDir = FileUtils.createTempDir(); try { final InputStream zipStream = new FileInputStream(pulledFile); + final AzureUtils azureUtils = new 
AzureUtils(AzureUtils.DEFAULT_AZURE_BLOB_STORAGE_ENDPOINT_SUFFIX); EasyMock.expect(byteSourceFactory.create(CONTAINER_NAME, BLOB_PATH)).andReturn(new AzureByteSource(azureStorage, CONTAINER_NAME, BLOB_PATH)); EasyMock.expect(azureStorage.getBlockBlobInputStream(0L, CONTAINER_NAME, BLOB_PATH)).andReturn(zipStream); replayAll(); - AzureDataSegmentPuller puller = new AzureDataSegmentPuller(byteSourceFactory); + AzureDataSegmentPuller puller = new AzureDataSegmentPuller(byteSourceFactory, azureUtils); FileUtils.FileCopyResult result = puller.getSegmentFiles(CONTAINER_NAME, BLOB_PATH, toDir); @@ -92,13 +93,14 @@ public void test_getSegmentFiles_blobPathIsHadoop_success() final File toDir = FileUtils.createTempDir(); try { final InputStream zipStream = new FileInputStream(pulledFile); + final AzureUtils azureUtils = new AzureUtils(AzureUtils.DEFAULT_AZURE_BLOB_STORAGE_ENDPOINT_SUFFIX); EasyMock.expect(byteSourceFactory.create(CONTAINER_NAME, BLOB_PATH)).andReturn(new AzureByteSource(azureStorage, CONTAINER_NAME, BLOB_PATH)); EasyMock.expect(azureStorage.getBlockBlobInputStream(0L, CONTAINER_NAME, BLOB_PATH)).andReturn(zipStream); replayAll(); - AzureDataSegmentPuller puller = new AzureDataSegmentPuller(byteSourceFactory); + AzureDataSegmentPuller puller = new AzureDataSegmentPuller(byteSourceFactory, azureUtils); FileUtils.FileCopyResult result = puller.getSegmentFiles(CONTAINER_NAME, BLOB_PATH_HADOOP, toDir); @@ -119,6 +121,7 @@ public void test_getSegmentFiles_blobPathIsHadoop_success() public void test_getSegmentFiles_nonRecoverableErrorRaisedWhenPullingSegmentFiles_doNotDeleteOutputDirectory() throws IOException, URISyntaxException, StorageException, SegmentLoadingException { + final AzureUtils azureUtils = new AzureUtils(AzureUtils.DEFAULT_AZURE_BLOB_STORAGE_ENDPOINT_SUFFIX); final File outDir = FileUtils.createTempDir(); try { @@ -133,7 +136,7 @@ public void test_getSegmentFiles_nonRecoverableErrorRaisedWhenPullingSegmentFile replayAll(); - AzureDataSegmentPuller 
puller = new AzureDataSegmentPuller(byteSourceFactory); + AzureDataSegmentPuller puller = new AzureDataSegmentPuller(byteSourceFactory, azureUtils); puller.getSegmentFiles(CONTAINER_NAME, BLOB_PATH, outDir); } @@ -151,6 +154,7 @@ public void test_getSegmentFiles_nonRecoverableErrorRaisedWhenPullingSegmentFile public void test_getSegmentFiles_recoverableErrorRaisedWhenPullingSegmentFiles_deleteOutputDirectory() throws IOException, URISyntaxException, StorageException, SegmentLoadingException { + final AzureUtils azureUtils = new AzureUtils(AzureUtils.DEFAULT_AZURE_BLOB_STORAGE_ENDPOINT_SUFFIX); final File outDir = FileUtils.createTempDir(); try { @@ -161,7 +165,7 @@ public void test_getSegmentFiles_recoverableErrorRaisedWhenPullingSegmentFiles_d replayAll(); - AzureDataSegmentPuller puller = new AzureDataSegmentPuller(byteSourceFactory); + AzureDataSegmentPuller puller = new AzureDataSegmentPuller(byteSourceFactory, azureUtils); puller.getSegmentFiles(CONTAINER_NAME, BLOB_PATH, outDir); diff --git a/extensions-core/azure-extensions/src/test/java/org/apache/druid/storage/azure/AzureStorageDruidModuleTest.java b/extensions-core/azure-extensions/src/test/java/org/apache/druid/storage/azure/AzureStorageDruidModuleTest.java index 3c5fada7c10e..e4c34f967dfd 100644 --- a/extensions-core/azure-extensions/src/test/java/org/apache/druid/storage/azure/AzureStorageDruidModuleTest.java +++ b/extensions-core/azure-extensions/src/test/java/org/apache/druid/storage/azure/AzureStorageDruidModuleTest.java @@ -303,6 +303,25 @@ public void testBothAccountKeyAndSAStokenUnset() ); } + @Test + public void testGetAzureUtilsWithDefaultProperties() + { + Properties properties = initializePropertes(); + AzureUtils utils = makeInjectorWithProperties(properties).getInstance(AzureUtils.class); + String outputBlob = utils.maybeRemoveAzurePathPrefix("blob.core.windows.net/container/blob"); + Assert.assertEquals("container/blob", outputBlob); + } + + @Test + public void 
testGetAzureUtilsWithDefaultCustomBlobPath() + { + Properties properties = initializePropertes(); + properties.setProperty("druid.azure.endpointSuffix", "core.usgovcloudapi.net"); + AzureUtils utils = makeInjectorWithProperties(properties).getInstance(AzureUtils.class); + String outputBlob = utils.maybeRemoveAzurePathPrefix("blob.core.usgovcloudapi.net/container/blob"); + Assert.assertEquals("container/blob", outputBlob); + } + private Injector makeInjectorWithProperties(final Properties props) { return Guice.createInjector( diff --git a/extensions-core/azure-extensions/src/test/java/org/apache/druid/storage/azure/AzureUtilsTest.java b/extensions-core/azure-extensions/src/test/java/org/apache/druid/storage/azure/AzureUtilsTest.java index f75370703ce9..fe7ff54b68be 100644 --- a/extensions-core/azure-extensions/src/test/java/org/apache/druid/storage/azure/AzureUtilsTest.java +++ b/extensions-core/azure-extensions/src/test/java/org/apache/druid/storage/azure/AzureUtilsTest.java @@ -33,7 +33,7 @@ public class AzureUtilsTest private static final String CONTAINER_NAME = "container1"; private static final String BLOB_NAME = "blob1"; private static final String BLOB_PATH_WITH_LEADING_SLASH = "/" + BLOB_NAME; - private static final String BLOB_PATH_WITH_LEADING_AZURE_PREFIX = AzureUtils.AZURE_STORAGE_HOST_ADDRESS + private static final String BLOB_PATH_WITH_LEADING_AZURE_PREFIX = AzureUtils.DEFAULT_AZURE_BLOB_STORAGE_ENDPOINT_SUFFIX + "/" + BLOB_NAME; private static final URI URI_WITH_PATH_WITH_LEADING_SLASH; @@ -74,14 +74,16 @@ public void test_extractAzureKey_pathHasLeadingSlash_returnsPathWithLeadingSlash @Test public void test_maybeRemoveAzurePathPrefix_pathHasLeadingAzurePathPrefix_returnsPathWithLeadingAzurePathRemoved() { - String path = AzureUtils.maybeRemoveAzurePathPrefix(BLOB_PATH_WITH_LEADING_AZURE_PREFIX); + final AzureUtils azureUtils = new AzureUtils(AzureUtils.DEFAULT_AZURE_BLOB_STORAGE_ENDPOINT_SUFFIX); + String path = 
azureUtils.maybeRemoveAzurePathPrefix(BLOB_PATH_WITH_LEADING_AZURE_PREFIX); Assert.assertEquals(BLOB_NAME, path); } @Test public void test_maybeRemoveAzurePathPrefix_pathDoesNotHaveAzurePathPrefix__returnsPathWithLeadingAzurePathRemoved() { - String path = AzureUtils.maybeRemoveAzurePathPrefix(BLOB_NAME); + final AzureUtils azureUtils = new AzureUtils(AzureUtils.DEFAULT_AZURE_BLOB_STORAGE_ENDPOINT_SUFFIX); + String path = azureUtils.maybeRemoveAzurePathPrefix(BLOB_NAME); Assert.assertEquals(BLOB_NAME, path); } From c6548f2db8b09eef4b7bfbc2d6d4571b3161be02 Mon Sep 17 00:00:00 2001 From: sviatahorau Date: Fri, 8 Dec 2023 11:28:17 +0100 Subject: [PATCH 2/8] Azure extension docs updated --- docs/development/extensions-core/azure.md | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/development/extensions-core/azure.md b/docs/development/extensions-core/azure.md index c6a1c3979051..3e69df025e56 100644 --- a/docs/development/extensions-core/azure.md +++ b/docs/development/extensions-core/azure.md @@ -40,5 +40,6 @@ To use this Apache Druid extension, [include](../../configuration/extensions.md# |`druid.azure.protocol`|the protocol to use|http or https|https| |`druid.azure.maxTries`|Number of tries before canceling an Azure operation.| |3| |`druid.azure.maxListingLength`|maximum number of input files matching a given prefix to retrieve at a time| |1024| +|`druid.azure.endpointSuffix`|the endpoint suffix to use.|Examples: `core.windows.net`, `core.usgovcloudapi.net`|`core.windows.net`| See [Azure Services](http://azure.microsoft.com/en-us/pricing/free-trial/) for more information. 
From 763542a9d5dd25b14c954a507866ac8014a5d902 Mon Sep 17 00:00:00 2001 From: sviatahorau Date: Mon, 11 Dec 2023 12:12:36 +0100 Subject: [PATCH 3/8] Azure extension module fixed --- .../storage/azure/AzureStorageDruidModule.java | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/extensions-core/azure-extensions/src/main/java/org/apache/druid/storage/azure/AzureStorageDruidModule.java b/extensions-core/azure-extensions/src/main/java/org/apache/druid/storage/azure/AzureStorageDruidModule.java index 62473aff0ae8..ce03d3b62b00 100644 --- a/extensions-core/azure-extensions/src/main/java/org/apache/druid/storage/azure/AzureStorageDruidModule.java +++ b/extensions-core/azure-extensions/src/main/java/org/apache/druid/storage/azure/AzureStorageDruidModule.java @@ -118,6 +118,18 @@ public void configure(Binder binder) .build(ListBlobItemHolderFactory.class)); } + + /** + * Creates a supplier that lazily initialize {@link AzureUtils}. It is used to inject an endpoint suffix into the + * {@link AzureUtils} constructor. + */ + @Provides + @LazySingleton + public AzureUtils getAzureUtils(final AzureAccountConfig config) + { + return new AzureUtils(config.getBlobStorageEndpointSuffix()); + } + /** * Creates a supplier that lazily initialize {@link CloudBlobClient}. * This is to avoid immediate config validation but defer it until you actually use the client. 
@@ -141,7 +153,8 @@ public Supplier getCloudBlobClient(final AzureAccountConfig con STORAGE_CONNECTION_STRING_WITH_KEY, config.getProtocol(), config.getAccount(), - config.getKey() + config.getKey(), + config.getEndpointSuffix() ) ); @@ -151,7 +164,8 @@ public Supplier getCloudBlobClient(final AzureAccountConfig con STORAGE_CONNECTION_STRING_WITH_TOKEN, config.getProtocol(), config.getAccount(), - config.getSharedAccessStorageToken() + config.getSharedAccessStorageToken(), + config.getEndpointSuffix() )); return account.createCloudBlobClient(); } else { From c4b94fa7b8a34a7c93ab5df0776df6db1d77cabb Mon Sep 17 00:00:00 2001 From: sviatahorau Date: Fri, 5 Jan 2024 14:34:27 +0100 Subject: [PATCH 4/8] Addressing code review comments --- docs/development/extensions-core/azure.md | 24 +++++++++---------- .../storage/azure/AzureAccountConfig.java | 2 +- .../storage/azure/AzureDataSegmentPuller.java | 8 +++---- .../storage/azure/AzureDataSegmentPusher.java | 2 +- .../azure/AzureStorageDruidModule.java | 11 --------- .../druid/storage/azure/AzureUtils.java | 10 ++------ .../azure/AzureDataSegmentPullerTest.java | 18 +++++++------- .../azure/AzureStorageDruidModuleTest.java | 8 +++---- .../druid/storage/azure/AzureUtilsTest.java | 8 +++---- 9 files changed, 36 insertions(+), 55 deletions(-) diff --git a/docs/development/extensions-core/azure.md b/docs/development/extensions-core/azure.md index 3e69df025e56..ad69a771ec9f 100644 --- a/docs/development/extensions-core/azure.md +++ b/docs/development/extensions-core/azure.md @@ -29,17 +29,17 @@ To use this Apache Druid extension, [include](../../configuration/extensions.md# [Microsoft Azure Storage](http://azure.microsoft.com/en-us/services/storage/) is another option for deep storage. This requires some additional Druid configuration. 
-|Property|Description|Possible Values|Default| -|--------|---------------|-----------|-------| -|`druid.storage.type`|azure||Must be set.| -|`druid.azure.account`||Azure Storage account name.|Must be set.| -|`druid.azure.key`||Azure Storage account key.|Optional. Either set key or sharedAccessStorageToken but not both.| -|`druid.azure.sharedAccessStorageToken`||Azure Shared Storage access token|Optional. Either set key or sharedAccessStorageToken but not both.| -|`druid.azure.container`||Azure Storage container name.|Must be set.| -|`druid.azure.prefix`|A prefix string that will be prepended to the blob names for the segments published to Azure deep storage| |""| -|`druid.azure.protocol`|the protocol to use|http or https|https| -|`druid.azure.maxTries`|Number of tries before canceling an Azure operation.| |3| -|`druid.azure.maxListingLength`|maximum number of input files matching a given prefix to retrieve at a time| |1024| -|`druid.azure.endpointSuffix`|the endpoint suffix to use.|Examples: `core.windows.net`, `core.usgovcloudapi.net`|`core.windows.net`| +| Property | Description | Possible Values | Default | +|----------------------------------------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------------------------------------------------|--------------------------------------------------------------------| +| `druid.storage.type` | azure | | Must be set. | +| `druid.azure.account` | | Azure Storage account name. | Must be set. | +| `druid.azure.key` | | Azure Storage account key. | Optional. Either set key or sharedAccessStorageToken but not both. | +| `druid.azure.sharedAccessStorageToken` | | Azure Shared Storage access token | Optional. Either set key or sharedAccessStorageToken but not both. | +| `druid.azure.container` | | Azure Storage container name. 
| Must be set. | +| `druid.azure.prefix` | A prefix string that will be prepended to the blob names for the segments published to Azure deep storage | | "" | +| `druid.azure.protocol` | the protocol to use | http or https | https | +| `druid.azure.maxTries` | Number of tries before canceling an Azure operation. | | 3 | +| `druid.azure.maxListingLength` | maximum number of input files matching a given prefix to retrieve at a time | | 1024 | +| `druid.azure.endpointSuffix` | The endpoint suffix to use. Can be overridden to connect to [Azure Government](https://learn.microsoft.com/en-us/azure/azure-government/documentation-government-get-started-connect-to-storage#getting-started-with-storage-api). | Examples: `core.windows.net`, `core.usgovcloudapi.net` | `core.windows.net` | See [Azure Services](http://azure.microsoft.com/en-us/pricing/free-trial/) for more information. diff --git a/extensions-core/azure-extensions/src/main/java/org/apache/druid/storage/azure/AzureAccountConfig.java b/extensions-core/azure-extensions/src/main/java/org/apache/druid/storage/azure/AzureAccountConfig.java index be54fa6bb0a7..a6b2ba76e892 100644 --- a/extensions-core/azure-extensions/src/main/java/org/apache/druid/storage/azure/AzureAccountConfig.java +++ b/extensions-core/azure-extensions/src/main/java/org/apache/druid/storage/azure/AzureAccountConfig.java @@ -113,7 +113,7 @@ public String getEndpointSuffix() return endpointSuffix; } - public String getBlobStorageEndpointSuffix() + public String getBlobStorageEndpoint() { return "blob." 
+ endpointSuffix; } diff --git a/extensions-core/azure-extensions/src/main/java/org/apache/druid/storage/azure/AzureDataSegmentPuller.java b/extensions-core/azure-extensions/src/main/java/org/apache/druid/storage/azure/AzureDataSegmentPuller.java index f2ddea959902..c20413b1169e 100644 --- a/extensions-core/azure-extensions/src/main/java/org/apache/druid/storage/azure/AzureDataSegmentPuller.java +++ b/extensions-core/azure-extensions/src/main/java/org/apache/druid/storage/azure/AzureDataSegmentPuller.java @@ -38,16 +38,16 @@ public class AzureDataSegmentPuller private final AzureByteSourceFactory byteSourceFactory; - private final AzureUtils azureUtils; + private final AzureAccountConfig azureAccountConfig; @Inject public AzureDataSegmentPuller( AzureByteSourceFactory byteSourceFactory, - AzureUtils azureUtils + AzureAccountConfig azureAccountConfig ) { this.byteSourceFactory = byteSourceFactory; - this.azureUtils = azureUtils; + this.azureAccountConfig = azureAccountConfig; } FileUtils.FileCopyResult getSegmentFiles( @@ -64,7 +64,7 @@ FileUtils.FileCopyResult getSegmentFiles( "Loading container: [%s], with blobPath: [%s] and outDir: [%s]", containerName, blobPath, outDir ); - final String actualBlobPath = azureUtils.maybeRemoveAzurePathPrefix(blobPath); + final String actualBlobPath = AzureUtils.maybeRemoveAzurePathPrefix(blobPath, azureAccountConfig.getBlobStorageEndpoint()); final ByteSource byteSource = byteSourceFactory.create(containerName, actualBlobPath); final FileUtils.FileCopyResult result = CompressionUtils.unzip( diff --git a/extensions-core/azure-extensions/src/main/java/org/apache/druid/storage/azure/AzureDataSegmentPusher.java b/extensions-core/azure-extensions/src/main/java/org/apache/druid/storage/azure/AzureDataSegmentPusher.java index ba83123e2161..4282cd68c6c9 100644 --- a/extensions-core/azure-extensions/src/main/java/org/apache/druid/storage/azure/AzureDataSegmentPusher.java +++ 
b/extensions-core/azure-extensions/src/main/java/org/apache/druid/storage/azure/AzureDataSegmentPusher.java @@ -79,7 +79,7 @@ public String getPathForHadoop() AzureUtils.AZURE_STORAGE_HADOOP_PROTOCOL, segmentConfig.getContainer(), accountConfig.getAccount(), - accountConfig.getBlobStorageEndpointSuffix(), + accountConfig.getBlobStorageEndpoint(), prefixIsNullOrEmpty ? "" : StringUtils.maybeRemoveTrailingSlash(prefix) + '/' ); diff --git a/extensions-core/azure-extensions/src/main/java/org/apache/druid/storage/azure/AzureStorageDruidModule.java b/extensions-core/azure-extensions/src/main/java/org/apache/druid/storage/azure/AzureStorageDruidModule.java index ce03d3b62b00..8a9f71e7a2aa 100644 --- a/extensions-core/azure-extensions/src/main/java/org/apache/druid/storage/azure/AzureStorageDruidModule.java +++ b/extensions-core/azure-extensions/src/main/java/org/apache/druid/storage/azure/AzureStorageDruidModule.java @@ -119,17 +119,6 @@ public void configure(Binder binder) } - /** - * Creates a supplier that lazily initialize {@link AzureUtils}. It is used to inject an endpoint suffix into the - * {@link AzureUtils} constructor. - */ - @Provides - @LazySingleton - public AzureUtils getAzureUtils(final AzureAccountConfig config) - { - return new AzureUtils(config.getBlobStorageEndpointSuffix()); - } - /** * Creates a supplier that lazily initialize {@link CloudBlobClient}. * This is to avoid immediate config validation but defer it until you actually use the client. 
diff --git a/extensions-core/azure-extensions/src/main/java/org/apache/druid/storage/azure/AzureUtils.java b/extensions-core/azure-extensions/src/main/java/org/apache/druid/storage/azure/AzureUtils.java index 34c38028b367..b630c533d340 100644 --- a/extensions-core/azure-extensions/src/main/java/org/apache/druid/storage/azure/AzureUtils.java +++ b/extensions-core/azure-extensions/src/main/java/org/apache/druid/storage/azure/AzureUtils.java @@ -43,8 +43,6 @@ public class AzureUtils public static final String DEFAULT_AZURE_ENDPOINT_SUFFIX = "core.windows.net"; @VisibleForTesting static final String AZURE_STORAGE_HOST_ADDRESS = "blob.core.windows.net"; - public static final String DEFAULT_AZURE_BLOB_STORAGE_ENDPOINT_SUFFIX = "blob." + DEFAULT_AZURE_ENDPOINT_SUFFIX; - private final String blobStorageEndpointSuffix; /** * Creates an AzureUtils object with the blob storage endpoint suffix. @@ -53,13 +51,9 @@ public class AzureUtils * "blob.core.chinacloudapi.cn" or * "blob.core.usgovcloudapi.net" */ - public AzureUtils(String blobStorageEndpointSuffix) - { - this.blobStorageEndpointSuffix = blobStorageEndpointSuffix; - } // The azure storage hadoop access pattern is: - // wasb[s]://@./ + // wasb[s]://@.blob./ // (from https://docs.microsoft.com/en-us/azure/hdinsight/hdinsight-hadoop-use-blob-storage) static final String AZURE_STORAGE_HADOOP_PROTOCOL = "wasbs"; @@ -102,7 +96,7 @@ public static String extractAzureKey(URI uri) * @return a String representing the blob path component of the uri with any leading 'blob.core.windows.net/' string * removed characters removed. 
*/ - public String maybeRemoveAzurePathPrefix(String blobPath) + public static String maybeRemoveAzurePathPrefix(String blobPath, String blobStorageEndpointSuffix) { boolean blobPathIsHadoop = blobPath.contains(blobStorageEndpointSuffix); diff --git a/extensions-core/azure-extensions/src/test/java/org/apache/druid/storage/azure/AzureDataSegmentPullerTest.java b/extensions-core/azure-extensions/src/test/java/org/apache/druid/storage/azure/AzureDataSegmentPullerTest.java index 69d3b620abe7..d69a45c43ca5 100644 --- a/extensions-core/azure-extensions/src/test/java/org/apache/druid/storage/azure/AzureDataSegmentPullerTest.java +++ b/extensions-core/azure-extensions/src/test/java/org/apache/druid/storage/azure/AzureDataSegmentPullerTest.java @@ -40,7 +40,7 @@ public class AzureDataSegmentPullerTest extends EasyMockSupport private static final String SEGMENT_FILE_NAME = "segment"; private static final String CONTAINER_NAME = "container"; private static final String BLOB_PATH = "path/to/storage/index.zip"; - private static final String BLOB_PATH_HADOOP = AzureUtils.DEFAULT_AZURE_BLOB_STORAGE_ENDPOINT_SUFFIX + "/path/to/storage/index.zip"; + private static final String BLOB_PATH_HADOOP = AzureUtils.AZURE_STORAGE_HOST_ADDRESS + "/path/to/storage/index.zip"; private AzureStorage azureStorage; private AzureByteSourceFactory byteSourceFactory; @@ -60,14 +60,14 @@ public void test_getSegmentFiles_success() final File toDir = FileUtils.createTempDir(); try { final InputStream zipStream = new FileInputStream(pulledFile); - final AzureUtils azureUtils = new AzureUtils(AzureUtils.DEFAULT_AZURE_BLOB_STORAGE_ENDPOINT_SUFFIX); + final AzureAccountConfig config = new AzureAccountConfig(); EasyMock.expect(byteSourceFactory.create(CONTAINER_NAME, BLOB_PATH)).andReturn(new AzureByteSource(azureStorage, CONTAINER_NAME, BLOB_PATH)); EasyMock.expect(azureStorage.getBlockBlobInputStream(0L, CONTAINER_NAME, BLOB_PATH)).andReturn(zipStream); replayAll(); - AzureDataSegmentPuller puller = new 
AzureDataSegmentPuller(byteSourceFactory, azureUtils); + AzureDataSegmentPuller puller = new AzureDataSegmentPuller(byteSourceFactory, config); FileUtils.FileCopyResult result = puller.getSegmentFiles(CONTAINER_NAME, BLOB_PATH, toDir); @@ -93,14 +93,14 @@ public void test_getSegmentFiles_blobPathIsHadoop_success() final File toDir = FileUtils.createTempDir(); try { final InputStream zipStream = new FileInputStream(pulledFile); - final AzureUtils azureUtils = new AzureUtils(AzureUtils.DEFAULT_AZURE_BLOB_STORAGE_ENDPOINT_SUFFIX); + final AzureAccountConfig config = new AzureAccountConfig(); EasyMock.expect(byteSourceFactory.create(CONTAINER_NAME, BLOB_PATH)).andReturn(new AzureByteSource(azureStorage, CONTAINER_NAME, BLOB_PATH)); EasyMock.expect(azureStorage.getBlockBlobInputStream(0L, CONTAINER_NAME, BLOB_PATH)).andReturn(zipStream); replayAll(); - AzureDataSegmentPuller puller = new AzureDataSegmentPuller(byteSourceFactory, azureUtils); + AzureDataSegmentPuller puller = new AzureDataSegmentPuller(byteSourceFactory, config); FileUtils.FileCopyResult result = puller.getSegmentFiles(CONTAINER_NAME, BLOB_PATH_HADOOP, toDir); @@ -121,7 +121,7 @@ public void test_getSegmentFiles_blobPathIsHadoop_success() public void test_getSegmentFiles_nonRecoverableErrorRaisedWhenPullingSegmentFiles_doNotDeleteOutputDirectory() throws IOException, URISyntaxException, StorageException, SegmentLoadingException { - final AzureUtils azureUtils = new AzureUtils(AzureUtils.DEFAULT_AZURE_BLOB_STORAGE_ENDPOINT_SUFFIX); + final AzureAccountConfig config = new AzureAccountConfig(); final File outDir = FileUtils.createTempDir(); try { @@ -136,7 +136,7 @@ public void test_getSegmentFiles_nonRecoverableErrorRaisedWhenPullingSegmentFile replayAll(); - AzureDataSegmentPuller puller = new AzureDataSegmentPuller(byteSourceFactory, azureUtils); + AzureDataSegmentPuller puller = new AzureDataSegmentPuller(byteSourceFactory, config); puller.getSegmentFiles(CONTAINER_NAME, BLOB_PATH, outDir); } @@ -154,7 
+154,7 @@ public void test_getSegmentFiles_nonRecoverableErrorRaisedWhenPullingSegmentFile public void test_getSegmentFiles_recoverableErrorRaisedWhenPullingSegmentFiles_deleteOutputDirectory() throws IOException, URISyntaxException, StorageException, SegmentLoadingException { - final AzureUtils azureUtils = new AzureUtils(AzureUtils.DEFAULT_AZURE_BLOB_STORAGE_ENDPOINT_SUFFIX); + final AzureAccountConfig config = new AzureAccountConfig(); final File outDir = FileUtils.createTempDir(); try { @@ -165,7 +165,7 @@ public void test_getSegmentFiles_recoverableErrorRaisedWhenPullingSegmentFiles_d replayAll(); - AzureDataSegmentPuller puller = new AzureDataSegmentPuller(byteSourceFactory, azureUtils); + AzureDataSegmentPuller puller = new AzureDataSegmentPuller(byteSourceFactory, config); puller.getSegmentFiles(CONTAINER_NAME, BLOB_PATH, outDir); diff --git a/extensions-core/azure-extensions/src/test/java/org/apache/druid/storage/azure/AzureStorageDruidModuleTest.java b/extensions-core/azure-extensions/src/test/java/org/apache/druid/storage/azure/AzureStorageDruidModuleTest.java index e4c34f967dfd..62ea95bebb65 100644 --- a/extensions-core/azure-extensions/src/test/java/org/apache/druid/storage/azure/AzureStorageDruidModuleTest.java +++ b/extensions-core/azure-extensions/src/test/java/org/apache/druid/storage/azure/AzureStorageDruidModuleTest.java @@ -307,8 +307,8 @@ public void testBothAccountKeyAndSAStokenUnset() public void testGetAzureUtilsWithDefaultProperties() { Properties properties = initializePropertes(); - AzureUtils utils = makeInjectorWithProperties(properties).getInstance(AzureUtils.class); - String outputBlob = utils.maybeRemoveAzurePathPrefix("blob.core.windows.net/container/blob"); + AzureAccountConfig config = makeInjectorWithProperties(properties).getInstance(AzureAccountConfig.class); + String outputBlob = AzureUtils.maybeRemoveAzurePathPrefix("blob.core.windows.net/container/blob", config.getBlobStorageEndpoint()); Assert.assertEquals("container/blob", 
outputBlob); } @@ -317,8 +317,8 @@ public void testGetAzureUtilsWithDefaultCustomBlobPath() { Properties properties = initializePropertes(); properties.setProperty("druid.azure.endpointSuffix", "core.usgovcloudapi.net"); - AzureUtils utils = makeInjectorWithProperties(properties).getInstance(AzureUtils.class); - String outputBlob = utils.maybeRemoveAzurePathPrefix("blob.core.usgovcloudapi.net/container/blob"); + AzureAccountConfig config = makeInjectorWithProperties(properties).getInstance(AzureAccountConfig.class); + String outputBlob = AzureUtils.maybeRemoveAzurePathPrefix("blob.core.usgovcloudapi.net/container/blob", config.getBlobStorageEndpoint()); Assert.assertEquals("container/blob", outputBlob); } diff --git a/extensions-core/azure-extensions/src/test/java/org/apache/druid/storage/azure/AzureUtilsTest.java b/extensions-core/azure-extensions/src/test/java/org/apache/druid/storage/azure/AzureUtilsTest.java index fe7ff54b68be..c16c25d40a10 100644 --- a/extensions-core/azure-extensions/src/test/java/org/apache/druid/storage/azure/AzureUtilsTest.java +++ b/extensions-core/azure-extensions/src/test/java/org/apache/druid/storage/azure/AzureUtilsTest.java @@ -33,7 +33,7 @@ public class AzureUtilsTest private static final String CONTAINER_NAME = "container1"; private static final String BLOB_NAME = "blob1"; private static final String BLOB_PATH_WITH_LEADING_SLASH = "/" + BLOB_NAME; - private static final String BLOB_PATH_WITH_LEADING_AZURE_PREFIX = AzureUtils.DEFAULT_AZURE_BLOB_STORAGE_ENDPOINT_SUFFIX + private static final String BLOB_PATH_WITH_LEADING_AZURE_PREFIX = AzureUtils.AZURE_STORAGE_HOST_ADDRESS + "/" + BLOB_NAME; private static final URI URI_WITH_PATH_WITH_LEADING_SLASH; @@ -74,16 +74,14 @@ public void test_extractAzureKey_pathHasLeadingSlash_returnsPathWithLeadingSlash @Test public void test_maybeRemoveAzurePathPrefix_pathHasLeadingAzurePathPrefix_returnsPathWithLeadingAzurePathRemoved() { - final AzureUtils azureUtils = new 
AzureUtils(AzureUtils.DEFAULT_AZURE_BLOB_STORAGE_ENDPOINT_SUFFIX); - String path = azureUtils.maybeRemoveAzurePathPrefix(BLOB_PATH_WITH_LEADING_AZURE_PREFIX); + String path = AzureUtils.maybeRemoveAzurePathPrefix(BLOB_PATH_WITH_LEADING_AZURE_PREFIX, AzureUtils.AZURE_STORAGE_HOST_ADDRESS); Assert.assertEquals(BLOB_NAME, path); } @Test public void test_maybeRemoveAzurePathPrefix_pathDoesNotHaveAzurePathPrefix__returnsPathWithLeadingAzurePathRemoved() { - final AzureUtils azureUtils = new AzureUtils(AzureUtils.DEFAULT_AZURE_BLOB_STORAGE_ENDPOINT_SUFFIX); - String path = azureUtils.maybeRemoveAzurePathPrefix(BLOB_NAME); + String path = AzureUtils.maybeRemoveAzurePathPrefix(BLOB_NAME, AzureUtils.AZURE_STORAGE_HOST_ADDRESS); Assert.assertEquals(BLOB_NAME, path); } From 261dfec8c621a1ac90a10bc1d4219c5c138bcfb3 Mon Sep 17 00:00:00 2001 From: sviatahorau Date: Fri, 5 Jan 2024 14:51:11 +0100 Subject: [PATCH 5/8] Code clean up --- docs/development/extensions-core/azure.md | 24 +++++++++---------- .../druid/storage/azure/AzureUtils.java | 8 ------- 2 files changed, 12 insertions(+), 20 deletions(-) diff --git a/docs/development/extensions-core/azure.md b/docs/development/extensions-core/azure.md index ad69a771ec9f..955d2a4867af 100644 --- a/docs/development/extensions-core/azure.md +++ b/docs/development/extensions-core/azure.md @@ -29,17 +29,17 @@ To use this Apache Druid extension, [include](../../configuration/extensions.md# [Microsoft Azure Storage](http://azure.microsoft.com/en-us/services/storage/) is another option for deep storage. This requires some additional Druid configuration. 
-| Property | Description | Possible Values | Default | -|----------------------------------------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------------------------------------------------|--------------------------------------------------------------------| -| `druid.storage.type` | azure | | Must be set. | -| `druid.azure.account` | | Azure Storage account name. | Must be set. | -| `druid.azure.key` | | Azure Storage account key. | Optional. Either set key or sharedAccessStorageToken but not both. | -| `druid.azure.sharedAccessStorageToken` | | Azure Shared Storage access token | Optional. Either set key or sharedAccessStorageToken but not both. | -| `druid.azure.container` | | Azure Storage container name. | Must be set. | -| `druid.azure.prefix` | A prefix string that will be prepended to the blob names for the segments published to Azure deep storage | | "" | -| `druid.azure.protocol` | the protocol to use | http or https | https | -| `druid.azure.maxTries` | Number of tries before canceling an Azure operation. | | 3 | -| `druid.azure.maxListingLength` | maximum number of input files matching a given prefix to retrieve at a time | | 1024 | -| `druid.azure.endpointSuffix` | The endpoint suffix to use. Could be overriden for connecting with [Azure Goverment](https://learn.microsoft.com/en-us/azure/azure-government/documentation-government-get-started-connect-to-storage#getting-started-with-storage-api). | Examples: `core.windows.net`, `core.usgovcloudapi.net` | `core.windows.net` | +|Property|Description|Possible Values|Default| +|--------|---------------|-----------|-------| +|`druid.storage.type`|azure||Must be set.| +|`druid.azure.account`||Azure Storage account name.|Must be set.| +|`druid.azure.key`||Azure Storage account key.|Optional. 
Either set key or sharedAccessStorageToken but not both.| +|`druid.azure.sharedAccessStorageToken`||Azure Shared Storage access token|Optional. Either set key or sharedAccessStorageToken but not both.| +|`druid.azure.container`||Azure Storage container name.|Must be set.| +|`druid.azure.prefix`|A prefix string that will be prepended to the blob names for the segments published to Azure deep storage| |""| +|`druid.azure.protocol`|the protocol to use|http or https|https| +|`druid.azure.maxTries`|Number of tries before canceling an Azure operation.| |3| +|`druid.azure.maxListingLength`|maximum number of input files matching a given prefix to retrieve at a time| |1024| +|`druid.azure.endpointSuffix`|The endpoint suffix to use. Could be overriden for connecting with [Azure Goverment](https://learn.microsoft.com/en-us/azure/azure-government/documentation-government-get-started-connect-to-storage#getting-started-with-storage-api).|Examples: `core.windows.net`, `core.usgovcloudapi.net`|`core.windows.net`| See [Azure Services](http://azure.microsoft.com/en-us/pricing/free-trial/) for more information. diff --git a/extensions-core/azure-extensions/src/main/java/org/apache/druid/storage/azure/AzureUtils.java b/extensions-core/azure-extensions/src/main/java/org/apache/druid/storage/azure/AzureUtils.java index b630c533d340..410df7c5c0da 100644 --- a/extensions-core/azure-extensions/src/main/java/org/apache/druid/storage/azure/AzureUtils.java +++ b/extensions-core/azure-extensions/src/main/java/org/apache/druid/storage/azure/AzureUtils.java @@ -44,14 +44,6 @@ public class AzureUtils @VisibleForTesting static final String AZURE_STORAGE_HOST_ADDRESS = "blob.core.windows.net"; - /** - * Creates an AzureUtils object with the blob storage endpoint suffix. 
- * - * @param blobStorageEndpointSuffix the blob storage endpoint, like "blob.core.windows.net", - * "blob.core.chinacloudapi.cn" or - * "blob.core.usgovcloudapi.net" - */ - // The azure storage hadoop access pattern is: // wasb[s]://@.blob./ // (from https://docs.microsoft.com/en-us/azure/hdinsight/hdinsight-hadoop-use-blob-storage) From 67d312824a6682664f5941ad31e0a2bebd6966ce Mon Sep 17 00:00:00 2001 From: sviatahorau Date: Mon, 8 Jan 2024 16:34:51 +0100 Subject: [PATCH 6/8] Fixed tests and checkstyle --- docs/development/extensions-core/azure.md | 28 +++++++++---------- .../storage/azure/AzureClientFactoryTest.java | 1 + .../azure/AzureStorageDruidModuleTest.java | 4 +-- .../druid/storage/azure/AzureUtilsTest.java | 5 ++-- 4 files changed, 19 insertions(+), 19 deletions(-) diff --git a/docs/development/extensions-core/azure.md b/docs/development/extensions-core/azure.md index ccb58a5de623..2356648991c5 100644 --- a/docs/development/extensions-core/azure.md +++ b/docs/development/extensions-core/azure.md @@ -29,19 +29,19 @@ To use this Apache Druid extension, [include](../../configuration/extensions.md# [Microsoft Azure Storage](http://azure.microsoft.com/en-us/services/storage/) is another option for deep storage. This requires some additional Druid configuration. -|Property|Description|Possible Values|Default| -|--------|---------------|-----------|-------| -|`druid.storage.type`|azure||Must be set.| -|`druid.azure.account`||Azure Storage account name.|Must be set.| -|`druid.azure.key`||Azure Storage account key.|Optional. Set one of key, sharedAccessStorageToken or useAzureCredentialsChain.| -|`druid.azure.sharedAccessStorageToken`||Azure Shared Storage access token|Optional. Set one of key, sharedAccessStorageToken or useAzureCredentialsChain..| -|`druid.azure.useAzureCredentialsChain`|Use [DefaultAzureCredential](https://learn.microsoft.com/en-us/java/api/overview/azure/identity-readme?view=azure-java-stable) for authentication|Optional. 
Set one of key, sharedAccessStorageToken or useAzureCredentialsChain.|False| -|`druid.azure.managedIdentityClientId`|If you want to use managed identity authentication in the `DefaultAzureCredential`, `useAzureCredentialsChain` must be true.||Optional.| -|`druid.azure.container`||Azure Storage container name.|Must be set.| -|`druid.azure.prefix`|A prefix string that will be prepended to the blob names for the segments published to Azure deep storage| |""| -|`druid.azure.protocol`|the protocol to use|http or https|https| -|`druid.azure.maxTries`|Number of tries before canceling an Azure operation.| |3| -|`druid.azure.maxListingLength`|maximum number of input files matching a given prefix to retrieve at a time| |1024| -|`druid.azure.endpointSuffix`|The endpoint suffix to use. Override the default value to connect to [Azure Goverment](https://learn.microsoft.com/en-us/azure/azure-government/documentation-government-get-started-connect-to-storage#getting-started-with-storage-api).|Examples: `core.windows.net`, `core.usgovcloudapi.net`|`core.windows.net`| +|Property| Description |Possible Values|Default| +|--------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|-----------|-------| +|`druid.storage.type`| azure ||Must be set.| +|`druid.azure.account`| |Azure Storage account name.|Must be set.| +|`druid.azure.key`| |Azure Storage account key.|Optional. Set one of key, sharedAccessStorageToken or useAzureCredentialsChain.| +|`druid.azure.sharedAccessStorageToken`| |Azure Shared Storage access token|Optional. Set one of key, sharedAccessStorageToken or useAzureCredentialsChain..| +|`druid.azure.useAzureCredentialsChain`| Use [DefaultAzureCredential](https://learn.microsoft.com/en-us/java/api/overview/azure/identity-readme?view=azure-java-stable) for authentication |Optional. 
Set one of key, sharedAccessStorageToken or useAzureCredentialsChain.|False| +|`druid.azure.managedIdentityClientId`| If you want to use managed identity authentication in the `DefaultAzureCredential`, `useAzureCredentialsChain` must be true. ||Optional.| +|`druid.azure.container`| |Azure Storage container name.|Must be set.| +|`druid.azure.prefix`| A prefix string that will be prepended to the blob names for the segments published to Azure deep storage | |""| +|`druid.azure.protocol`| the protocol to use |http or https|https| +|`druid.azure.maxTries`| Number of tries before canceling an Azure operation. | |3| +|`druid.azure.maxListingLength`| maximum number of input files matching a given prefix to retrieve at a time | |1024| +|`druid.azure.endpointSuffix`| The endpoint suffix to use. Override the default value to connect to [Azure Government](https://learn.microsoft.com/en-us/azure/azure-government/documentation-government-get-started-connect-to-storage#getting-started-with-storage-api). |Examples: `core.windows.net`, `core.usgovcloudapi.net`|`core.windows.net`| See [Azure Services](http://azure.microsoft.com/en-us/pricing/free-trial/) for more information. 
diff --git a/extensions-core/azure-extensions/src/test/java/org/apache/druid/storage/azure/AzureClientFactoryTest.java b/extensions-core/azure-extensions/src/test/java/org/apache/druid/storage/azure/AzureClientFactoryTest.java index ffc4a8bb8013..bbf07b402ddc 100644 --- a/extensions-core/azure-extensions/src/test/java/org/apache/druid/storage/azure/AzureClientFactoryTest.java +++ b/extensions-core/azure-extensions/src/test/java/org/apache/druid/storage/azure/AzureClientFactoryTest.java @@ -133,6 +133,7 @@ public void test_blobServiceClientBuilder_useAzureAccountConfig_asDefaultMaxTrie EasyMock.expect(config.getKey()).andReturn("key").times(2); EasyMock.expect(config.getAccount()).andReturn(ACCOUNT).times(2); EasyMock.expect(config.getMaxTries()).andReturn(3); + EasyMock.expect(config.getBlobStorageEndpoint()).andReturn(AzureUtils.AZURE_STORAGE_HOST_ADDRESS); azureClientFactory = new AzureClientFactory(config); EasyMock.replay(config); azureClientFactory.getBlobServiceClient(null); diff --git a/extensions-core/azure-extensions/src/test/java/org/apache/druid/storage/azure/AzureStorageDruidModuleTest.java b/extensions-core/azure-extensions/src/test/java/org/apache/druid/storage/azure/AzureStorageDruidModuleTest.java index 6bd7be197eb5..27d02cd23546 100644 --- a/extensions-core/azure-extensions/src/test/java/org/apache/druid/storage/azure/AzureStorageDruidModuleTest.java +++ b/extensions-core/azure-extensions/src/test/java/org/apache/druid/storage/azure/AzureStorageDruidModuleTest.java @@ -260,7 +260,7 @@ public void testAllCredentialsUnset() } @Test - public void testGetAzureUtilsWithDefaultProperties() + public void testGetBlobStorageEndpointWithDefaultProperties() { Properties properties = initializePropertes(); AzureAccountConfig config = makeInjectorWithProperties(properties).getInstance(AzureAccountConfig.class); @@ -269,7 +269,7 @@ public void testGetAzureUtilsWithDefaultProperties() } @Test - public void testGetAzureUtilsWithDefaultCustomBlobPath() + public 
void testGetBlobStorageEndpointWithCustomBlobPath() { Properties properties = initializePropertes(); final String customSuffix = "core.usgovcloudapi.net"; diff --git a/extensions-core/azure-extensions/src/test/java/org/apache/druid/storage/azure/AzureUtilsTest.java b/extensions-core/azure-extensions/src/test/java/org/apache/druid/storage/azure/AzureUtilsTest.java index 8be5265801f8..4a28c4de4ccc 100644 --- a/extensions-core/azure-extensions/src/test/java/org/apache/druid/storage/azure/AzureUtilsTest.java +++ b/extensions-core/azure-extensions/src/test/java/org/apache/druid/storage/azure/AzureUtilsTest.java @@ -33,7 +33,6 @@ import java.io.IOException; import java.net.URI; import java.net.URISyntaxException; -import java.util.Properties; import java.util.concurrent.TimeoutException; @RunWith(EasyMockRunner.class) @@ -209,14 +208,14 @@ public void test_azureRetry_RunTimeExceptionWrappedInRunTimeException_returnsFal } @Test - public void testGetAzureUtilsWithDefaultProperties() + public void testRemoveAzurePathPrefixDefaultEndpoint() { String outputBlob = AzureUtils.maybeRemoveAzurePathPrefix("blob.core.windows.net/container/blob", "blob.core.windows.net"); Assert.assertEquals("container/blob", outputBlob); } @Test - public void testGetAzureUtilsWithDefaultCustomBlobPath() + public void testRemoveAzurePathPrefixCustomEndpoint() { String outputBlob = AzureUtils.maybeRemoveAzurePathPrefix("blob.core.usgovcloudapi.net/container/blob", "blob.core.usgovcloudapi.net"); Assert.assertEquals("container/blob", outputBlob); From 0689d1d23fd5f94bc0a060642c26d557ed85874c Mon Sep 17 00:00:00 2001 From: sviatahorau Date: Tue, 9 Jan 2024 10:46:01 +0100 Subject: [PATCH 7/8] docs reformat --- docs/development/extensions-core/azure.md | 27 +++++++++++------------ 1 file changed, 13 insertions(+), 14 deletions(-) diff --git a/docs/development/extensions-core/azure.md b/docs/development/extensions-core/azure.md index 2356648991c5..44f5865303f9 100644 --- 
a/docs/development/extensions-core/azure.md +++ b/docs/development/extensions-core/azure.md @@ -29,19 +29,18 @@ To use this Apache Druid extension, [include](../../configuration/extensions.md# [Microsoft Azure Storage](http://azure.microsoft.com/en-us/services/storage/) is another option for deep storage. This requires some additional Druid configuration. -|Property| Description |Possible Values|Default| -|--------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|-----------|-------| -|`druid.storage.type`| azure ||Must be set.| -|`druid.azure.account`| |Azure Storage account name.|Must be set.| -|`druid.azure.key`| |Azure Storage account key.|Optional. Set one of key, sharedAccessStorageToken or useAzureCredentialsChain.| -|`druid.azure.sharedAccessStorageToken`| |Azure Shared Storage access token|Optional. Set one of key, sharedAccessStorageToken or useAzureCredentialsChain..| -|`druid.azure.useAzureCredentialsChain`| Use [DefaultAzureCredential](https://learn.microsoft.com/en-us/java/api/overview/azure/identity-readme?view=azure-java-stable) for authentication |Optional. Set one of key, sharedAccessStorageToken or useAzureCredentialsChain.|False| -|`druid.azure.managedIdentityClientId`| If you want to use managed identity authentication in the `DefaultAzureCredential`, `useAzureCredentialsChain` must be true. ||Optional.| -|`druid.azure.container`| |Azure Storage container name.|Must be set.| -|`druid.azure.prefix`| A prefix string that will be prepended to the blob names for the segments published to Azure deep storage | |""| -|`druid.azure.protocol`| the protocol to use |http or https|https| -|`druid.azure.maxTries`| Number of tries before canceling an Azure operation. 
| |3| -|`druid.azure.maxListingLength`| maximum number of input files matching a given prefix to retrieve at a time | |1024| -|`druid.azure.endpointSuffix`| The endpoint suffix to use. Override the default value to connect to [Azure Government](https://learn.microsoft.com/en-us/azure/azure-government/documentation-government-get-started-connect-to-storage#getting-started-with-storage-api). |Examples: `core.windows.net`, `core.usgovcloudapi.net`|`core.windows.net`| +|Property|Description|Possible Values|Default| +|--------|---------------|-----------|-------| +|`druid.storage.type`|azure||Must be set.| +|`druid.azure.account`||Azure Storage account name.|Must be set.| +|`druid.azure.key`||Azure Storage account key.|Optional. Set one of key, sharedAccessStorageToken or useAzureCredentialsChain..| +|`druid.azure.useAzureCredentialsChain`|Use [DefaultAzureCredential](https://learn.microsoft.com/en-us/java/api/overview/azure/identity-readme?view=azure-java-stable) for authentication|Optional. Set one of key, sharedAccessStorageToken or useAzureCredentialsChain.|False| +|`druid.azure.managedIdentityClientId`|If you want to use managed identity authentication in the `DefaultAzureCredential`, `useAzureCredentialsChain` must be true.||Optional.| +|`druid.azure.container`||Azure Storage container name.|Must be set.| +|`druid.azure.prefix`|A prefix string that will be prepended to the blob names for the segments published to Azure deep storage| |""| +|`druid.azure.protocol`|the protocol to use|http or https|https| +|`druid.azure.maxTries`|Number of tries before canceling an Azure operation.| |3| +|`druid.azure.maxListingLength`|maximum number of input files matching a given prefix to retrieve at a time| |1024| +|`druid.azure.endpointSuffix`|The endpoint suffix to use. 
Override the default value to connect to [Azure Government](https://learn.microsoft.com/en-us/azure/azure-government/documentation-government-get-started-connect-to-storage#getting-started-with-storage-api).|Examples: `core.windows.net`, `core.usgovcloudapi.net`|`core.windows.net`| See [Azure Services](http://azure.microsoft.com/en-us/pricing/free-trial/) for more information. From 038caa97835b42fddc58ebc747a6fef97b603bba Mon Sep 17 00:00:00 2001 From: sviatahorau Date: Tue, 9 Jan 2024 10:50:03 +0100 Subject: [PATCH 8/8] fix typo --- docs/development/extensions-core/azure.md | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/docs/development/extensions-core/azure.md b/docs/development/extensions-core/azure.md index 44f5865303f9..003f39cc5540 100644 --- a/docs/development/extensions-core/azure.md +++ b/docs/development/extensions-core/azure.md @@ -33,9 +33,10 @@ To use this Apache Druid extension, [include](../../configuration/extensions.md# |--------|---------------|-----------|-------| |`druid.storage.type`|azure||Must be set.| |`druid.azure.account`||Azure Storage account name.|Must be set.| -|`druid.azure.key`||Azure Storage account key.|Optional. Set one of key, sharedAccessStorageToken or useAzureCredentialsChain..| +|`druid.azure.key`||Azure Storage account key.|Optional. Set one of key, sharedAccessStorageToken or useAzureCredentialsChain.| +|`druid.azure.sharedAccessStorageToken`||Azure Shared Storage access token|Optional. Set one of key, sharedAccessStorageToken or useAzureCredentialsChain.| |`druid.azure.useAzureCredentialsChain`|Use [DefaultAzureCredential](https://learn.microsoft.com/en-us/java/api/overview/azure/identity-readme?view=azure-java-stable) for authentication|Optional. 
Set one of key, sharedAccessStorageToken or useAzureCredentialsChain.|False| -|`druid.azure.managedIdentityClientId`|If you want to use managed identity authentication in the `DefaultAzureCredential`, `useAzureCredentialsChain` must be true.||Optional.| +|`druid.azure.managedIdentityClientId`|If you want to use managed identity authentication in the `DefaultAzureCredential`, `useAzureCredentialsChain` must be true.||Optional.| |`druid.azure.container`||Azure Storage container name.|Must be set.| |`druid.azure.prefix`|A prefix string that will be prepended to the blob names for the segments published to Azure deep storage| |""| |`druid.azure.protocol`|the protocol to use|http or https|https|