From 6fa2ef96c7cdcdd1a6160803787fee4aca5d9290 Mon Sep 17 00:00:00 2001 From: treff7es Date: Fri, 21 Jun 2024 21:19:00 +0200 Subject: [PATCH] Force http 1.x in datahub client --- .../java/datahub/client/rest/RestEmitter.java | 17 ++++++++++++----- .../java/spark-lineage-beta/README.md | 10 +++++----- 2 files changed, 17 insertions(+), 10 deletions(-) diff --git a/metadata-integration/java/datahub-client/src/main/java/datahub/client/rest/RestEmitter.java b/metadata-integration/java/datahub-client/src/main/java/datahub/client/rest/RestEmitter.java index ed4cee060bd69..dd6a7ba98c87d 100644 --- a/metadata-integration/java/datahub-client/src/main/java/datahub/client/rest/RestEmitter.java +++ b/metadata-integration/java/datahub-client/src/main/java/datahub/client/rest/RestEmitter.java @@ -35,6 +35,7 @@ import org.apache.hc.client5.http.async.methods.SimpleHttpResponse; import org.apache.hc.client5.http.async.methods.SimpleRequestBuilder; import org.apache.hc.client5.http.config.RequestConfig; +import org.apache.hc.client5.http.config.TlsConfig; import org.apache.hc.client5.http.impl.async.CloseableHttpAsyncClient; import org.apache.hc.client5.http.impl.async.HttpAsyncClientBuilder; import org.apache.hc.client5.http.impl.nio.PoolingAsyncClientConnectionManagerBuilder; @@ -45,6 +46,7 @@ import org.apache.hc.core5.http.ContentType; import org.apache.hc.core5.http.HttpStatus; import org.apache.hc.core5.http.nio.ssl.TlsStrategy; +import org.apache.hc.core5.http2.HttpVersionPolicy; import org.apache.hc.core5.ssl.SSLContexts; import org.apache.hc.core5.util.TimeValue; @@ -106,6 +108,14 @@ public RestEmitter(RestEmitterConfig config) { config.getTimeoutSec() * 1000, java.util.concurrent.TimeUnit.MILLISECONDS) .build()); } + PoolingAsyncClientConnectionManagerBuilder poolingAsyncClientConnectionManagerBuilder = + PoolingAsyncClientConnectionManagerBuilder.create(); + + // Forcing http 1.x as 2.0 is not supported yet + TlsConfig tlsHttp1Config = + TlsConfig.copy(TlsConfig.DEFAULT).setVersionPolicy(HttpVersionPolicy.FORCE_HTTP_1).build(); + poolingAsyncClientConnectionManagerBuilder.setDefaultTlsConfig(tlsHttp1Config); + if (config.isDisableSslVerification()) { try { SSLContext sslcontext = @@ -115,15 +125,12 @@ public RestEmitter(RestEmitterConfig config) { .setSslContext(sslcontext) .setHostnameVerifier(NoopHostnameVerifier.INSTANCE) .build(); - - httpClientBuilder.setConnectionManager( - PoolingAsyncClientConnectionManagerBuilder.create() - .setTlsStrategy(tlsStrategy) - .build()); + poolingAsyncClientConnectionManagerBuilder.setTlsStrategy(tlsStrategy); } catch (KeyManagementException | NoSuchAlgorithmException | KeyStoreException e) { throw new RuntimeException("Error while creating insecure http client", e); } } + httpClientBuilder.setConnectionManager(poolingAsyncClientConnectionManagerBuilder.build()); httpClientBuilder.setRetryStrategy( new DatahubHttpRequestRetryStrategy( diff --git a/metadata-integration/java/spark-lineage-beta/README.md b/metadata-integration/java/spark-lineage-beta/README.md index 146308e20d05a..ca966b08bc19a 100644 --- a/metadata-integration/java/spark-lineage-beta/README.md +++ b/metadata-integration/java/spark-lineage-beta/README.md @@ -24,7 +24,7 @@ When running jobs using spark-submit, the agent needs to be configured in the co ```text #Configuring DataHub spark agent jar -spark.jars.packages io.acryl:acryl-spark-lineage:0.2.6 +spark.jars.packages io.acryl:acryl-spark-lineage:0.2.10 spark.extraListeners datahub.spark.DatahubSparkListener spark.datahub.rest.server http://localhost:8080 ``` @@ -32,7 +32,7 @@ spark.datahub.rest.server http://localhost:8080 ## spark-submit command line ```sh -spark-submit --packages io.acryl:acryl-spark-lineage:0.2.6 --conf "spark.extraListeners=datahub.spark.DatahubSparkListener" my_spark_job_to_run.py +spark-submit --packages io.acryl:acryl-spark-lineage:0.2.10 --conf "spark.extraListeners=datahub.spark.DatahubSparkListener" my_spark_job_to_run.py ``` ### Configuration Instructions: Amazon EMR @@ -41,7 +41,7 @@ Set the following spark-defaults configuration properties as it stated [here](https://docs.aws.amazon.com/emr/latest/ReleaseGuide/emr-spark-configure.html) ```text -spark.jars.packages io.acryl:acryl-spark-lineage:0.2.6 +spark.jars.packages io.acryl:acryl-spark-lineage:0.2.10 spark.extraListeners datahub.spark.DatahubSparkListener spark.datahub.rest.server https://your_datahub_host/gms #If you have authentication set up then you also need to specify the Datahub access token @@ -56,7 +56,7 @@ When running interactive jobs from a notebook, the listener can be configured wh spark = SparkSession.builder .master("spark://spark-master:7077") .appName("test-application") -.config("spark.jars.packages", "io.acryl:acryl-spark-lineage:0.2.6") +.config("spark.jars.packages", "io.acryl:acryl-spark-lineage:0.2.10") .config("spark.extraListeners", "datahub.spark.DatahubSparkListener") .config("spark.datahub.rest.server", "http://localhost:8080") .enableHiveSupport() @@ -79,7 +79,7 @@ appName("test-application") config("spark.master","spark://spark-master:7077") . -config("spark.jars.packages","io.acryl:acryl-spark-lineage:0.2.6") +config("spark.jars.packages","io.acryl:acryl-spark-lineage:0.2.10") . config("spark.extraListeners","datahub.spark.DatahubSparkListener")