From b5f86b5066d789df71bdad45d643a48eaeff0acc Mon Sep 17 00:00:00 2001 From: maskarb Date: Thu, 24 Oct 2024 16:13:40 -0400 Subject: [PATCH 1/7] [COST-5459] update config for AWS Glue --- deploy/clowdapp.yaml | 111 ++++++++++++++++++++++--------------------- 1 file changed, 56 insertions(+), 55 deletions(-) diff --git a/deploy/clowdapp.yaml b/deploy/clowdapp.yaml index 2dc6847..704efdf 100644 --- a/deploy/clowdapp.yaml +++ b/deploy/clowdapp.yaml @@ -13,7 +13,7 @@ objects: - apiVersion: v1 kind: ConfigMap metadata: - name: trino-scripts-${CONFIGMAP_HASH} + name: trino-scripts labels: app: trino data: @@ -56,24 +56,6 @@ objects: export PGSSLROOTCERT=$temp_file fi - export AWS_ACCESS_KEY_ID=$(jq -r '.objectStore.buckets[0].accessKey' ${ACG_CONFIG}) - export AWS_SECRET_ACCESS_KEY=$(jq -r '.objectStore.buckets[0].secretKey' ${ACG_CONFIG}) - export S3_BUCKET_NAME=$(jq -r '.objectStore.buckets[0].requestedName' ${ACG_CONFIG}) - - OBJECTSTORE_HOST=$(jq -r '.objectStore.hostname' ${ACG_CONFIG}) - OBJECTSTORE_PORT=$(jq -r '.objectStore.port' ${ACG_CONFIG}) - OBJECTSTORE_TLS=$(jq -r '.objectStore.tls' ${ACG_CONFIG}) - - export URI_PREFIX=https - if [[ $OBJECTSTORE_TLS == *"false"* ]]; then - export URI_PREFIX=http - fi - - S3_ENDPOINT="${URI_PREFIX}://${OBJECTSTORE_HOST}" - if [[ -n "${OBJECTSTORE_PORT}" ]] && [[ "${OBJECTSTORE_PORT}" != "null" ]]; then - S3_ENDPOINT="${S3_ENDPOINT}:${OBJECTSTORE_PORT}" - fi - export S3_ENDPOINT fi echo "Copy config files to ${TRINO_HOME}/" cp -v -L -r -f /etc/trino-init/* ${TRINO_HOME}/ @@ -108,12 +90,13 @@ objects: echo "" >> $HIVE_CATALOG_CONFIG if ! grep -q -F 's3.aws-access-key' "$HIVE_CATALOG_CONFIG"; then echo "Adding s3.aws-access-key and s3.aws-secret-key to $HIVE_CATALOG_CONFIG" + echo "s3.aws-access-key=$AWS_ACCESS_KEY_ID" >> "$HIVE_CATALOG_CONFIG" echo "s3.aws-secret-key=$AWS_SECRET_ACCESS_KEY" >> "$HIVE_CATALOG_CONFIG" - echo "s3.endpoint=$S3_ENDPOINT" >> "$HIVE_CATALOG_CONFIG" - if [[ $OBJECTSTORE_TLS == *"false"* ]]; then - echo "s3.sse.type=None" >> "$HIVE_CATALOG_CONFIG" - fi + + echo "hive.metastore.glue.aws-access-key=$AWS_ACCESS_KEY_ID" >> "$HIVE_CATALOG_CONFIG" + echo "hive.metastore.glue.aws-secret-key=$AWS_SECRET_ACCESS_KEY" >> "$HIVE_CATALOG_CONFIG" + fi # add UID to /etc/passwd if missing @@ -137,7 +120,7 @@ objects: - apiVersion: v1 kind: ConfigMap metadata: - name: trino-config-${CONFIGMAP_HASH} + name: trino-config labels: app: trino data: @@ -265,7 +248,7 @@ objects: - apiVersion: v1 kind: ConfigMap metadata: - name: trino-config-catalog-${CONFIGMAP_HASH} + name: trino-config-catalog labels: app: trino data: @@ -345,19 +328,24 @@ objects: type: GAUGE hive.properties: |- connector.name=hive + hive.metastore=glue hive.collect-column-statistics-on-write=true hive.compression-codec=SNAPPY - hive.metastore.authentication.type=NONE - hive.metastore.thrift.client.connect-timeout=${METASTORE_TIMEOUT} - hive.metastore.thrift.client.read-timeout=${METASTORE_READ_TIMEOUT} hive.max-partitions-per-scan=${HIVE_PARTITION_LIMIT} + hive.non-managed-table-writes-enabled=true hive.partition-statistics-sample-size=${HIVE_PARTITION_STATS_SAMPLE_SIZE} - hive.metastore.uri=thrift://hive-metastore:10000 hive.parquet.use-column-names=true + hive.recursive-directories=true hive.storage-format=Parquet fs.native-s3.enabled=true s3.path-style-access=true s3.region=us-east-1 + s3.endpoint=${S3_ENDPOINT} + + hive.metastore.glue.default-warehouse-dir=s3://${S3_BUCKET_NAME}/${WAREHOUSE_DIR} + hive.metastore.glue.region=us-east-1 + hive.metastore.glue.catalogid=${AWS_CATALOG_ID} + blackhole.propeties: |- connector.name=blackhole jmx.properties: |- @@ -417,8 +405,18 @@ objects: fieldRef: apiVersion: v1 fieldPath: metadata.namespace - - name: S3_DATA_DIR - value: 'data' + - name: AWS_ACCESS_KEY_ID + valueFrom: + secretKeyRef: + key: aws-access-key-id + name: koku-aws + optional: false + - name: AWS_SECRET_ACCESS_KEY + valueFrom: + secretKeyRef: + key: aws-secret-access-key + name: koku-aws + optional: false - name: QUERY_MAX_MEMORY_PER_NODE value: ${QUERY_MAX_MEMORY_PER_NODE} - name: QUERY_MAX_MEMORY @@ -460,17 +458,17 @@ objects: volumes: - name: trino-scripts configMap: - name: trino-scripts-${CONFIGMAP_HASH} + name: trino-scripts items: - key: entrypoint.sh path: entrypoint.sh mode: 509 - name: trino-config configMap: - name: trino-config-${CONFIGMAP_HASH} + name: trino-config - name: trino-config-catalog configMap: - name: trino-config-catalog-${CONFIGMAP_HASH} + name: trino-config-catalog - name: trino-etc emptyDir: {} - name: trino-data @@ -532,8 +530,18 @@ objects: fieldRef: apiVersion: v1 fieldPath: metadata.namespace - - name: S3_DATA_DIR - value: 'data' + - name: AWS_ACCESS_KEY_ID + valueFrom: + secretKeyRef: + key: aws-access-key-id + name: koku-aws + optional: false + - name: AWS_SECRET_ACCESS_KEY + valueFrom: + secretKeyRef: + key: aws-secret-access-key + name: koku-aws + optional: false - name: QUERY_MAX_MEMORY_PER_NODE value: ${QUERY_MAX_MEMORY_PER_NODE} - name: QUERY_MAX_MEMORY @@ -581,17 +589,17 @@ objects: volumes: - name: trino-scripts configMap: - name: trino-scripts-${CONFIGMAP_HASH} + name: trino-scripts items: - key: entrypoint.sh path: entrypoint.sh mode: 509 - name: trino-config configMap: - name: trino-config-${CONFIGMAP_HASH} + name: trino-config - name: trino-config-catalog configMap: - name: trino-config-catalog-${CONFIGMAP_HASH} + name: trino-config-catalog - name: trino-etc emptyDir: {} - name: trino-data @@ -635,10 +643,8 @@ parameters: required: true - name: ENV_NAME required: false -- name: S3_BUCKET_NAME - value: 'hccm-s3' - name: NODE_ENV - value: 'production' + value: production - name: MACHINE_POOL_OPTION value: '' - name: TRINO_HISTORY_FILE @@ -704,6 +710,14 @@ parameters: name: MAX_HEAP_SIZE value: '3G' +# S3 params +- name: S3_ENDPOINT + value: s3.us-east-1.amazonaws.com +- name: S3_BUCKET_NAME + value: hccm-s3 +- name: WAREHOUSE_DIR + values: data + # Trino configruation Params - description: Max amount of user memory a query can use on a worker (Trino default - JVM max memory * 0.3) displayName: query.max-memory-per-node @@ -727,14 +741,6 @@ parameters: required: true # Trino Hive config -- description: Socket connect timeout for metastore client - displayName: hive.metastore.thrift.client.connect-timeout - name: METASTORE_READ_TIMEOUT - value: '300s' -- description: Socket read timeout for metastore client - displayName: hive.metastore.thrift.client.read-timeout - name: METASTORE_TIMEOUT - value: '300s' - description: Specifies the number of partitions to analyze when computing table statistics displayName: hive.partition-statistics-sample-size name: HIVE_PARTITION_STATS_SAMPLE_SIZE @@ -764,8 +770,3 @@ parameters: displayName: livenessPeriodSeconds name: LIVENESS_PROBE_PERIOD value: '120' - -# Configmap updater -- name: CONFIGMAP_HASH - description: "The random hash to change the configmap names" - value: '000001' From ae1093dffd467ebec69529cf86b62ddab25f9d3b Mon Sep 17 00:00:00 2001 From: maskarb Date: Thu, 24 Oct 2024 16:15:35 -0400 Subject: [PATCH 2/7] add catalog param --- deploy/clowdapp.yaml | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/deploy/clowdapp.yaml b/deploy/clowdapp.yaml index 704efdf..0864bdc 100644 --- a/deploy/clowdapp.yaml +++ b/deploy/clowdapp.yaml @@ -710,13 +710,15 @@ parameters: name: MAX_HEAP_SIZE value: '3G' -# S3 params +# AWS params - name: S3_ENDPOINT value: s3.us-east-1.amazonaws.com - name: S3_BUCKET_NAME value: hccm-s3 - name: WAREHOUSE_DIR - values: data + value: data +- name: AWS_CATALOG_ID + value: '589173575009' # Trino configruation Params - description: Max amount of user memory a query can use on a worker (Trino default - JVM max memory * 0.3) From 763842ca5c003d3d472fe2418d57c1ad1089610c Mon Sep 17 00:00:00 2001 From: maskarb Date: Fri, 25 Oct 2024 09:03:05 -0400 Subject: [PATCH 3/7] copy secret to test namespace --- pr_check.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/pr_check.sh b/pr_check.sh index 7c651e1..9ab41be 100755 --- a/pr_check.sh +++ b/pr_check.sh @@ -27,6 +27,7 @@ source $CICD_ROOT/build.sh # export NAMESPACE=$(bonfire namespace reserve) # oc process --local -f deploy/clowdapp.yaml | oc apply -f - -n $NAMESPACE source $CICD_ROOT/deploy_ephemeral_env.sh +oc get secret koku-aws --namespace=ephemeral-base -oyaml | oc apply --namespace=${NAMESPACE} -f - #source $CICD_ROOT/smoke_test.sh mkdir -p $WORKSPACE/artifacts From 7450b5ae65dbc4317805495d939df6fd1d5fcaca Mon Sep 17 00:00:00 2001 From: maskarb Date: Fri, 25 Oct 2024 09:14:48 -0400 Subject: [PATCH 4/7] copy secret before deploy --- pr_check.sh | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/pr_check.sh b/pr_check.sh index 9ab41be..b0b5f6d 100755 --- a/pr_check.sh +++ b/pr_check.sh @@ -23,12 +23,23 @@ curl -s $CICD_URL/bootstrap.sh > .cicd_bootstrap.sh && source .cicd_bootstrap.sh source $CICD_ROOT/build.sh -# source $CICD_ROOT/_common_deploy_logic.sh -# export NAMESPACE=$(bonfire namespace reserve) -# oc process --local -f deploy/clowdapp.yaml | oc apply -f - -n $NAMESPACE -source $CICD_ROOT/deploy_ephemeral_env.sh +source ${CICD_ROOT}/_common_deploy_logic.sh +export NAMESPACE=$(bonfire namespace reserve) +SMOKE_NAMESPACE=$NAMESPACE oc get secret koku-aws --namespace=ephemeral-base -oyaml | oc apply --namespace=${NAMESPACE} -f - -#source $CICD_ROOT/smoke_test.sh + +bonfire deploy \ + ${APP_NAME} \ + --source=appsre \ + --ref-env ${REF_ENV} \ + --set-image-tag ${IMAGE}=${IMAGE_TAG} \ + --namespace ${NAMESPACE} \ + --timeout ${DEPLOY_TIMEOUT} \ + --optional-deps-method hybrid \ + ${TEMPLATE_REF_ARG} \ + ${COMPONENTS_ARG} \ + ${COMPONENTS_RESOURCES_ARG} \ + ${EXTRA_DEPLOY_ARGS} mkdir -p $WORKSPACE/artifacts cat << EOF > ${WORKSPACE}/artifacts/junit-dummy.xml From 839e07b1976bada9381ad677741ea6773137e88c Mon Sep 17 00:00:00 2001 From: maskarb Date: Fri, 25 Oct 2024 09:19:53 -0400 Subject: [PATCH 5/7] strip namespace when copying --- pr_check.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pr_check.sh b/pr_check.sh index b0b5f6d..2f1a8f5 100755 --- a/pr_check.sh +++ b/pr_check.sh @@ -26,7 +26,7 @@ source $CICD_ROOT/build.sh source ${CICD_ROOT}/_common_deploy_logic.sh export NAMESPACE=$(bonfire namespace reserve) SMOKE_NAMESPACE=$NAMESPACE -oc get secret koku-aws --namespace=ephemeral-base -oyaml | oc apply --namespace=${NAMESPACE} -f - +oc get secret koku-aws --namespace=ephemeral-base -oyaml | grep -v '^\s*namespace:\s' | oc apply --namespace=${NAMESPACE} -f - bonfire deploy \ ${APP_NAME} \ From cbeab5fb5e3c0448fb157d4c701485a6d76246f0 Mon Sep 17 00:00:00 2001 From: maskarb Date: Fri, 25 Oct 2024 10:53:15 -0400 Subject: [PATCH 6/7] add https --- deploy/clowdapp.yaml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/deploy/clowdapp.yaml b/deploy/clowdapp.yaml index 0864bdc..31e99a0 100644 --- a/deploy/clowdapp.yaml +++ b/deploy/clowdapp.yaml @@ -344,6 +344,7 @@ objects: hive.metastore.glue.default-warehouse-dir=s3://${S3_BUCKET_NAME}/${WAREHOUSE_DIR} hive.metastore.glue.region=us-east-1 + hive.metastore.glue.endpoint-url hive.metastore.glue.catalogid=${AWS_CATALOG_ID} blackhole.propeties: |- @@ -712,7 +713,7 @@ parameters: # AWS params - name: S3_ENDPOINT - value: s3.us-east-1.amazonaws.com + value: https://s3.us-east-1.amazonaws.com - name: S3_BUCKET_NAME value: hccm-s3 - name: WAREHOUSE_DIR From 62ff353a29a1e068709c7adf9c01be1a70dcf3b0 Mon Sep 17 00:00:00 2001 From: maskarb Date: Thu, 7 Nov 2024 10:28:17 -0500 Subject: [PATCH 7/7] some cleanup --- deploy/clowdapp.yaml | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/deploy/clowdapp.yaml b/deploy/clowdapp.yaml index 31e99a0..2f2b86c 100644 --- a/deploy/clowdapp.yaml +++ b/deploy/clowdapp.yaml @@ -339,12 +339,11 @@ objects: hive.storage-format=Parquet fs.native-s3.enabled=true s3.path-style-access=true - s3.region=us-east-1 + s3.region=${S3_REGION} s3.endpoint=${S3_ENDPOINT} hive.metastore.glue.default-warehouse-dir=s3://${S3_BUCKET_NAME}/${WAREHOUSE_DIR} - hive.metastore.glue.region=us-east-1 - hive.metastore.glue.endpoint-url + hive.metastore.glue.region=${S3_REGION} hive.metastore.glue.catalogid=${AWS_CATALOG_ID} blackhole.propeties: |- @@ -714,6 +713,8 @@ parameters: # AWS params - name: S3_ENDPOINT value: https://s3.us-east-1.amazonaws.com +- name: S3_REGION + value: us-east-1 - name: S3_BUCKET_NAME value: hccm-s3 - name: WAREHOUSE_DIR