From 38341f44d9cb5c7d04920d47f73223c6f0b8bca3 Mon Sep 17 00:00:00 2001 From: Varun Rao Bhamidimarri Date: Mon, 15 Nov 2021 16:08:36 -0600 Subject: [PATCH 1/6] Add EMR 6.x versions for V3 --- aws_emr_blog_v3/cloudformation/emr-template.template | 2 ++ aws_emr_blog_v3/cloudformation/nestedstack.template | 2 ++ aws_emr_blog_v3/cloudformation/step2_ranger-rds-emr.template | 2 ++ 3 files changed, 6 insertions(+) diff --git a/aws_emr_blog_v3/cloudformation/emr-template.template b/aws_emr_blog_v3/cloudformation/emr-template.template index 7494776..5dcfe59 100644 --- a/aws_emr_blog_v3/cloudformation/emr-template.template +++ b/aws_emr_blog_v3/cloudformation/emr-template.template @@ -243,6 +243,8 @@ Parameters: Default: emr-5.32.0 AllowedValues: - emr-5.32.0 + - emr-6.3.0 + - emr-6.4.0 Description: Release label for the EMR cluster AppsEMR: Description: 'Comma separated list of applications to install on the cluster e.g., ' diff --git a/aws_emr_blog_v3/cloudformation/nestedstack.template b/aws_emr_blog_v3/cloudformation/nestedstack.template index 315b6b9..d1f9461 100644 --- a/aws_emr_blog_v3/cloudformation/nestedstack.template +++ b/aws_emr_blog_v3/cloudformation/nestedstack.template @@ -295,6 +295,8 @@ Parameters: Default: emr-5.32.0 AllowedValues: - emr-5.32.0 + - emr-6.3.0 + - emr-6.4.0 KeyPairName: Description: Name of an existing EC2 key pair to access the Amazon EMR cluster Type: AWS::EC2::KeyPair::KeyName diff --git a/aws_emr_blog_v3/cloudformation/step2_ranger-rds-emr.template b/aws_emr_blog_v3/cloudformation/step2_ranger-rds-emr.template index 0750105..8aa618c 100644 --- a/aws_emr_blog_v3/cloudformation/step2_ranger-rds-emr.template +++ b/aws_emr_blog_v3/cloudformation/step2_ranger-rds-emr.template @@ -227,6 +227,8 @@ Parameters: Default: emr-5.32.0 AllowedValues: - emr-5.32.0 + - emr-6.3.0 + - emr-6.4.0 S3Bucket: Description: S3Bucket for the code [update this is you want to run this stack in a region other than US-EAST-1] Type: String From 
b3cdef2d3165b673eddf2ebbf4fac26cb8a492c4 Mon Sep 17 00:00:00 2001 From: Varun Rao Bhamidimarri Date: Thu, 30 Jun 2022 14:42:55 -0500 Subject: [PATCH 2/6] Add directory for service definitions --- .../ranger-servicedef-amazon-emr-s3.json | 75 +++ .../ranger-servicedef-amazon-emr-spark.json | 150 +++++ .../ranger-servicedef-trino.json | 516 ++++++++++++++++++ 3 files changed, 741 insertions(+) create mode 100644 ranger-service-definition/ranger-servicedef-amazon-emr-s3.json create mode 100644 ranger-service-definition/ranger-servicedef-amazon-emr-spark.json create mode 100644 ranger-service-definition/ranger-servicedef-trino.json diff --git a/ranger-service-definition/ranger-servicedef-amazon-emr-s3.json b/ranger-service-definition/ranger-servicedef-amazon-emr-s3.json new file mode 100644 index 0000000..f717c8a --- /dev/null +++ b/ranger-service-definition/ranger-servicedef-amazon-emr-s3.json @@ -0,0 +1,75 @@ +{ + "id": 157, + "name": "amazon-emr-emrfs", + "label": "EMRFS S3 object storage", + "description": "EMRFS Ranger plugin for AWS S3 object storage service", + "implClass": "com.amazonaws.emr.ranger.emrfs.RangerServiceS3", + "version": 1, + "options": { + "enableDenyAndExceptionsInPolicies": "false" + }, + "configs": [ + { + "itemId": 1, + "name": "commonNameForCertificate", + "type": "string", + "subType": "", + "mandatory": false, + "validationRegEx": "", + "validationMessage": "", + "uiHint": "", + "label": "Common Name for Certificate" + } + ], + "resources": [ + { + "itemId": 1, + "name": "sthreeresource", + "type": "path", + "level": 10, + "parent": "", + "mandatory": true, + "lookupSupported": true, + "recursiveSupported": true, + "excludesSupported": false, + "matcher": "org.apache.ranger.plugin.resourcematcher.RangerPathResourceMatcher", + "matcherOptions": { + "wildCard": true, + "ignoreCase": false + }, + "validationRegEx": ".+", + "validationMessage": "S3 resource can't be empty", + "uiHint": "", + "label": "S3 resource", + "description": "Represents 
an S3 resource i.e. either a bucket or bucket and object key. Supports default Ranger wildcards." + } + ], + "accessTypes": [ + { + "itemId": 1, + "name": "GetObject", + "label": "GetObject" + }, + { + "itemId": 2, + "name": "PutObject", + "label": "PutObject" + }, + { + "itemId": 3, + "name": "ListObjects", + "label": "ListObjects" + }, + { + "itemId": 4, + "name": "DeleteObject", + "label": "DeleteObject" + } + ], + "enums": [ + ], + "contextEnrichers": [ + ], + "policyConditions": [ + ] +} diff --git a/ranger-service-definition/ranger-servicedef-amazon-emr-spark.json b/ranger-service-definition/ranger-servicedef-amazon-emr-spark.json new file mode 100644 index 0000000..2a5ad6e --- /dev/null +++ b/ranger-service-definition/ranger-servicedef-amazon-emr-spark.json @@ -0,0 +1,150 @@ +{ + "name": "amazon-emr-spark", + "implClass": "org.apache.ranger.services.spark.RangerServiceSpark", + "label": "Amazon EMR Spark", + "description": "Amazon EMR Spark", + "guid": "f4707ecc-b5c6-11ea-b8ab-02f5a39015b1", + "resources": + [ + { + "itemId": 1, + "name": "database", + "type": "string", + "level": 10, + "parent": "", + "mandatory": true, + "lookupSupported": false, + "recursiveSupported": false, + "excludesSupported": true, + "matcher": "org.apache.ranger.plugin.resourcematcher.RangerDefaultResourceMatcher", + "matcherOptions": { "wildCard":true, "ignoreCase":true }, + "validationRegEx":"", + "validationMessage": "", + "uiHint":"", + "label": "EMR Spark Database", + "description": "EMR Spark Database" + }, + + { + "itemId": 2, + "name": "table", + "type": "string", + "level": 20, + "parent": "database", + "mandatory": true, + "lookupSupported": false, + "recursiveSupported": false, + "excludesSupported": true, + "matcher": "org.apache.ranger.plugin.resourcematcher.RangerDefaultResourceMatcher", + "matcherOptions": { "wildCard":true, "ignoreCase":true }, + "validationRegEx":"", + "validationMessage": "", + "uiHint":"", + "label": "EMR Spark Table", + "description": "EMR Spark 
Table" + }, + + { + "itemId": 3, + "name": "column", + "type": "string", + "level": 30, + "parent": "table", + "mandatory": true, + "lookupSupported": false, + "recursiveSupported": false, + "excludesSupported": true, + "matcher": "org.apache.ranger.plugin.resourcematcher.RangerDefaultResourceMatcher", + "matcherOptions": { "wildCard":true, "ignoreCase":true }, + "validationRegEx":"", + "validationMessage": "", + "uiHint":"", + "label": "EMR Spark Column", + "description": "EMR Spark Column" + }, + + { + "itemId": 4, + "name": "url", + "type": "string", + "level": 10, + "parent": "", + "mandatory": true, + "lookupSupported": false, + "recursiveSupported": true, + "excludesSupported": false, + "matcher": "org.apache.ranger.plugin.resourcematcher.RangerURLResourceMatcher", + "matcherOptions": { "wildCard":true, "ignoreCase":false }, + "validationRegEx":"", + "validationMessage": "", + "uiHint":"", + "label": "EMR Spark Table URL", + "description": "EMR Spark Table URL" + } + ], + + "accessTypes": + [ + { + "itemId": 1, + "name": "select", + "label": "select" + }, + { + "itemId": 2, + "name": "update", + "label": "update" + }, + { + "itemId": 3, + "name": "alter", + "label": "alter" + }, + { + "itemId": 4, + "name": "read", + "label": "read" + }, + { + "itemId": 5, + "name": "write", + "label": "write" + }, + { + "itemId": 6, + "name": "create", + "label": "create" + }, + { + "itemId": 7, + "name": "drop", + "label": "drop" + } + ], + "configs": + [ + { + "itemId": 1, + "name": "commonNameForCertificate", + "type": "string", + "mandatory": false, + "validationRegEx":"", + "validationMessage": "", + "uiHint":"", + "label": "Common Name for Certificate" + } + ], + + "enums": + [ + + ], + + "contextEnrichers": + [ + ], + + "policyConditions": + [ + ] +} diff --git a/ranger-service-definition/ranger-servicedef-trino.json b/ranger-service-definition/ranger-servicedef-trino.json new file mode 100644 index 0000000..4e1475b --- /dev/null +++ 
b/ranger-service-definition/ranger-servicedef-trino.json @@ -0,0 +1,516 @@ +{ + "name": "trino", + "displayName": "trino", + "implClass": "", + "label": "Trino", + "description": "Trino", + "guid": "379a9fe5-1b6e-4091-a584-4890e245e6c1", + "resources": [ + { + "itemId": 1, + "name": "catalog", + "type": "string", + "level": 10, + "parent": "", + "mandatory": true, + "isValidLeaf": true, + "lookupSupported": true, + "recursiveSupported": false, + "excludesSupported": true, + "matcher": "org.apache.ranger.plugin.resourcematcher.RangerDefaultResourceMatcher", + "matcherOptions": { + "wildCard": true, + "ignoreCase": true + }, + "validationRegEx": "", + "validationMessage": "", + "uiHint": "", + "label": "Trino Catalog", + "description": "Trino Catalog" + }, + { + "itemId": 2, + "name": "schema", + "type": "string", + "level": 20, + "parent": "catalog", + "mandatory": true, + "isValidLeaf": true, + "lookupSupported": true, + "recursiveSupported": false, + "excludesSupported": true, + "matcher": "org.apache.ranger.plugin.resourcematcher.RangerDefaultResourceMatcher", + "matcherOptions": { + "wildCard": true, + "ignoreCase": true + }, + "validationRegEx": "", + "validationMessage": "", + "uiHint": "", + "label": "Trino Schema", + "description": "Trino Schema" + }, + { + "itemId": 3, + "name": "table", + "type": "string", + "level": 30, + "parent": "schema", + "mandatory": true, + "isValidLeaf": true, + "lookupSupported": true, + "recursiveSupported": false, + "excludesSupported": true, + "matcher": "org.apache.ranger.plugin.resourcematcher.RangerDefaultResourceMatcher", + "matcherOptions": { + "wildCard": true, + "ignoreCase": true + }, + "validationRegEx": "", + "validationMessage": "", + "uiHint": "", + "label": "Trino Table", + "description": "Trino Table" + }, + { + "itemId": 4, + "name": "column", + "type": "string", + "level": 40, + "parent": "table", + "mandatory": true, + "lookupSupported": true, + "recursiveSupported": false, + "excludesSupported": true, + 
"matcher": "org.apache.ranger.plugin.resourcematcher.RangerDefaultResourceMatcher", + "matcherOptions": { + "wildCard": true, + "ignoreCase": true + }, + "validationRegEx": "", + "validationMessage": "", + "uiHint": "", + "label": "Trino Column", + "description": "Trino Column" + }, + { + "itemId": 5, + "name": "trinouser", + "type": "string", + "level": 10, + "parent": "", + "mandatory": true, + "lookupSupported": false, + "recursiveSupported": false, + "excludesSupported": false, + "matcher": "org.apache.ranger.plugin.resourcematcher.RangerDefaultResourceMatcher", + "matcherOptions": { + "wildCard": true, + "ignoreCase": true + }, + "validationRegEx": "", + "validationMessage": "", + "uiHint": "", + "label": "Trino User", + "description": "Trino User", + "accessTypeRestrictions": ["impersonate"] + }, + { + "itemId": 6, + "name": "systemproperty", + "type": "string", + "level": 10, + "parent": "", + "mandatory": true, + "lookupSupported": false, + "recursiveSupported": false, + "excludesSupported": false, + "matcher": "org.apache.ranger.plugin.resourcematcher.RangerDefaultResourceMatcher", + "matcherOptions": { + "wildCard": true, + "ignoreCase": true + }, + "validationRegEx": "", + "validationMessage": "", + "uiHint": "", + "label": "System Property", + "description": "Trino System Property", + "accessTypeRestrictions": ["alter"] + }, + { + "itemId": 7, + "name": "sessionproperty", + "type": "string", + "level": 20, + "parent": "catalog", + "mandatory": true, + "lookupSupported": false, + "recursiveSupported": false, + "excludesSupported": false, + "matcher": "org.apache.ranger.plugin.resourcematcher.RangerDefaultResourceMatcher", + "matcherOptions": { + "wildCard": true, + "ignoreCase": true + }, + "validationRegEx": "", + "validationMessage": "", + "uiHint": "", + "label": "Catalog Session Property", + "description": "Trino Catalog Session Property", + "accessTypeRestrictions": ["alter"] + }, + { + "itemId": 8, + "name": "function", + "type": "string", + 
"level": 10, + "parent": "", + "mandatory": true, + "lookupSupported": false, + "recursiveSupported": false, + "excludesSupported": false, + "matcher": "org.apache.ranger.plugin.resourcematcher.RangerDefaultResourceMatcher", + "matcherOptions": { + "wildCard": true, + "ignoreCase": true + }, + "validationRegEx": "", + "validationMessage": "", + "uiHint": "", + "label": "Trino Function", + "description": "Trino Function", + "accessTypeRestrictions": ["execute", "grant"] + }, + { + "itemId": 9, + "name": "procedure", + "type": "string", + "level": 30, + "parent": "schema", + "mandatory": true, + "lookupSupported": false, + "recursiveSupported": false, + "excludesSupported": false, + "matcher": "org.apache.ranger.plugin.resourcematcher.RangerDefaultResourceMatcher", + "matcherOptions": { + "wildCard": true, + "ignoreCase": true + }, + "validationRegEx": "", + "validationMessage": "", + "uiHint": "", + "label": "Schema Procedure", + "description": "Schema Procedure", + "accessTypeRestrictions": ["execute", "grant"] + }, + { + "itemId": 10, + "name": "url", + "type": "string", + "level": 10, + "parent": "", + "mandatory": true, + "lookupSupported": false, + "recursiveSupported": true, + "excludesSupported": false, + "matcher": "org.apache.ranger.plugin.resourcematcher.RangerPathResourceMatcher", + "matcherOptions": { + "wildCard": true, + "ignoreCase": false + }, + "validationRegEx": "", + "validationMessage": "", + "uiHint": "", + "label": "URL", + "description": "External URL", + "accessTypeRestrictions": ["read", "write"] + } + ], + "accessTypes": [ + { + "itemId": 1, + "name": "select", + "label": "Select" + }, + { + "itemId": 2, + "name": "insert", + "label": "Insert" + }, + { + "itemId": 3, + "name": "create", + "label": "Create" + }, + { + "itemId": 4, + "name": "drop", + "label": "Drop" + }, + { + "itemId": 5, + "name": "delete", + "label": "Delete" + }, + { + "itemId": 6, + "name": "use", + "label": "Use" + }, + { + "itemId": 7, + "name": "alter", + "label": 
"Alter" + }, + { + "itemId": 8, + "name": "grant", + "label": "Grant" + }, + { + "itemId": 9, + "name": "revoke", + "label": "Revoke" + }, + { + "itemId": 10, + "name": "show", + "label": "Show" + }, + { + "itemId": 11, + "name": "impersonate", + "label": "Impersonate" + }, + { + "itemId": 12, + "name": "all", + "label": "All", + "impliedGrants": [ + "select", + "insert", + "create", + "delete", + "drop", + "use", + "alter", + "grant", + "revoke", + "show", + "impersonate", + "execute", + "read", + "write" + ] + }, + { + "itemId": 13, + "name": "execute", + "label": "execute" + }, + { + "itemId": 14, + "name": "read", + "label": "Read" + }, + { + "itemId": 15, + "name": "write", + "label": "Write" + } + ], + "configs": [ + { + "itemId": 1, + "name": "username", + "type": "string", + "mandatory": true, + "validationRegEx": "", + "validationMessage": "", + "uiHint": "", + "label": "Username" + }, + { + "itemId": 2, + "name": "password", + "type": "password", + "mandatory": false, + "validationRegEx": "", + "validationMessage": "", + "uiHint": "", + "label": "Password" + }, + { + "itemId": 3, + "name": "jdbc.driverClassName", + "type": "string", + "mandatory": true, + "validationRegEx": "", + "validationMessage": "", + "uiHint": "", + "defaultValue": "io.trino.jdbc.TrinoDriver" + }, + { + "itemId": 4, + "name": "jdbc.url", + "type": "string", + "mandatory": true, + "defaultValue": "", + "validationRegEx": "", + "validationMessage": "", + "uiHint": "" + }, + { + "itemId": 5, + "name": "commonNameForCertificate", + "type": "string", + "mandatory": false, + "validationRegEx":"", + "validationMessage": "", + "uiHint":"", + "label": "Common Name for Certificate" + } + ], + "enums": [ + ], + "contextEnrichers": [ + ], + "policyConditions": + [ + ], + "dataMaskDef": { + "accessTypes": [ + { + "name": "select" + } + ], + "resources": [ + { + "name": "catalog", + "matcherOptions": { + "wildCard": "true" + }, + "lookupSupported": true, + "uiHint":"{ \"singleValue\":true }" + }, 
+ { + "name": "schema", + "matcherOptions": { + "wildCard": "true" + }, + "lookupSupported": true, + "uiHint":"{ \"singleValue\":true }" + }, + { + "name": "table", + "matcherOptions": { + "wildCard": "true" + }, + "lookupSupported": true, + "uiHint":"{ \"singleValue\":true }" + }, + { + "name": "column", + "matcherOptions": { + "wildCard": "true" + }, + "lookupSupported": true, + "uiHint":"{ \"singleValue\":true }" + } + ], + "maskTypes": [ + { + "itemId": 1, + "name": "MASK", + "label": "Redact", + "description": "Replace lowercase with 'x', uppercase with 'X', digits with '0'", + "transformer": "cast(regexp_replace(regexp_replace(regexp_replace({col},'([A-Z])', 'X'),'([a-z])','x'),'([0-9])','0') as {type})", + "dataMaskOptions": { + } + }, + { + "itemId": 2, + "name": "MASK_SHOW_LAST_4", + "label": "Partial mask: show last 4", + "description": "Show last 4 characters; replace rest with 'X'", + "transformer": "cast(regexp_replace({col}, '(.*)(.{4}$)', x -> regexp_replace(x[1], '.', 'X') || x[2]) as {type})" + }, + { + "itemId": 3, + "name": "MASK_SHOW_FIRST_4", + "label": "Partial mask: show first 4", + "description": "Show first 4 characters; replace rest with 'X'", + "transformer": "cast(regexp_replace({col}, '(^.{4})(.*)', x -> x[1] || regexp_replace(x[2], '.', 'X')) as {type})" + }, + { + "itemId": 4, + "name": "MASK_HASH", + "label": "Hash", + "description": "Hash the value of a varchar with sha256", + "transformer": "cast(to_hex(sha256(to_utf8({col}))) as {type})" + }, + { + "itemId": 5, + "name": "MASK_NULL", + "label": "Nullify", + "description": "Replace with NULL" + }, + { + "itemId": 6, + "name": "MASK_NONE", + "label": "Unmasked (retain original value)", + "description": "No masking" + }, + { + "itemId": 12, + "name": "MASK_DATE_SHOW_YEAR", + "label": "Date: show only year", + "description": "Date: show only year", + "transformer": "date_trunc('year', {col})" + }, + { + "itemId": 13, + "name": "CUSTOM", + "label": "Custom", + "description": "Custom" + 
} + ] + }, + "rowFilterDef": { + "accessTypes": [ + { + "name": "select" + } + ], + "resources": [ + { + "name": "catalog", + "matcherOptions": { + "wildCard": "true" + }, + "lookupSupported": true, + "mandatory": true, + "uiHint": "{ \"singleValue\":true }" + }, + { + "name": "schema", + "matcherOptions": { + "wildCard": "true" + }, + "lookupSupported": true, + "mandatory": true, + "uiHint": "{ \"singleValue\":true }" + }, + { + "name": "table", + "matcherOptions": { + "wildCard": "true" + }, + "lookupSupported": true, + "mandatory": true, + "uiHint": "{ \"singleValue\":true }" + } + ] + } + +} From 3c0af096e2858a662987fdcae390f670cf68bb11 Mon Sep 17 00:00:00 2001 From: Varun Rao Bhamidimarri Date: Mon, 15 Nov 2021 16:08:36 -0600 Subject: [PATCH 3/6] Add EMR 6.x versions for V3 --- aws_emr_blog_v3/cloudformation/emr-template.template | 1 + aws_emr_blog_v3/cloudformation/nestedstack.template | 1 + aws_emr_blog_v3/cloudformation/step2_ranger-rds-emr.template | 1 + 3 files changed, 3 insertions(+) diff --git a/aws_emr_blog_v3/cloudformation/emr-template.template b/aws_emr_blog_v3/cloudformation/emr-template.template index a498f71..5dcfe59 100644 --- a/aws_emr_blog_v3/cloudformation/emr-template.template +++ b/aws_emr_blog_v3/cloudformation/emr-template.template @@ -244,6 +244,7 @@ Parameters: AllowedValues: - emr-5.32.0 - emr-6.3.0 + - emr-6.4.0 Description: Release label for the EMR cluster AppsEMR: Description: 'Comma separated list of applications to install on the cluster e.g., ' diff --git a/aws_emr_blog_v3/cloudformation/nestedstack.template b/aws_emr_blog_v3/cloudformation/nestedstack.template index 1fd1538..d1f9461 100644 --- a/aws_emr_blog_v3/cloudformation/nestedstack.template +++ b/aws_emr_blog_v3/cloudformation/nestedstack.template @@ -296,6 +296,7 @@ Parameters: AllowedValues: - emr-5.32.0 - emr-6.3.0 + - emr-6.4.0 KeyPairName: Description: Name of an existing EC2 key pair to access the Amazon EMR cluster Type: AWS::EC2::KeyPair::KeyName diff --git 
a/aws_emr_blog_v3/cloudformation/step2_ranger-rds-emr.template b/aws_emr_blog_v3/cloudformation/step2_ranger-rds-emr.template index 3031802..8aa618c 100644 --- a/aws_emr_blog_v3/cloudformation/step2_ranger-rds-emr.template +++ b/aws_emr_blog_v3/cloudformation/step2_ranger-rds-emr.template @@ -228,6 +228,7 @@ Parameters: AllowedValues: - emr-5.32.0 - emr-6.3.0 + - emr-6.4.0 S3Bucket: Description: S3Bucket for the code [update this is you want to run this stack in a region other than US-EAST-1] Type: String From c5a36096370550b02076960c75fea3f820786268 Mon Sep 17 00:00:00 2001 From: Varun Rao Bhamidimarri Date: Thu, 30 Jun 2022 14:42:55 -0500 Subject: [PATCH 4/6] Add directory for service definitions --- .../ranger-servicedef-amazon-emr-s3.json | 75 +++ .../ranger-servicedef-amazon-emr-spark.json | 150 +++++ .../ranger-servicedef-trino.json | 516 ++++++++++++++++++ 3 files changed, 741 insertions(+) create mode 100644 ranger-service-definition/ranger-servicedef-amazon-emr-s3.json create mode 100644 ranger-service-definition/ranger-servicedef-amazon-emr-spark.json create mode 100644 ranger-service-definition/ranger-servicedef-trino.json diff --git a/ranger-service-definition/ranger-servicedef-amazon-emr-s3.json b/ranger-service-definition/ranger-servicedef-amazon-emr-s3.json new file mode 100644 index 0000000..f717c8a --- /dev/null +++ b/ranger-service-definition/ranger-servicedef-amazon-emr-s3.json @@ -0,0 +1,75 @@ +{ + "id": 157, + "name": "amazon-emr-emrfs", + "label": "EMRFS S3 object storage", + "description": "EMRFS Ranger plugin for AWS S3 object storage service", + "implClass": "com.amazonaws.emr.ranger.emrfs.RangerServiceS3", + "version": 1, + "options": { + "enableDenyAndExceptionsInPolicies": "false" + }, + "configs": [ + { + "itemId": 1, + "name": "commonNameForCertificate", + "type": "string", + "subType": "", + "mandatory": false, + "validationRegEx": "", + "validationMessage": "", + "uiHint": "", + "label": "Common Name for Certificate" + } + ], + 
"resources": [ + { + "itemId": 1, + "name": "sthreeresource", + "type": "path", + "level": 10, + "parent": "", + "mandatory": true, + "lookupSupported": true, + "recursiveSupported": true, + "excludesSupported": false, + "matcher": "org.apache.ranger.plugin.resourcematcher.RangerPathResourceMatcher", + "matcherOptions": { + "wildCard": true, + "ignoreCase": false + }, + "validationRegEx": ".+", + "validationMessage": "S3 resource can't be empty", + "uiHint": "", + "label": "S3 resource", + "description": "Represents an S3 resource i.e. either a bucket or bucket and object key. Supports default Ranger wildcards." + } + ], + "accessTypes": [ + { + "itemId": 1, + "name": "GetObject", + "label": "GetObject" + }, + { + "itemId": 2, + "name": "PutObject", + "label": "PutObject" + }, + { + "itemId": 3, + "name": "ListObjects", + "label": "ListObjects" + }, + { + "itemId": 4, + "name": "DeleteObject", + "label": "DeleteObject" + } + ], + "enums": [ + ], + "contextEnrichers": [ + ], + "policyConditions": [ + ] +} diff --git a/ranger-service-definition/ranger-servicedef-amazon-emr-spark.json b/ranger-service-definition/ranger-servicedef-amazon-emr-spark.json new file mode 100644 index 0000000..2a5ad6e --- /dev/null +++ b/ranger-service-definition/ranger-servicedef-amazon-emr-spark.json @@ -0,0 +1,150 @@ +{ + "name": "amazon-emr-spark", + "implClass": "org.apache.ranger.services.spark.RangerServiceSpark", + "label": "Amazon EMR Spark", + "description": "Amazon EMR Spark", + "guid": "f4707ecc-b5c6-11ea-b8ab-02f5a39015b1", + "resources": + [ + { + "itemId": 1, + "name": "database", + "type": "string", + "level": 10, + "parent": "", + "mandatory": true, + "lookupSupported": false, + "recursiveSupported": false, + "excludesSupported": true, + "matcher": "org.apache.ranger.plugin.resourcematcher.RangerDefaultResourceMatcher", + "matcherOptions": { "wildCard":true, "ignoreCase":true }, + "validationRegEx":"", + "validationMessage": "", + "uiHint":"", + "label": "EMR Spark 
Database", + "description": "EMR Spark Database" + }, + + { + "itemId": 2, + "name": "table", + "type": "string", + "level": 20, + "parent": "database", + "mandatory": true, + "lookupSupported": false, + "recursiveSupported": false, + "excludesSupported": true, + "matcher": "org.apache.ranger.plugin.resourcematcher.RangerDefaultResourceMatcher", + "matcherOptions": { "wildCard":true, "ignoreCase":true }, + "validationRegEx":"", + "validationMessage": "", + "uiHint":"", + "label": "EMR Spark Table", + "description": "EMR Spark Table" + }, + + { + "itemId": 3, + "name": "column", + "type": "string", + "level": 30, + "parent": "table", + "mandatory": true, + "lookupSupported": false, + "recursiveSupported": false, + "excludesSupported": true, + "matcher": "org.apache.ranger.plugin.resourcematcher.RangerDefaultResourceMatcher", + "matcherOptions": { "wildCard":true, "ignoreCase":true }, + "validationRegEx":"", + "validationMessage": "", + "uiHint":"", + "label": "EMR Spark Column", + "description": "EMR Spark Column" + }, + + { + "itemId": 4, + "name": "url", + "type": "string", + "level": 10, + "parent": "", + "mandatory": true, + "lookupSupported": false, + "recursiveSupported": true, + "excludesSupported": false, + "matcher": "org.apache.ranger.plugin.resourcematcher.RangerURLResourceMatcher", + "matcherOptions": { "wildCard":true, "ignoreCase":false }, + "validationRegEx":"", + "validationMessage": "", + "uiHint":"", + "label": "EMR Spark Table URL", + "description": "EMR Spark Table URL" + } + ], + + "accessTypes": + [ + { + "itemId": 1, + "name": "select", + "label": "select" + }, + { + "itemId": 2, + "name": "update", + "label": "update" + }, + { + "itemId": 3, + "name": "alter", + "label": "alter" + }, + { + "itemId": 4, + "name": "read", + "label": "read" + }, + { + "itemId": 5, + "name": "write", + "label": "write" + }, + { + "itemId": 6, + "name": "create", + "label": "create" + }, + { + "itemId": 7, + "name": "drop", + "label": "drop" + } + ], + "configs": 
+ [ + { + "itemId": 1, + "name": "commonNameForCertificate", + "type": "string", + "mandatory": false, + "validationRegEx":"", + "validationMessage": "", + "uiHint":"", + "label": "Common Name for Certificate" + } + ], + + "enums": + [ + + ], + + "contextEnrichers": + [ + ], + + "policyConditions": + [ + ] +} diff --git a/ranger-service-definition/ranger-servicedef-trino.json b/ranger-service-definition/ranger-servicedef-trino.json new file mode 100644 index 0000000..4e1475b --- /dev/null +++ b/ranger-service-definition/ranger-servicedef-trino.json @@ -0,0 +1,516 @@ +{ + "name": "trino", + "displayName": "trino", + "implClass": "", + "label": "Trino", + "description": "Trino", + "guid": "379a9fe5-1b6e-4091-a584-4890e245e6c1", + "resources": [ + { + "itemId": 1, + "name": "catalog", + "type": "string", + "level": 10, + "parent": "", + "mandatory": true, + "isValidLeaf": true, + "lookupSupported": true, + "recursiveSupported": false, + "excludesSupported": true, + "matcher": "org.apache.ranger.plugin.resourcematcher.RangerDefaultResourceMatcher", + "matcherOptions": { + "wildCard": true, + "ignoreCase": true + }, + "validationRegEx": "", + "validationMessage": "", + "uiHint": "", + "label": "Trino Catalog", + "description": "Trino Catalog" + }, + { + "itemId": 2, + "name": "schema", + "type": "string", + "level": 20, + "parent": "catalog", + "mandatory": true, + "isValidLeaf": true, + "lookupSupported": true, + "recursiveSupported": false, + "excludesSupported": true, + "matcher": "org.apache.ranger.plugin.resourcematcher.RangerDefaultResourceMatcher", + "matcherOptions": { + "wildCard": true, + "ignoreCase": true + }, + "validationRegEx": "", + "validationMessage": "", + "uiHint": "", + "label": "Trino Schema", + "description": "Trino Schema" + }, + { + "itemId": 3, + "name": "table", + "type": "string", + "level": 30, + "parent": "schema", + "mandatory": true, + "isValidLeaf": true, + "lookupSupported": true, + "recursiveSupported": false, + "excludesSupported": 
true, + "matcher": "org.apache.ranger.plugin.resourcematcher.RangerDefaultResourceMatcher", + "matcherOptions": { + "wildCard": true, + "ignoreCase": true + }, + "validationRegEx": "", + "validationMessage": "", + "uiHint": "", + "label": "Trino Table", + "description": "Trino Table" + }, + { + "itemId": 4, + "name": "column", + "type": "string", + "level": 40, + "parent": "table", + "mandatory": true, + "lookupSupported": true, + "recursiveSupported": false, + "excludesSupported": true, + "matcher": "org.apache.ranger.plugin.resourcematcher.RangerDefaultResourceMatcher", + "matcherOptions": { + "wildCard": true, + "ignoreCase": true + }, + "validationRegEx": "", + "validationMessage": "", + "uiHint": "", + "label": "Trino Column", + "description": "Trino Column" + }, + { + "itemId": 5, + "name": "trinouser", + "type": "string", + "level": 10, + "parent": "", + "mandatory": true, + "lookupSupported": false, + "recursiveSupported": false, + "excludesSupported": false, + "matcher": "org.apache.ranger.plugin.resourcematcher.RangerDefaultResourceMatcher", + "matcherOptions": { + "wildCard": true, + "ignoreCase": true + }, + "validationRegEx": "", + "validationMessage": "", + "uiHint": "", + "label": "Trino User", + "description": "Trino User", + "accessTypeRestrictions": ["impersonate"] + }, + { + "itemId": 6, + "name": "systemproperty", + "type": "string", + "level": 10, + "parent": "", + "mandatory": true, + "lookupSupported": false, + "recursiveSupported": false, + "excludesSupported": false, + "matcher": "org.apache.ranger.plugin.resourcematcher.RangerDefaultResourceMatcher", + "matcherOptions": { + "wildCard": true, + "ignoreCase": true + }, + "validationRegEx": "", + "validationMessage": "", + "uiHint": "", + "label": "System Property", + "description": "Trino System Property", + "accessTypeRestrictions": ["alter"] + }, + { + "itemId": 7, + "name": "sessionproperty", + "type": "string", + "level": 20, + "parent": "catalog", + "mandatory": true, + 
"lookupSupported": false, + "recursiveSupported": false, + "excludesSupported": false, + "matcher": "org.apache.ranger.plugin.resourcematcher.RangerDefaultResourceMatcher", + "matcherOptions": { + "wildCard": true, + "ignoreCase": true + }, + "validationRegEx": "", + "validationMessage": "", + "uiHint": "", + "label": "Catalog Session Property", + "description": "Trino Catalog Session Property", + "accessTypeRestrictions": ["alter"] + }, + { + "itemId": 8, + "name": "function", + "type": "string", + "level": 10, + "parent": "", + "mandatory": true, + "lookupSupported": false, + "recursiveSupported": false, + "excludesSupported": false, + "matcher": "org.apache.ranger.plugin.resourcematcher.RangerDefaultResourceMatcher", + "matcherOptions": { + "wildCard": true, + "ignoreCase": true + }, + "validationRegEx": "", + "validationMessage": "", + "uiHint": "", + "label": "Trino Function", + "description": "Trino Function", + "accessTypeRestrictions": ["execute", "grant"] + }, + { + "itemId": 9, + "name": "procedure", + "type": "string", + "level": 30, + "parent": "schema", + "mandatory": true, + "lookupSupported": false, + "recursiveSupported": false, + "excludesSupported": false, + "matcher": "org.apache.ranger.plugin.resourcematcher.RangerDefaultResourceMatcher", + "matcherOptions": { + "wildCard": true, + "ignoreCase": true + }, + "validationRegEx": "", + "validationMessage": "", + "uiHint": "", + "label": "Schema Procedure", + "description": "Schema Procedure", + "accessTypeRestrictions": ["execute", "grant"] + }, + { + "itemId": 10, + "name": "url", + "type": "string", + "level": 10, + "parent": "", + "mandatory": true, + "lookupSupported": false, + "recursiveSupported": true, + "excludesSupported": false, + "matcher": "org.apache.ranger.plugin.resourcematcher.RangerPathResourceMatcher", + "matcherOptions": { + "wildCard": true, + "ignoreCase": false + }, + "validationRegEx": "", + "validationMessage": "", + "uiHint": "", + "label": "URL", + "description": "External 
URL", + "accessTypeRestrictions": ["read", "write"] + } + ], + "accessTypes": [ + { + "itemId": 1, + "name": "select", + "label": "Select" + }, + { + "itemId": 2, + "name": "insert", + "label": "Insert" + }, + { + "itemId": 3, + "name": "create", + "label": "Create" + }, + { + "itemId": 4, + "name": "drop", + "label": "Drop" + }, + { + "itemId": 5, + "name": "delete", + "label": "Delete" + }, + { + "itemId": 6, + "name": "use", + "label": "Use" + }, + { + "itemId": 7, + "name": "alter", + "label": "Alter" + }, + { + "itemId": 8, + "name": "grant", + "label": "Grant" + }, + { + "itemId": 9, + "name": "revoke", + "label": "Revoke" + }, + { + "itemId": 10, + "name": "show", + "label": "Show" + }, + { + "itemId": 11, + "name": "impersonate", + "label": "Impersonate" + }, + { + "itemId": 12, + "name": "all", + "label": "All", + "impliedGrants": [ + "select", + "insert", + "create", + "delete", + "drop", + "use", + "alter", + "grant", + "revoke", + "show", + "impersonate", + "execute", + "read", + "write" + ] + }, + { + "itemId": 13, + "name": "execute", + "label": "execute" + }, + { + "itemId": 14, + "name": "read", + "label": "Read" + }, + { + "itemId": 15, + "name": "write", + "label": "Write" + } + ], + "configs": [ + { + "itemId": 1, + "name": "username", + "type": "string", + "mandatory": true, + "validationRegEx": "", + "validationMessage": "", + "uiHint": "", + "label": "Username" + }, + { + "itemId": 2, + "name": "password", + "type": "password", + "mandatory": false, + "validationRegEx": "", + "validationMessage": "", + "uiHint": "", + "label": "Password" + }, + { + "itemId": 3, + "name": "jdbc.driverClassName", + "type": "string", + "mandatory": true, + "validationRegEx": "", + "validationMessage": "", + "uiHint": "", + "defaultValue": "io.trino.jdbc.TrinoDriver" + }, + { + "itemId": 4, + "name": "jdbc.url", + "type": "string", + "mandatory": true, + "defaultValue": "", + "validationRegEx": "", + "validationMessage": "", + "uiHint": "" + }, + { + "itemId": 5, 
+ "name": "commonNameForCertificate", + "type": "string", + "mandatory": false, + "validationRegEx":"", + "validationMessage": "", + "uiHint":"", + "label": "Common Name for Certificate" + } + ], + "enums": [ + ], + "contextEnrichers": [ + ], + "policyConditions": + [ + ], + "dataMaskDef": { + "accessTypes": [ + { + "name": "select" + } + ], + "resources": [ + { + "name": "catalog", + "matcherOptions": { + "wildCard": "true" + }, + "lookupSupported": true, + "uiHint":"{ \"singleValue\":true }" + }, + { + "name": "schema", + "matcherOptions": { + "wildCard": "true" + }, + "lookupSupported": true, + "uiHint":"{ \"singleValue\":true }" + }, + { + "name": "table", + "matcherOptions": { + "wildCard": "true" + }, + "lookupSupported": true, + "uiHint":"{ \"singleValue\":true }" + }, + { + "name": "column", + "matcherOptions": { + "wildCard": "true" + }, + "lookupSupported": true, + "uiHint":"{ \"singleValue\":true }" + } + ], + "maskTypes": [ + { + "itemId": 1, + "name": "MASK", + "label": "Redact", + "description": "Replace lowercase with 'x', uppercase with 'X', digits with '0'", + "transformer": "cast(regexp_replace(regexp_replace(regexp_replace({col},'([A-Z])', 'X'),'([a-z])','x'),'([0-9])','0') as {type})", + "dataMaskOptions": { + } + }, + { + "itemId": 2, + "name": "MASK_SHOW_LAST_4", + "label": "Partial mask: show last 4", + "description": "Show last 4 characters; replace rest with 'X'", + "transformer": "cast(regexp_replace({col}, '(.*)(.{4}$)', x -> regexp_replace(x[1], '.', 'X') || x[2]) as {type})" + }, + { + "itemId": 3, + "name": "MASK_SHOW_FIRST_4", + "label": "Partial mask: show first 4", + "description": "Show first 4 characters; replace rest with 'X'", + "transformer": "cast(regexp_replace({col}, '(^.{4})(.*)', x -> x[1] || regexp_replace(x[2], '.', 'X')) as {type})" + }, + { + "itemId": 4, + "name": "MASK_HASH", + "label": "Hash", + "description": "Hash the value of a varchar with sha256", + "transformer": "cast(to_hex(sha256(to_utf8({col}))) as
{type})" + }, + { + "itemId": 5, + "name": "MASK_NULL", + "label": "Nullify", + "description": "Replace with NULL" + }, + { + "itemId": 6, + "name": "MASK_NONE", + "label": "Unmasked (retain original value)", + "description": "No masking" + }, + { + "itemId": 12, + "name": "MASK_DATE_SHOW_YEAR", + "label": "Date: show only year", + "description": "Date: show only year", + "transformer": "date_trunc('year', {col})" + }, + { + "itemId": 13, + "name": "CUSTOM", + "label": "Custom", + "description": "Custom" + } + ] + }, + "rowFilterDef": { + "accessTypes": [ + { + "name": "select" + } + ], + "resources": [ + { + "name": "catalog", + "matcherOptions": { + "wildCard": "true" + }, + "lookupSupported": true, + "mandatory": true, + "uiHint": "{ \"singleValue\":true }" + }, + { + "name": "schema", + "matcherOptions": { + "wildCard": "true" + }, + "lookupSupported": true, + "mandatory": true, + "uiHint": "{ \"singleValue\":true }" + }, + { + "name": "table", + "matcherOptions": { + "wildCard": "true" + }, + "lookupSupported": true, + "mandatory": true, + "uiHint": "{ \"singleValue\":true }" + } + ] + } + +} From 3cc15a64e7724c88585c6d737cae74f6c001d2ea Mon Sep 17 00:00:00 2001 From: Varun Rao Bhamidimarri Date: Thu, 14 Jul 2022 07:21:20 -0500 Subject: [PATCH 5/6] Updates for the new Amazon EMR Trino Plugin --- .../cloudformation/emr-template.template | 19 + .../lambda-amilookup-win.template | 2 + .../cloudformation/ranger-server.template | 2 +- .../cloudformation/step1_vpc-ec2-ad.template | 1 + .../step2_ranger-rds-emr.template | 8 +- .../code/amilookup-win/amilookup-win.js | 3 +- aws_emr_blog_v3/code/launch-cluster/cremr.py | 234 +++----- aws_emr_blog_v3/inputdata/processSalesData.py | 33 ++ .../ranger-trino-policy-analyst1.json | 100 ++++ ...nger-trino-policy-hive-catalog-access.json | 75 +++ ...rino-policy-hive-catalog-table-access.json | 92 ++++ .../ranger-trino-policy-impersonate-user.json | 63 +++ .../inputdata/ranger-trino-repo.json | 13 + 
.../inputdata/ranger-trino-s3-policy.json | 61 +++ .../ranger-users/ranger-hue-user.json | 15 + .../ranger-users/ranger-trino-user.json | 15 + .../inputdata/redshift-queries.sql | 6 + aws_emr_blog_v3/inputdata/redshift.properties | 7 +- .../ranger-servicedef-amazon-emr-spark.json | 225 ++++---- .../2.0.0/ranger-servicedef-trino.json | 516 ++++++++++++++++++ aws_emr_blog_v3/inputdata/spark-notebook.py | 26 + aws_emr_blog_v3/inputdata/trino-queries.sql | 16 + aws_emr_blog_v3/scripts/download-scripts.sh | 1 + .../scripts/emr-steps/createHiveTables.sh | 24 + .../emr-steps/createdExtendedHiveTables.sh | 55 ++ .../emr-steps/presto-cli-kerberos_fix.sh | 48 ++ .../emr-steps/trino-update-user-mapping.sh | 18 + .../scripts/emr-tls/create-tls-certs.sh | 2 +- .../scripts/install-ranger-admin-server.sh | 11 + .../remove-yum-package-name-validator.sh | 1 + .../scripts/setup-trino-redshift-connector.sh | 4 + 31 files changed, 1423 insertions(+), 273 deletions(-) create mode 100644 aws_emr_blog_v3/inputdata/processSalesData.py create mode 100644 aws_emr_blog_v3/inputdata/ranger-trino-policy-analyst1.json create mode 100644 aws_emr_blog_v3/inputdata/ranger-trino-policy-hive-catalog-access.json create mode 100644 aws_emr_blog_v3/inputdata/ranger-trino-policy-hive-catalog-table-access.json create mode 100644 aws_emr_blog_v3/inputdata/ranger-trino-policy-impersonate-user.json create mode 100644 aws_emr_blog_v3/inputdata/ranger-trino-repo.json create mode 100644 aws_emr_blog_v3/inputdata/ranger-trino-s3-policy.json create mode 100644 aws_emr_blog_v3/inputdata/ranger-users/ranger-hue-user.json create mode 100644 aws_emr_blog_v3/inputdata/ranger-users/ranger-trino-user.json create mode 100644 aws_emr_blog_v3/inputdata/redshift-queries.sql create mode 100644 aws_emr_blog_v3/inputdata/service-definition/2.0.0/ranger-servicedef-trino.json create mode 100644 aws_emr_blog_v3/inputdata/spark-notebook.py create mode 100644 aws_emr_blog_v3/inputdata/trino-queries.sql create mode 100644 
aws_emr_blog_v3/scripts/emr-steps/createdExtendedHiveTables.sh create mode 100644 aws_emr_blog_v3/scripts/emr-steps/presto-cli-kerberos_fix.sh create mode 100644 aws_emr_blog_v3/scripts/emr-steps/trino-update-user-mapping.sh create mode 100644 aws_emr_blog_v3/scripts/remove-yum-package-name-validator.sh create mode 100644 aws_emr_blog_v3/scripts/setup-trino-redshift-connector.sh diff --git a/aws_emr_blog_v3/cloudformation/emr-template.template b/aws_emr_blog_v3/cloudformation/emr-template.template index 5dcfe59..0a67fe0 100644 --- a/aws_emr_blog_v3/cloudformation/emr-template.template +++ b/aws_emr_blog_v3/cloudformation/emr-template.template @@ -245,6 +245,7 @@ Parameters: - emr-5.32.0 - emr-6.3.0 - emr-6.4.0 + - emr-6.7.0 Description: Release label for the EMR cluster AppsEMR: Description: 'Comma separated list of applications to install on the cluster e.g., ' @@ -252,6 +253,7 @@ Parameters: Default: Hadoop, Spark, Hive, Livy, Hue AllowedValues: - "Hadoop, Spark, Hive, Livy, Hue" + - "Hadoop, Spark, Hive, Livy, Hue, Trino" - "Hadoop, Spark, Hive, Livy" EnableKerberos: Description: Enable Kerberos on the Cluster. 
This is Required for Ranger EMR support @@ -342,6 +344,11 @@ Parameters: Default: true Type: String AllowedValues: [true, false] + EnableSparkDDLAndIceberg: + Description: Installs Spark DDLs and Iceberg configuration + Default: false + Type: String + AllowedValues: [ true, false ] RangerAgentKeySecretName: Description: Name of Ranger Agent Cert Secrets mgr resource Type: String @@ -534,10 +541,20 @@ Resources: - App: "EMRFS-S3" ClientSecretARN: !Join ['', ['arn:aws:secretsmanager:', !Ref "AWS::Region", ':', !Ref "AWS::AccountId", ':secret:', !Ref RangerAgentKeySecretName]] PolicyRepositoryName: "amazonemrs3" + - App: "Trino" + ClientSecretARN: !Join ['', ['arn:aws:secretsmanager:', !Ref "AWS::Region", ':', !Ref "AWS::AccountId", ':secret:', !Ref RangerAgentKeySecretName]] + PolicyRepositoryName: "amazonemrtrino" AuditConfiguration: Destinations: AmazonCloudWatchLogs: CloudWatchLogGroup: !Sub 'arn:aws:logs:${AWS::Region}:${AWS::AccountId}:log-group:${RangerCloudWatchLogGroupName}' + EncryptionConfiguration: + EnableInTransitEncryption: true + EnableAtRestEncryption: false + InTransitEncryptionConfiguration: + TLSCertificateConfiguration: + CertificateProviderType: PEM + S3Object: !Join ['', ["s3://", !Ref S3ArtifactBucket, "/", !Ref S3ArtifactKey, "/", !Ref ProjectVersion, "/emr-tls/", "emr-certs-certs.zip"]] LaunchEMRClusterFunction: Type: AWS::Lambda::Function DependsOn: LambdaExecutionRole @@ -604,9 +621,11 @@ Resources: DBHostName: !Ref DBHostName DBUserName: !Ref DBUserName DBRootPassword: !Ref DBRootPassword + ClientSecretARN: !Join ['', ['arn:aws:secretsmanager:', !Ref "AWS::Region", ':', !Ref "AWS::AccountId", ':secret:', !Ref RangerAgentKeySecretName]] CertLocationPath: !Join ['', ["s3://", !Ref S3ArtifactBucket, "/", !Ref S3ArtifactKey, "/", !Ref ProjectVersion]] RangerAdminPassword: !Ref RangerAdminPassword DefaultDomain: !If [ USEastRegion, 'EC2.INTERNAL', 'COMPUTE.INTERNAL' ] + EnableSparkDDLAndIceberg: !Ref EnableSparkDDLAndIceberg
emrCreateWaitHandle: Type: AWS::CloudFormation::WaitConditionHandle diff --git a/aws_emr_blog_v3/cloudformation/lambda-amilookup-win.template b/aws_emr_blog_v3/cloudformation/lambda-amilookup-win.template index 7efd8fa..e761a5a 100644 --- a/aws_emr_blog_v3/cloudformation/lambda-amilookup-win.template +++ b/aws_emr_blog_v3/cloudformation/lambda-amilookup-win.template @@ -68,6 +68,7 @@ Parameters: - Windows Server 2012 RTM English 64-bit - Windows Server 2012 R2 English 64-bit - Windows Server 2016 Base English 64-bit + - Windows Server 2019 Base English 64-bit ConstraintDescription: Must be a valid Windows version. ModuleName: Description: The name of the JavaScript file @@ -87,6 +88,7 @@ Parameters: Type: String AllowedValues: - 3.0 + - beta Resources: # SampleInstance: # Type: AWS::EC2::Instance diff --git a/aws_emr_blog_v3/cloudformation/ranger-server.template b/aws_emr_blog_v3/cloudformation/ranger-server.template index 43e3b52..4869a6f 100644 --- a/aws_emr_blog_v3/cloudformation/ranger-server.template +++ b/aws_emr_blog_v3/cloudformation/ranger-server.template @@ -1 +1 @@ -Description: CloudFormation template to create Apache Ranger server with Solr Parameters: S3Bucket: Description: S3Bucket where artifacts are stored Type: String Default: aws-bigdata-blog S3Key: Description: S3Key of the Lambda code Type: String VPC: Description: VPC ID for creating the EMR cluster Type: AWS::EC2::VPC::Id Subnet: Description: Subnet ID for creating the EMR cluster Type: AWS::EC2::Subnet::Id ProjectVersion: Default: 3.0 Description: Project version Type: String AllowedValues: - 3.0 - beta KeyPairName: Description: Name of an existing EC2 KeyPair to enable SSH to the instances Type: AWS::EC2::KeyPair::KeyName DBHostName: Description: HostName of the database Type: String DBRootPassword: Description: Root password of database Type: String ADAdminUsername: Description: Admin username of AD Type: String ADAdminPassword: Description: Admin password for AD Type: String InstanceType: 
Description: Instance type of the Ranger Server Type: String Default: r5.2xlarge AllowedValues: - t1.micro - t2.nano - t2.micro - t2.small - t2.medium - t2.large - m1.small - m1.medium - m1.large - m1.xlarge - m2.xlarge - m2.2xlarge - m2.4xlarge - m3.medium - m3.large - m3.xlarge - m3.2xlarge - m4.large - m4.xlarge - m4.2xlarge - m4.4xlarge - m4.10xlarge - c1.medium - c1.xlarge - c3.large - c3.xlarge - c3.2xlarge - c3.4xlarge - c3.8xlarge - c4.large - c4.xlarge - c4.2xlarge - c4.4xlarge - c4.8xlarge - g2.2xlarge - g2.8xlarge - r3.large - r3.xlarge - r3.2xlarge - r3.4xlarge - r3.8xlarge - i2.xlarge - i2.2xlarge - i2.4xlarge - i2.8xlarge - d2.xlarge - d2.2xlarge - d2.4xlarge - d2.8xlarge - hi1.4xlarge - hs1.8xlarge - cr1.8xlarge - cc2.8xlarge - cg1.4xlarge - r5.4xlarge - r5.2xlarge - r5.xlarge ConstraintDescription: must be a valid EC2 instance type. LDAPHostPrivateIP: Description: IP Address of the SimpleAD server Type: String DomainDNSName: AllowedPattern: '[a-zA-Z0-9\-]+\..+' Default: awsemr.com Description: The Active Directory domain that you want to establish the cross-realm trust with e.g., awsemr.com MaxLength: '25' MinLength: '3' Type: String LDAPSearchBase: Description: Base DN SimpleAD server Type: String Default: dc=awsemr,dc=com LDAPBindUserName: Description: BindUser SimpleAD server Type: String Default: binduser AllowedValues: - binduser LDAPBindPassword: Description: BindPassword SimpleAD server Type: String Default: Bind@User123 rangerVersion: Description: RangerVersion Type: String Default: '2.0' AllowedValues: - '2.0' AttachAdditionalSourcePrefixToSG: Description: Attaches additional sources to EMR Master SG Default: false Type: String AllowedValues: [true, false] CIDRAccessToPrivateSubnetResources: Description: IP address range (in CIDR notation) of the client that will be allowed to connect to the cluster using SSH e.g., 203.0.113.5/32 AllowedPattern: (\d{1,3})\.(\d{1,3})\.(\d{1,3})\.(\d{1,3})/(\d{1,2}) Type: String MinLength: '9' MaxLength: '18' 
Default: 10.0.0.0/16 ConstraintDescription: must be a valid CIDR range of the form x.x.x.x/x AdditionalSourcePrefixToSG: Description: Sources that are allowd to access the Ranger Instance. Should be a source prefix e.g., pl-xxx Type: String Mappings: AWSInstanceType2Arch: t1.micro: Arch: PV64 t2.nano: Arch: HVM64 t2.micro: Arch: HVM64 t2.small: Arch: HVM64 t2.medium: Arch: HVM64 t2.large: Arch: HVM64 m3.medium: Arch: HVM64 m3.large: Arch: HVM64 m3.xlarge: Arch: HVM64 m3.2xlarge: Arch: HVM64 m4.large: Arch: HVM64 m4.xlarge: Arch: HVM64 m4.2xlarge: Arch: HVM64 m4.4xlarge: Arch: HVM64 m4.10xlarge: Arch: HVM64 c1.medium: Arch: PV64 c1.xlarge: Arch: PV64 c3.large: Arch: HVM64 c3.xlarge: Arch: HVM64 c3.2xlarge: Arch: HVM64 c3.4xlarge: Arch: HVM64 c3.8xlarge: Arch: HVM64 c4.large: Arch: HVM64 c4.xlarge: Arch: HVM64 c4.2xlarge: Arch: HVM64 c4.4xlarge: Arch: HVM64 c4.8xlarge: Arch: HVM64 r3.large: Arch: HVM64 r3.xlarge: Arch: HVM64 r3.2xlarge: Arch: HVM64 r3.4xlarge: Arch: HVM64 r3.8xlarge: Arch: HVM64 i2.xlarge: Arch: HVM64 i2.2xlarge: Arch: HVM64 i2.4xlarge: Arch: HVM64 i2.8xlarge: Arch: HVM64 d2.xlarge: Arch: HVM64 d2.2xlarge: Arch: HVM64 d2.4xlarge: Arch: HVM64 d2.8xlarge: Arch: HVM64 hi1.4xlarge: Arch: HVM64 hs1.8xlarge: Arch: HVM64 cr1.8xlarge: Arch: HVM64 cc2.8xlarge: Arch: HVM64 r5.xlarge: Arch: HVM64 r5.2xlarge: Arch: HVM64 r5.4xlarge: Arch: HVM64 AWSRegionArch2AMI: us-east-1: PV64: ami-0023040df18933030 HVM64: ami-0915e09cc7ceee3ab us-east-2: PV64: ami-0ea2bc03f34d1ada4 HVM64: ami-097834fcb3081f51a us-west-2: PV64: ami-00cb981adfcebb519 HVM64: ami-01f08ef3e76b957e5 us-west-1: PV64: ami-0027eed75be6f3bf4 HVM64: ami-014743cb7690ea737 eu-west-1: PV64: ami-00b6370b096f24de2 HVM64: ami-00890f614e48ce866 eu-central-1: PV64: ami-0001160eb97d88825 HVM64: ami-03ab4e8f1d88ce614 eu-north-1: PV64: ami-0001160eb97d88825 HVM64: ami-0031cb7c28e14ea6f ap-northeast-1: PV64: ami-0095079896fca4cca HVM64: ami-0318ecd6d05daa212 ap-northeast-2: PV64: NOT_SUPPORTED HVM64: 
ami-09391a0ad9f9243b6 ap-southeast-1: PV64: ami-021f73ba029345fb1 HVM64: ami-0dff4318d85149d5d ap-southeast-2: PV64: ami-01c6bf0aeb3c63052 HVM64: ami-050e1ec030abb8dde sa-east-1: PV64: ami-0015527da78932f76 HVM64: ami-03e1e4abf50e14ded cn-north-1: PV64: ami-7f84361b HVM64: ami-021321e9bc16d5186 Conditions: AttachAdditionalSourcePrefixToSG: !Equals [true, !Ref AttachAdditionalSourcePrefixToSG] Resources: ManagedInstanceRole: Type: AWS::IAM::Role Properties: AssumeRolePolicyDocument: Version: '2012-10-17' Statement: - Effect: Allow Principal: Service: - ssm.amazonaws.com - ec2.amazonaws.com Action: sts:AssumeRole ManagedPolicyArns: - arn:aws:iam::aws:policy/service-role/AmazonEC2RoleforSSM - arn:aws:iam::aws:policy/AWSCloudFormationReadOnlyAccess - arn:aws:iam::aws:policy/CloudWatchLogsFullAccess Path: "/" AllowSecretsRetrievalPolivy: Type: 'AWS::IAM::Policy' Properties: PolicyName: AllowSecretsRetrieval PolicyDocument: Version: 2012-10-17 Statement: - Effect: Allow Action: - secretsmanager:GetSecretValue - secretsmanager:ListSecrets - secretsmanager:DescribeSecret Resource: - !Join ['', ['arn:aws:secretsmanager:', !Ref "AWS::Region", ':', !Ref "AWS::AccountId", ':secret:emr/ranger*']] Roles: - !Ref ManagedInstanceRole ManagedInstanceProfile: Type: AWS::IAM::InstanceProfile Properties: Path: "/" Roles: - !Ref ManagedInstanceRole sgRangerAdminServer: Type: AWS::EC2::SecurityGroup Properties: GroupDescription: Open Up all Ranger Server ports VpcId: !Ref 'VPC' SecurityGroupIngress: - IpProtocol: tcp FromPort: '6080' ToPort: '6080' CidrIp: !Ref CIDRAccessToPrivateSubnetResources - IpProtocol: tcp FromPort: '6182' ToPort: '6182' CidrIp: !Ref CIDRAccessToPrivateSubnetResources - IpProtocol: tcp FromPort: '8983' ToPort: '8983' CidrIp: !Ref CIDRAccessToPrivateSubnetResources - IpProtocol: tcp FromPort: '8984' ToPort: '8984' CidrIp: !Ref CIDRAccessToPrivateSubnetResources - IpProtocol: tcp FromPort: '22' ToPort: '22' CidrIp: !Ref CIDRAccessToPrivateSubnetResources 
sgRangerAdminServerWithAdditions: Type: AWS::EC2::SecurityGroup Condition: AttachAdditionalSourcePrefixToSG Properties: GroupDescription: Open Up all Ranger Server ports VpcId: !Ref 'VPC' SecurityGroupIngress: - IpProtocol: tcp FromPort: '6080' ToPort: '6080' CidrIp: !Ref CIDRAccessToPrivateSubnetResources - IpProtocol: tcp FromPort: '6080' ToPort: '6080' SourcePrefixListId: !Ref AdditionalSourcePrefixToSG - IpProtocol: tcp FromPort: '6182' ToPort: '6182' CidrIp: !Ref CIDRAccessToPrivateSubnetResources - IpProtocol: tcp FromPort: '6182' ToPort: '6182' SourcePrefixListId: !Ref AdditionalSourcePrefixToSG - IpProtocol: tcp FromPort: '8983' ToPort: '8983' CidrIp: !Ref CIDRAccessToPrivateSubnetResources - IpProtocol: tcp FromPort: '8983' ToPort: '8983' SourcePrefixListId: !Ref AdditionalSourcePrefixToSG - IpProtocol: tcp FromPort: '8984' ToPort: '8984' CidrIp: !Ref CIDRAccessToPrivateSubnetResources - IpProtocol: tcp FromPort: '8984' ToPort: '8984' SourcePrefixListId: !Ref AdditionalSourcePrefixToSG - IpProtocol: tcp FromPort: '22' ToPort: '22' CidrIp: !Ref CIDRAccessToPrivateSubnetResources - IpProtocol: tcp FromPort: '22' ToPort: '22' SourcePrefixListId: !Ref AdditionalSourcePrefixToSG RangerLogGroup: Type: "AWS::Logs::LogGroup" Properties: RetentionInDays: 14 #LogGroupName: rangerlogs-${AWS::StackName} myEC2: Type: AWS::EC2::Instance Metadata: AWS::CloudFormation::Init: configSets: ascending: - CloudwatchLogs - RangerServer CloudwatchLogs: files: '/etc/awslogs/awslogs.conf': content: !Sub | [general] # Path to the CloudWatch Logs agent's state file. The agent uses this file to maintain # client side state across its executions. 
state_file = /var/lib/awslogs/agent-state [ranger_admin_setup_log] datetime_format = %Y-%m-%d %H:%M:%S,%f file = /tmp/create-ranger-server-output.log buffer_duration = 500 log_stream_name = adminsetuplog-{instance_id} initial_position = start_of_file log_group_name = ${RangerLogGroup} [ranger_admin_log] datetime_format = %Y-%m-%d %H:%M:%S,%f file = /usr/lib/ranger/logs/admin/logs/ranger_admin* buffer_duration = 500 log_stream_name = adminlog-{instance_id} initial_position = start_of_file log_group_name = ${RangerLogGroup} commands: startawslogs: command: !Sub | sudo yum update -y sudo yum install -y awslogs sudo service awslogs restart sudo chkconfig awslogs on RangerServer: commands: installrangerserver: command: !Join ['', ['bash /tmp/install-ranger-admin-server.sh ', !Ref 'LDAPHostPrivateIP', ' ', !Ref 'LDAPSearchBase', ' ', !Ref 'LDAPBindUserName', '@',!Ref 'DomainDNSName' , ' ', !Ref 'LDAPBindPassword', ' ', !Ref 'rangerVersion', ' ', !Join ['', ['s3://', !Ref S3Bucket, '/', !Ref S3Key]], ' ', !Ref 'ProjectVersion', ' ', !Ref 'DBHostName', ' ', !Ref 'DBRootPassword', ' ', !Ref "AWS::Region", ' '," >\ \ create-ranger-server-output.log \n"]] Properties: SubnetId: !Ref 'Subnet' IamInstanceProfile: !Ref ManagedInstanceProfile SecurityGroupIds: - !If [ AttachAdditionalSourcePrefixToSG, !Ref sgRangerAdminServerWithAdditions, !Ref sgRangerAdminServer ] ImageId: !FindInMap [AWSRegionArch2AMI, !Ref 'AWS::Region', !FindInMap [AWSInstanceType2Arch, !Ref 'InstanceType', Arch]] InstanceType: !Ref 'InstanceType' KeyName: !Ref 'KeyPairName' BlockDeviceMappings: - DeviceName: /dev/xvda Ebs: VolumeType: gp2 VolumeSize: '100' DeleteOnTermination: 'true' UserData: !Base64 Fn::Join: - '' - - '#!/bin/bash ' - 'cd /tmp ' - 'aws s3 cp ' - !Join ['', ['s3://', !Ref S3Bucket, '/', !Ref S3Key, '/', !Ref ProjectVersion]] - '/scripts/install-ranger-admin-server.sh . 
' - 'yum update aws-cfn-bootstrap ' - '# Install the files and packages from the metadata ' - '/opt/aws/bin/cfn-init ' - ' --stack ' - !Ref 'AWS::StackName' - ' --resource myEC2 ' - ' --configsets ascending ' - ' --region ' - !Ref 'AWS::Region' - ' ' Tags: - Key: Name Value: RangerServer Outputs: IPAddress: Description: IP address of the Ranger server Value: !GetAtt [myEC2, PrivateIp] PrivateDNS: Description: IP address of the Ranger server Value: !GetAtt [myEC2, PrivateDnsName] \ No newline at end of file +Description: CloudFormation template to create Apache Ranger server with Solr Parameters: S3Bucket: Description: S3Bucket where artifacts are stored Type: String Default: aws-bigdata-blog S3Key: Description: S3Key of the Lambda code Type: String S3ArtifactBucket: Description: S3Bucket where artifacts are stored Type: String Default: aws-bigdata-blog AllowedValues: ["aws-bigdata-blog"] S3ArtifactKey: Description: S3Key of the Lambda code Type: String Default: artifacts/aws-blog-emr-ranger AllowedValues: ["artifacts/aws-blog-emr-ranger"] VPC: Description: VPC ID for creating the EMR cluster Type: AWS::EC2::VPC::Id Subnet: Description: Subnet ID for creating the EMR cluster Type: AWS::EC2::Subnet::Id ProjectVersion: Default: 3.0 Description: Project version Type: String AllowedValues: - 3.0 - beta - test KeyPairName: Description: Name of an existing EC2 KeyPair to enable SSH to the instances Type: AWS::EC2::KeyPair::KeyName DBHostName: Description: HostName of the database Type: String DBRootPassword: Description: Root password of database NoEcho: 'true' Type: String ADAdminUsername: Description: Admin username of AD Type: String ADAdminPassword: Description: Admin password for AD NoEcho: 'true' Type: String InstanceType: Description: Instance type of the Ranger Server Type: String Default: r5.2xlarge AllowedValues: - t1.micro - t2.nano - t2.micro - t2.small - t2.medium - t2.large - m1.small - m1.medium - m1.large - m1.xlarge - m2.xlarge - m2.2xlarge - m2.4xlarge - 
m3.medium - m3.large - m3.xlarge - m3.2xlarge - m4.large - m4.xlarge - m4.2xlarge - m4.4xlarge - m4.10xlarge - c1.medium - c1.xlarge - c3.large - c3.xlarge - c3.2xlarge - c3.4xlarge - c3.8xlarge - c4.large - c4.xlarge - c4.2xlarge - c4.4xlarge - c4.8xlarge - g2.2xlarge - g2.8xlarge - r3.large - r3.xlarge - r3.2xlarge - r3.4xlarge - r3.8xlarge - i2.xlarge - i2.2xlarge - i2.4xlarge - i2.8xlarge - d2.xlarge - d2.2xlarge - d2.4xlarge - d2.8xlarge - hi1.4xlarge - hs1.8xlarge - cr1.8xlarge - cc2.8xlarge - cg1.4xlarge - r5.4xlarge - r5.2xlarge - r5.xlarge ConstraintDescription: must be a valid EC2 instance type. LDAPHostPrivateIP: Description: IP Address of the SimpleAD server Type: String DomainDNSName: AllowedPattern: '[a-zA-Z0-9\-]+\..+' Default: awsemr.com Description: The Active Directory domain that you want to establish the cross-realm trust with e.g., awsemr.com MaxLength: '25' MinLength: '3' Type: String LDAPSearchBase: Description: Base DN SimpleAD server Type: String Default: dc=awsemr,dc=com LDAPBindUserName: Description: BindUser SimpleAD server Type: String Default: binduser AllowedValues: - binduser LDAPBindPassword: Description: BindPassword SimpleAD server Type: String NoEcho: 'true' Default: Bind@User123 rangerVersion: Description: RangerVersion Type: String Default: '2.0' AllowedValues: - '2.0' AttachAdditionalSourcePrefixToSG: Description: Attaches additional sources to EMR Master SG Default: false Type: String AllowedValues: [true, false] CIDRAccessToPrivateSubnetResources: Description: IP address range (in CIDR notation) of the client that will be allowed to connect to the cluster using SSH e.g., 203.0.113.5/32 AllowedPattern: (\d{1,3})\.(\d{1,3})\.(\d{1,3})\.(\d{1,3})/(\d{1,2}) Type: String MinLength: '9' MaxLength: '18' Default: 10.0.0.0/16 ConstraintDescription: must be a valid CIDR range of the form x.x.x.x/x AdditionalSourcePrefixToSG: Description: Sources that are allowed to access the Ranger Instance.
Should be a source prefix e.g., pl-xxx Type: String Mappings: AWSInstanceType2Arch: t1.micro: Arch: PV64 t2.nano: Arch: HVM64 t2.micro: Arch: HVM64 t2.small: Arch: HVM64 t2.medium: Arch: HVM64 t2.large: Arch: HVM64 m3.medium: Arch: HVM64 m3.large: Arch: HVM64 m3.xlarge: Arch: HVM64 m3.2xlarge: Arch: HVM64 m4.large: Arch: HVM64 m4.xlarge: Arch: HVM64 m4.2xlarge: Arch: HVM64 m4.4xlarge: Arch: HVM64 m4.10xlarge: Arch: HVM64 c1.medium: Arch: PV64 c1.xlarge: Arch: PV64 c3.large: Arch: HVM64 c3.xlarge: Arch: HVM64 c3.2xlarge: Arch: HVM64 c3.4xlarge: Arch: HVM64 c3.8xlarge: Arch: HVM64 c4.large: Arch: HVM64 c4.xlarge: Arch: HVM64 c4.2xlarge: Arch: HVM64 c4.4xlarge: Arch: HVM64 c4.8xlarge: Arch: HVM64 r3.large: Arch: HVM64 r3.xlarge: Arch: HVM64 r3.2xlarge: Arch: HVM64 r3.4xlarge: Arch: HVM64 r3.8xlarge: Arch: HVM64 i2.xlarge: Arch: HVM64 i2.2xlarge: Arch: HVM64 i2.4xlarge: Arch: HVM64 i2.8xlarge: Arch: HVM64 d2.xlarge: Arch: HVM64 d2.2xlarge: Arch: HVM64 d2.4xlarge: Arch: HVM64 d2.8xlarge: Arch: HVM64 hi1.4xlarge: Arch: HVM64 hs1.8xlarge: Arch: HVM64 cr1.8xlarge: Arch: HVM64 cc2.8xlarge: Arch: HVM64 r5.xlarge: Arch: HVM64 r5.2xlarge: Arch: HVM64 r5.4xlarge: Arch: HVM64 AWSRegionArch2AMI: us-east-1: PV64: ami-0023040df18933030 HVM64: ami-0915e09cc7ceee3ab us-east-2: PV64: ami-0ea2bc03f34d1ada4 HVM64: ami-097834fcb3081f51a us-west-2: PV64: ami-00cb981adfcebb519 HVM64: ami-01f08ef3e76b957e5 us-west-1: PV64: ami-0027eed75be6f3bf4 HVM64: ami-014743cb7690ea737 eu-west-1: PV64: ami-00b6370b096f24de2 HVM64: ami-00890f614e48ce866 eu-central-1: PV64: ami-0001160eb97d88825 HVM64: ami-03ab4e8f1d88ce614 eu-north-1: PV64: ami-0001160eb97d88825 HVM64: ami-0031cb7c28e14ea6f ap-northeast-1: PV64: ami-0095079896fca4cca HVM64: ami-0318ecd6d05daa212 ap-northeast-2: PV64: NOT_SUPPORTED HVM64: ami-09391a0ad9f9243b6 ap-southeast-1: PV64: ami-021f73ba029345fb1 HVM64: ami-0dff4318d85149d5d ap-southeast-2: PV64: ami-01c6bf0aeb3c63052 HVM64: ami-050e1ec030abb8dde sa-east-1: PV64: 
ami-0015527da78932f76 HVM64: ami-03e1e4abf50e14ded cn-north-1: PV64: ami-7f84361b HVM64: ami-021321e9bc16d5186 Conditions: AttachAdditionalSourcePrefixToSG: !Equals [true, !Ref AttachAdditionalSourcePrefixToSG] Resources: ManagedInstanceRole: Type: AWS::IAM::Role Properties: AssumeRolePolicyDocument: Version: '2012-10-17' Statement: - Effect: Allow Principal: Service: - ssm.amazonaws.com - ec2.amazonaws.com Action: sts:AssumeRole ManagedPolicyArns: - arn:aws:iam::aws:policy/service-role/AmazonEC2RoleforSSM - arn:aws:iam::aws:policy/AWSCloudFormationReadOnlyAccess - arn:aws:iam::aws:policy/CloudWatchLogsFullAccess Path: "/" AllowSecretsRetrievalPolivy: Type: 'AWS::IAM::Policy' Properties: PolicyName: AllowSecretsRetrieval PolicyDocument: Version: 2012-10-17 Statement: - Effect: Allow Action: - secretsmanager:GetSecretValue - secretsmanager:ListSecrets - secretsmanager:DescribeSecret Resource: - !Join ['', ['arn:aws:secretsmanager:', !Ref "AWS::Region", ':', !Ref "AWS::AccountId", ':secret:emr/ranger*']] Roles: - !Ref ManagedInstanceRole ManagedInstanceProfile: Type: AWS::IAM::InstanceProfile Properties: Path: "/" Roles: - !Ref ManagedInstanceRole sgRangerAdminServer: Type: AWS::EC2::SecurityGroup Properties: GroupDescription: Open Up all Ranger Server ports VpcId: !Ref 'VPC' SecurityGroupIngress: - IpProtocol: tcp FromPort: '6080' ToPort: '6080' CidrIp: !Ref CIDRAccessToPrivateSubnetResources - IpProtocol: tcp FromPort: '6182' ToPort: '6182' CidrIp: !Ref CIDRAccessToPrivateSubnetResources - IpProtocol: tcp FromPort: '8983' ToPort: '8983' CidrIp: !Ref CIDRAccessToPrivateSubnetResources - IpProtocol: tcp FromPort: '8984' ToPort: '8984' CidrIp: !Ref CIDRAccessToPrivateSubnetResources - IpProtocol: tcp FromPort: '22' ToPort: '22' CidrIp: !Ref CIDRAccessToPrivateSubnetResources sgRangerAdminServerWithAdditions: Type: AWS::EC2::SecurityGroup Condition: AttachAdditionalSourcePrefixToSG Properties: GroupDescription: Open Up all Ranger Server ports VpcId: !Ref 'VPC' 
SecurityGroupIngress: - IpProtocol: tcp FromPort: '6080' ToPort: '6080' CidrIp: !Ref CIDRAccessToPrivateSubnetResources - IpProtocol: tcp FromPort: '6080' ToPort: '6080' SourcePrefixListId: !Ref AdditionalSourcePrefixToSG - IpProtocol: tcp FromPort: '6182' ToPort: '6182' CidrIp: !Ref CIDRAccessToPrivateSubnetResources - IpProtocol: tcp FromPort: '6182' ToPort: '6182' SourcePrefixListId: !Ref AdditionalSourcePrefixToSG - IpProtocol: tcp FromPort: '8983' ToPort: '8983' CidrIp: !Ref CIDRAccessToPrivateSubnetResources - IpProtocol: tcp FromPort: '8983' ToPort: '8983' SourcePrefixListId: !Ref AdditionalSourcePrefixToSG - IpProtocol: tcp FromPort: '8984' ToPort: '8984' CidrIp: !Ref CIDRAccessToPrivateSubnetResources - IpProtocol: tcp FromPort: '8984' ToPort: '8984' SourcePrefixListId: !Ref AdditionalSourcePrefixToSG - IpProtocol: tcp FromPort: '22' ToPort: '22' CidrIp: !Ref CIDRAccessToPrivateSubnetResources - IpProtocol: tcp FromPort: '22' ToPort: '22' SourcePrefixListId: !Ref AdditionalSourcePrefixToSG RangerLogGroup: Type: "AWS::Logs::LogGroup" Properties: RetentionInDays: 14 #LogGroupName: rangerlogs-${AWS::StackName} myEC2: Type: AWS::EC2::Instance Metadata: AWS::CloudFormation::Init: configSets: ascending: - CloudwatchLogs - RangerServer CloudwatchLogs: files: '/etc/awslogs/awslogs.conf': content: !Sub | [general] # Path to the CloudWatch Logs agent's state file. The agent uses this file to maintain # client side state across its executions. 
state_file = /var/lib/awslogs/agent-state [ranger_admin_setup_log] datetime_format = %Y-%m-%d %H:%M:%S,%f file = /tmp/create-ranger-server-output.log buffer_duration = 500 log_stream_name = adminsetuplog-{instance_id} initial_position = start_of_file log_group_name = ${RangerLogGroup} [ranger_admin_log] datetime_format = %Y-%m-%d %H:%M:%S,%f file = /usr/lib/ranger/logs/admin/logs/ranger_admin* buffer_duration = 500 log_stream_name = adminlog-{instance_id} initial_position = start_of_file log_group_name = ${RangerLogGroup} commands: startawslogs: command: !Sub | sudo yum update -y sudo yum install -y awslogs sudo service awslogs restart sudo chkconfig awslogs on RangerServer: commands: installrangerserver: command: !Join ['', ['bash /tmp/install-ranger-admin-server.sh ', !Ref 'LDAPHostPrivateIP', ' ', !Ref 'LDAPSearchBase', ' ', !Ref 'LDAPBindUserName', '@',!Ref 'DomainDNSName' , ' ', !Ref 'LDAPBindPassword', ' ', !Ref 'rangerVersion', ' ', !Join ['', ['s3://', !Ref S3ArtifactBucket, '/', !Ref S3ArtifactKey]], ' ', !Ref 'ProjectVersion', ' ', !Ref 'DBHostName', ' ', !Ref 'DBRootPassword', ' ', !Ref "AWS::Region", ' '," >\ \ create-ranger-server-output.log \n"]] Properties: SubnetId: !Ref 'Subnet' IamInstanceProfile: !Ref ManagedInstanceProfile SecurityGroupIds: - !If [ AttachAdditionalSourcePrefixToSG, !Ref sgRangerAdminServerWithAdditions, !Ref sgRangerAdminServer ] ImageId: !FindInMap [AWSRegionArch2AMI, !Ref 'AWS::Region', !FindInMap [AWSInstanceType2Arch, !Ref 'InstanceType', Arch]] InstanceType: !Ref 'InstanceType' KeyName: !Ref 'KeyPairName' BlockDeviceMappings: - DeviceName: /dev/xvda Ebs: VolumeType: gp2 VolumeSize: '100' DeleteOnTermination: 'true' UserData: !Base64 Fn::Join: - '' - - '#!/bin/bash ' - 'cd /tmp ' - 'aws s3 cp ' - !Join ['', ['s3://', !Ref S3Bucket, '/', !Ref S3Key, '/', !Ref ProjectVersion]] - '/scripts/install-ranger-admin-server.sh . 
' - 'yum update aws-cfn-bootstrap ' - '# Install the files and packages from the metadata ' - '/opt/aws/bin/cfn-init ' - ' --stack ' - !Ref 'AWS::StackName' - ' --resource myEC2 ' - ' --configsets ascending ' - ' --region ' - !Ref 'AWS::Region' - ' ' Tags: - Key: Name Value: RangerServer Outputs: IPAddress: Description: IP address of the Ranger server Value: !GetAtt [myEC2, PrivateIp] PrivateDNS: Description: Private DNS name of the Ranger server Value: !GetAtt [myEC2, PrivateDnsName] \ No newline at end of file diff --git a/aws_emr_blog_v3/cloudformation/step1_vpc-ec2-ad.template b/aws_emr_blog_v3/cloudformation/step1_vpc-ec2-ad.template index 130d50f..8397dd1 100644 --- a/aws_emr_blog_v3/cloudformation/step1_vpc-ec2-ad.template +++ b/aws_emr_blog_v3/cloudformation/step1_vpc-ec2-ad.template @@ -179,6 +179,7 @@ Resources: Properties: TemplateURL: !Join ['', ['https://s3.amazonaws.com/', !Ref 'S3ArtifactBucket', '/', !Ref 'S3ArtifactKey', '/', !Ref 'ProjectVersion', '/cloudformation/', 'lambda-amilookup-win.template']] Parameters: + ProjectVersion: !Ref 'ProjectVersion' S3Bucket: !Ref 'S3Bucket' S3Key: !Ref 'S3Key' STEP1VPC: diff --git a/aws_emr_blog_v3/cloudformation/step2_ranger-rds-emr.template b/aws_emr_blog_v3/cloudformation/step2_ranger-rds-emr.template index 8aa618c..5306d48 100644 --- a/aws_emr_blog_v3/cloudformation/step2_ranger-rds-emr.template +++ b/aws_emr_blog_v3/cloudformation/step2_ranger-rds-emr.template @@ -216,7 +216,10 @@ Parameters: Description: 'Comma separated list of applications to install on the cluster e.g., ' Type: String Default: Hadoop, Spark, Hive, Livy, Hue - AllowedValues: ["Hadoop, Spark, Hive, Livy, Hue"] + AllowedValues: + - "Hadoop, Spark, Hive, Livy, Hue" + - "Hadoop, Spark, Hive, Livy, Hue, Trino" + - "Hadoop, Spark, Hive, Livy" EnableKerberos: Description: Enable Kerberos on the Cluster.
This is Required for Ranger EMR support Default: true @@ -229,6 +232,7 @@ Parameters: - emr-5.32.0 - emr-6.3.0 - emr-6.4.0 + - emr-6.7.0 S3Bucket: Description: S3Bucket for the code [update this is you want to run this stack in a region other than US-EAST-1] Type: String @@ -346,6 +350,7 @@ Resources: Parameters: S3Bucket: !Ref 'S3ArtifactBucket' S3Key: !Ref 'S3ArtifactKey' + ProjectVersion: !Ref 'ProjectVersion' VPC: !Ref VPC Subnet: !If [ InstallEMRRangerinPublicSubnet, !Ref PublicSubnet1AID, !Ref PrivateSubnet1AID ] DBHostName: !GetAtt 'RDSDatabase.Outputs.RDSInstanceAddress' @@ -369,6 +374,7 @@ Resources: Properties: TemplateURL: !Join ['', ['https://s3.amazonaws.com/', !Ref 'S3ArtifactBucket', '/', !Ref 'S3ArtifactKey', '/', !Ref 'ProjectVersion', '/cloudformation/', 'emr-template.template']] Parameters: + ProjectVersion: !Ref 'ProjectVersion' S3Bucket: !Ref 'S3Bucket' S3Key: !Ref 'S3Key' S3ArtifactBucket: !Ref 'S3ArtifactBucket' diff --git a/aws_emr_blog_v3/code/amilookup-win/amilookup-win.js b/aws_emr_blog_v3/code/amilookup-win/amilookup-win.js index 57c4883..9ab7668 100644 --- a/aws_emr_blog_v3/code/amilookup-win/amilookup-win.js +++ b/aws_emr_blog_v3/code/amilookup-win/amilookup-win.js @@ -12,7 +12,8 @@ var osNameToPattern = { "Windows Server 2008 R2 English 64-bit": "Windows_Server-2008-R2_SP1-English-64Bit-Base-*", "Windows Server 2012 RTM English 64-bit": "Windows_Server-2012-RTM-English-64Bit-Base-*", "Windows Server 2012 R2 English 64-bit": "Windows_Server-2012-R2_RTM-English-64Bit-Base-*", - "Windows Server 2016 Base English 64-bit": "Windows_Server-2016-English-Full-Base-*" + "Windows Server 2016 Base English 64-bit": "Windows_Server-2016-English-Full-Base-*", + "Windows Server 2019 Base English 64-bit": "Windows_Server-2019-English-Full-Base-*" }; var aws = require("aws-sdk"); diff --git a/aws_emr_blog_v3/code/launch-cluster/cremr.py b/aws_emr_blog_v3/code/launch-cluster/cremr.py index 4050057..d31a7ad 100644 --- 
a/aws_emr_blog_v3/code/launch-cluster/cremr.py +++ b/aws_emr_blog_v3/code/launch-cluster/cremr.py @@ -25,59 +25,41 @@ def create(event, context): apps = event["ResourceProperties"]["AppsEMR"] emrReleaseLabel = event["ResourceProperties"]["emrReleaseLabel"] - prestoEngineRequested = "Presto" - isPrestoAppRequested = False - isSparkAppRequested = False + isTrinoAppRequested = False formatted_applist = apps.split(",") applist = [] for app in formatted_applist: applist.append({"Name": app.strip()}) - if app.strip() in ["Presto", "PrestoSQL"]: - isPrestoAppRequested = True - prestoEngineRequested = app.strip() - if app.strip() in ["Spark"]: - isSparkAppRequested = True - + if app.strip() in ["Trino"]: + isTrinoAppRequested = True try: - emrVersion = emrReleaseLabel.split("-")[1].split(".") + emrVersion = emrReleaseLabel.split("-")[1] + # emrMinorVersion = emrReleaseLabel.split("-")[1].split(".") client = boto3.client("emr", region_name=event["ResourceProperties"]["StackRegion"]) - scriptRunnerJar = "s3://"+event["ResourceProperties"]["StackRegion"]+".elasticmapreduce/libs/script-runner/script-runner.jar" + scriptRunnerJar = "s3://" + event["ResourceProperties"][ + "StackRegion"] + ".elasticmapreduce/libs/script-runner/script-runner.jar" cluster_name = "EMR-" + event["ResourceProperties"]["StackName"] cluster_parameters = {'Name': cluster_name, 'ReleaseLabel': emrReleaseLabel, 'LogUri': event["ResourceProperties"]["LogFolder"], 'AdditionalInfo': '{"clusterType":"development"}', + 'EbsRootVolumeSize': 100, 'BootstrapActions': [ - # { - # "Name": "Install packages", - # "ScriptBootstrapAction": { - # "Path": "s3://" + event["ResourceProperties"]["S3ArtifactBucket"] + "/" + event["ResourceProperties"][ - # "S3Key"] + "/" + event["ResourceProperties"][ - # "ProjectVersion"] + "/scripts/install-required-packages.sh" - # } - # }, - { - "Name": "Download scripts", - "ScriptBootstrapAction": { - "Path": "s3://" + event["ResourceProperties"]["S3Bucket"] + "/" + 
event["ResourceProperties"][ - "S3Key"] + "/" + event["ResourceProperties"][ - "ProjectVersion"] + "/scripts/download-scripts.sh", - "Args": [ - "s3://" + event["ResourceProperties"]["S3ArtifactBucket"] + "/" + event["ResourceProperties"][ - "S3ArtifactKey"] + "/" + event["ResourceProperties"][ - "ProjectVersion"] - ] - } - } - # , - # { - # "Name": "Setup HDFS home dir", - # "ScriptBootstrapAction": { - # "Path": "s3://" + event["ResourceProperties"]["S3ArtifactBucket"] + "/" + event["ResourceProperties"][ - # "S3ArtifactKey"] + "/" + event["ResourceProperties"][ - # "ProjectVersion"] + "/scripts/create-hdfs-home-ba.sh" - # } - # } - ], + { + "Name": "Download scripts", + "ScriptBootstrapAction": { + "Path": "s3://" + event["ResourceProperties"]["S3Bucket"] + "/" + + event["ResourceProperties"][ + "S3Key"] + "/" + event["ResourceProperties"][ + "ProjectVersion"] + "/scripts/download-scripts.sh", + "Args": [ + "s3://" + event["ResourceProperties"]["S3ArtifactBucket"] + "/" + + event["ResourceProperties"][ + "S3ArtifactKey"] + "/" + event["ResourceProperties"][ + "ProjectVersion"] + ] + } + } + ], 'Applications': applist, 'Steps': [ { @@ -91,17 +73,17 @@ def create(event, context): ] } }, - # { - # "Name": "CreateExtendedHiveTables", - # "ActionOnFailure": "CONTINUE", - # "HadoopJarStep": { - # "Jar": scriptRunnerJar, - # "Args": [ - # "/mnt/tmp/aws-blog-emr-ranger/scripts/emr-steps/createdExtendedHiveTables.sh", - # event["ResourceProperties"]["StackRegion"] - # ] - # } - # }, + { + "Name": "CreateExtendedHiveTables", + "ActionOnFailure": "CONTINUE", + "HadoopJarStep": { + "Jar": scriptRunnerJar, + "Args": [ + "/mnt/tmp/aws-blog-emr-ranger/scripts/emr-steps/createdExtendedHiveTables.sh", + event["ResourceProperties"]["StackRegion"] + ] + } + }, { "Name": "LoadHDFSData", "ActionOnFailure": "CONTINUE", @@ -113,25 +95,6 @@ def create(event, context): ] } }, - # { - # "Name": "InstallHiveHDFSRangerPlugin", - # "ActionOnFailure": "CONTINUE", - # "HadoopJarStep": { - # 
"Jar": scriptRunnerJar, - # "Args": [ - # "/mnt/tmp/aws-blog-emr-ranger/scripts/emr-steps/install-hive-hdfs-ranger-plugin.sh", - # event["ResourceProperties"]["RangerHostname"], - # event["ResourceProperties"]["RangerVersion"], - # "s3://" + event["ResourceProperties"]["S3ArtifactBucket"] + "/" + event["ResourceProperties"]["S3ArtifactKey"], - # event["ResourceProperties"][ - # "ProjectVersion"], - # event["ResourceProperties"]["emrReleaseLabel"], - # event["ResourceProperties"]["RangerHttpProtocol"], - # event["ResourceProperties"]["InstallCloudWatchAgentForAudit"] - # ] - # } - # }, - { "Name": "InstallRangerServiceDef", "ActionOnFailure": "CONTINUE", @@ -267,7 +230,8 @@ def create(event, context): "Classification": "ldap", "Properties": { "base_dn": event["ResourceProperties"]["LDAPGroupSearchBase"], - "bind_dn": event["ResourceProperties"]["LDAPBindUserName"] + '@' + + "bind_dn": event["ResourceProperties"][ + "LDAPBindUserName"] + '@' + event["ResourceProperties"]["DomainDNSName"], "bind_password": event["ResourceProperties"][ "LDAPBindPassword"], @@ -320,16 +284,6 @@ def create(event, context): } ) - # if event["ResourceProperties"]["InstallCloudWatchAgentForAudit"] == "true": - # cluster_parameters['BootstrapActions'].append( - # { - # "Name": "Install cloudwatch agent", - # "ScriptBootstrapAction": { - # "Path": "s3://" + event["ResourceProperties"]["S3ArtifactBucket"] + "/" + event["ResourceProperties"][ - # "S3ArtifactKey"] + "/" + event["ResourceProperties"][ - # "ProjectVersion"] + "/scripts/install-cloudwatch-agent.sh" - # } - # }) if event["ResourceProperties"]["EMRSecurityConfig"] != "false": cluster_parameters['SecurityConfiguration'] = event["ResourceProperties"]["EMRSecurityConfig"] cluster_parameters['KerberosAttributes'] = { @@ -340,20 +294,6 @@ def create(event, context): "ADDomainJoinPassword": event["ResourceProperties"]["ADDomainJoinPassword"] } - # if event["ResourceProperties"]["UseAWSGlueForHiveMetastore"] == "true": - # 
cluster_parameters['Configurations'].append({ - # "Classification": "hive-site", - # "Properties": { - # "hive.server2.thrift.http.port": "10001", - # "hive.server2.thrift.http.path": "cliservice", - # "hive.server2.transport.mode": "binary", - # "hive.server2.allow.user.substitution": "true", - # "hive.server2.authentication.kerberos.principal": "hive/_HOST@"+event["ResourceProperties"]["DefaultDomain"], - # "hive.server2.enable.doAs": "false", - # "hive.metastore.client.factory.class": "com.amazonaws.glue.catalog.metastore.AWSGlueDataCatalogHiveClientFactory" - # } - # }) - # else: cluster_parameters['Configurations'].append({ "Classification": "hive-site", "Properties": { @@ -366,21 +306,12 @@ def create(event, context): "hive.server2.thrift.http.path": "cliservice", "hive.server2.transport.mode": "binary", "hive.server2.allow.user.substitution": "true", - "hive.server2.authentication.kerberos.principal": "hive/_HOST@"+event["ResourceProperties"]["DefaultDomain"], + "hive.server2.authentication.kerberos.principal": "hive/_HOST@" + event["ResourceProperties"][ + "DefaultDomain"], "hive.server2.enable.doAs": "false" } }) - # ## If Hive LDAP - # cluster_parameters['Configurations'].append({ - # "Classification": "hive-site", - # "Properties": { - # "hive.server2.authentication": "LDAP", - # "hive.server2.authentication.ldap.url": "ldap://" + event["ResourceProperties"][ - # "LDAPHostPrivateIP"], - # "hive.server2.authentication.ldap.baseDN": event["ResourceProperties"]["LDAPGroupSearchBase"] - # } - # }) cluster_parameters['Configurations'].append({ "Classification": "core-site", "Properties": { @@ -399,74 +330,33 @@ def create(event, context): "hadoop.proxyuser.livy.hosts": "*", "hadoop.proxyuser.hive.hosts": "*", "hadoop.proxyuser.hive.groups": "*", + "hadoop.proxyuser.trino.hosts": "*", + "hadoop.proxyuser.trino.groups": "*", "hadoop.proxyuser.hue_hive.groups": "*" } }) + if emrVersion.split(".")[0] == '6' and emrVersion.split(".")[1] == '7': + 
cluster_parameters['BootstrapActions'].append({ + "Name": "Remove Yum Package Name Validator", + "ScriptBootstrapAction": { + "Path": "s3://" + event["ResourceProperties"]["S3Bucket"] + "/" + + event["ResourceProperties"][ + "S3Key"] + "/" + event["ResourceProperties"][ + "ProjectVersion"] + "/scripts/remove-yum-package-name-validator.sh" + } + }) + if isTrinoAppRequested: + cluster_parameters['Steps'].append({ + "Name": "Trino-update-user-mapping", + "ActionOnFailure": "CONTINUE", + "HadoopJarStep": { + "Jar": scriptRunnerJar, + "Args": [ + "/mnt/tmp/aws-blog-emr-ranger/scripts/emr-steps/trino-update-user-mapping.sh" + ] + } + }) - # if isPrestoAppRequested: - # if event["ResourceProperties"]["UseAWSGlueForHiveMetastore"] == "false": - # cluster_parameters['BootstrapActions'].append( - # { - # "Name": "Setup Presto Kerberos", - # "ScriptBootstrapAction": { - # "Path": "s3://" + event["ResourceProperties"]["S3ArtifactBucket"] + "/" + event["ResourceProperties"][ - # "S3ArtifactKey"] + "/" + event["ResourceProperties"][ - # "ProjectVersion"] + "/scripts/configure_presto_kerberos_ba.sh", - # "Args": [ - # "s3://" + event["ResourceProperties"]["S3ArtifactBucket"] + "/" + event["ResourceProperties"][ - # "S3ArtifactKey"] + "/" + event["ResourceProperties"][ - # "ProjectVersion"], - # event["ResourceProperties"]["KdcAdminPassword"] - # ] - # } - # }) - # if event["ResourceProperties"]["UseAWSGlueForHiveMetastore"] == "true": - # if prestoEngineRequested == "PrestoSQL": - # cluster_parameters['Configurations'].append( - # { - # "Classification": "prestosql-connector-hive", - # "Properties": { - # "hive.metastore": "glue" - # } - # }); - # else: - # cluster_parameters['Configurations'].append( - # { - # "Classification": "presto-connector-hive", - # "Properties": { - # "hive.metastore": "glue" - # } - # }); - # if isSparkAppRequested and event["ResourceProperties"]["UseAWSGlueForHiveMetastore"] == "true": - # cluster_parameters['Configurations'].append( - # { - # 
"Classification": "spark-hive-site", - # "Properties": { - # "hive.server2.enable.doAs": "true", - # "hive.metastore.client.factory.class": "com.amazonaws.glue.catalog.metastore.AWSGlueDataCatalogHiveClientFactory" - # } - # }); - - # if isPrestoAppRequested and event["ResourceProperties"]["InstallPrestoPlugin"] == "true": - # cluster_parameters['Steps'].append({ - # "Name": "InstallRangerPrestoPlugin", - # "ActionOnFailure": "CONTINUE", - # "HadoopJarStep": { - # "Jar": scriptRunnerJar, - # "Args": [ - # "/mnt/tmp/aws-blog-emr-ranger/scripts/emr-steps/install-presto-ranger-plugin.sh", - # event["ResourceProperties"]["RangerHostname"], - # event["ResourceProperties"]["RangerVersion"], - # "s3://" + event["ResourceProperties"]["S3ArtifactBucket"] + "/" + event["ResourceProperties"]["S3ArtifactKey"], - # event["ResourceProperties"][ - # "ProjectVersion"], - # event["ResourceProperties"]["emrReleaseLabel"], - # prestoEngineRequested, - # event["ResourceProperties"]["RangerHttpProtocol"], - # event["ResourceProperties"]["InstallCloudWatchAgentForAudit"] - # ] - # } - # }) cluster_id = client.run_job_flow(**cluster_parameters) physical_resource_id = cluster_id["JobFlowId"] diff --git a/aws_emr_blog_v3/inputdata/processSalesData.py b/aws_emr_blog_v3/inputdata/processSalesData.py new file mode 100644 index 0000000..d07addd --- /dev/null +++ b/aws_emr_blog_v3/inputdata/processSalesData.py @@ -0,0 +1,33 @@ +from pyspark import SparkContext +from pyspark import SQLContext + +# Initialize spark SQL context +sqlContext = SQLContext(sparkContext=SparkContext.getOrCreate()) + +# Load products data from the staging.products table +products_sql = sqlContext.sql("select * from staging.products") +products_sql.registerTempTable("products") +products_sql.show(n=2) + + +# customers_sql = sqlContext.sql("select * from staging.customers") +# customers_sql.show(n=2) + +# Load orders data from staging.orders into a DataFrame +orders_sql = sqlContext.sql("select order_date,price,sku from staging.orders") 
+orders_sql.registerTempTable("orders") + +# Join orders and products to get the sales rollup +sales_breakup_sql = sqlContext.sql(""" + SELECT sum(orders.price) total_sales, products.sku, products.product_category + FROM orders join products where orders.sku = products.sku + group by products.sku, products.product_category + """) + +#products_all = products_sql.map(lambda p: "Counts: {0} Ipsum Comment: {1}".format(p.name, p.comment_col)) +sales_breakup_sql.show(n=2) + +# Write output back to s3 under processed +sales_breakup_sql.write.mode('overwrite'). \ + format("parquet").option("path", "s3://aws-datalake-security-data-vbhamidi-us-east-1/processed/sales/"). \ + saveAsTable("processed.sales") diff --git a/aws_emr_blog_v3/inputdata/ranger-trino-policy-analyst1.json b/aws_emr_blog_v3/inputdata/ranger-trino-policy-analyst1.json new file mode 100644 index 0000000..a06e8e0 --- /dev/null +++ b/aws_emr_blog_v3/inputdata/ranger-trino-policy-analyst1.json @@ -0,0 +1,100 @@ +{ + "isEnabled":true, + "version":1, + "service":"amazonemrtrino", + "name":"Analyst1Policy", + "policyType":0, + "policyPriority":0, + "description":"Analyst1Policy", + "isAuditEnabled":true, + "resources": { + "schema": { + "values": [ + "default" + ], + "isExcludes": false, + "isRecursive": false + }, + "catalog": { + "values": [ + "hive" + ], + "isExcludes": false, + "isRecursive": false + }, + "column": { + "values": [ + "*" + ], + "isExcludes": false, + "isRecursive": false + }, + "table": { + "values": [ + "tblanalyst1" + ], + "isExcludes": false, + "isRecursive": false + } + }, + "policyItems": [ + { + "accesses": [ + { + "type": "select", + "isAllowed": true + }, + { + "type": "insert", + "isAllowed": true + }, + { + "type": "use", + "isAllowed": true + }, + { + "type": "alter", + "isAllowed": true + }, + { + "type": "all", + "isAllowed": true + } + ], + "users": [ + "analyst1" + ], + "groups": [], + "roles": [], + "conditions": [], + "delegateAdmin": false + } + ], + "denyPolicyItems":[ + + 
], + "allowExceptions":[ + + ], + "denyExceptions":[ + + ], + "dataMaskPolicyItems":[ + + ], + "rowFilterPolicyItems":[ + + ], + "serviceType":"trino", + "options":{ + + }, + "validitySchedules":[ + + ], + "policyLabels":[ + + ], + "zoneName":"", + "isDenyAllElse":false +} diff --git a/aws_emr_blog_v3/inputdata/ranger-trino-policy-hive-catalog-access.json b/aws_emr_blog_v3/inputdata/ranger-trino-policy-hive-catalog-access.json new file mode 100644 index 0000000..38e9023 --- /dev/null +++ b/aws_emr_blog_v3/inputdata/ranger-trino-policy-hive-catalog-access.json @@ -0,0 +1,75 @@ +{ + "isEnabled":true, + "version":1, + "service":"amazonemrtrino", + "name":"HiveCatalogAccess", + "policyType":0, + "policyPriority":0, + "description":"HiveCatalogAccess", + "isAuditEnabled":true, + "resources": { + "schema": { + "values": [ + "*" + ], + "isExcludes": false, + "isRecursive": false + }, + "catalog": { + "values": [ + "hive" + ], + "isExcludes": false, + "isRecursive": false + } + }, + "policyItems": [ + { + "accesses": [ + { + "type": "select", + "isAllowed": true + }, + { + "type": "use", + "isAllowed": true + } + ], + "users": [ + "analyst1", + "analyst2" + ], + "groups": [], + "roles": [], + "conditions": [], + "delegateAdmin": false + } + ], + "denyPolicyItems":[ + + ], + "allowExceptions":[ + + ], + "denyExceptions":[ + + ], + "dataMaskPolicyItems":[ + + ], + "rowFilterPolicyItems":[ + + ], + "serviceType":"trino", + "options":{ + + }, + "validitySchedules":[ + + ], + "policyLabels":[ + + ], + "zoneName":"", + "isDenyAllElse":false +} diff --git a/aws_emr_blog_v3/inputdata/ranger-trino-policy-hive-catalog-table-access.json b/aws_emr_blog_v3/inputdata/ranger-trino-policy-hive-catalog-table-access.json new file mode 100644 index 0000000..92f58b7 --- /dev/null +++ b/aws_emr_blog_v3/inputdata/ranger-trino-policy-hive-catalog-table-access.json @@ -0,0 +1,92 @@ +{ + "isEnabled":true, + "version":1, + "service":"amazonemrtrino", + "name":"HiveCatalogTableAccess", + 
"policyType":0, + "policyPriority":0, + "description":"HiveCatalogTableAccess", + "isAuditEnabled":true, + "resources": { + "schema": { + "values": [ + "information_schema" + ], + "isExcludes": false, + "isRecursive": false + }, + "catalog": { + "values": [ + "hive" + ], + "isExcludes": false, + "isRecursive": false + }, + "column": { + "values": [ + "*" + ], + "isExcludes": false, + "isRecursive": false + }, + "table": { + "values": [ + "tables", + "schemata", + "views", + "columns" + ], + "isExcludes": false, + "isRecursive": false + } + }, + "policyItems": [ + { + "accesses": [ + { + "type": "select", + "isAllowed": true + }, + { + "type": "use", + "isAllowed": true + } + ], + "users": [ + "analyst1", + "analyst2" + ], + "groups": [], + "roles": [], + "conditions": [], + "delegateAdmin": false + } + ], + "denyPolicyItems":[ + + ], + "allowExceptions":[ + + ], + "denyExceptions":[ + + ], + "dataMaskPolicyItems":[ + + ], + "rowFilterPolicyItems":[ + + ], + "serviceType":"trino", + "options":{ + + }, + "validitySchedules":[ + + ], + "policyLabels":[ + + ], + "zoneName":"", + "isDenyAllElse":false +} diff --git a/aws_emr_blog_v3/inputdata/ranger-trino-policy-impersonate-user.json b/aws_emr_blog_v3/inputdata/ranger-trino-policy-impersonate-user.json new file mode 100644 index 0000000..f7c2baa --- /dev/null +++ b/aws_emr_blog_v3/inputdata/ranger-trino-policy-impersonate-user.json @@ -0,0 +1,63 @@ +{ + "isEnabled":true, + "version":1, + "service":"amazonemrtrino", + "name":"ImpersonateUser", + "policyType":0, + "policyPriority":0, + "description":"ImpersonateUser", + "isAuditEnabled":true, + "resources": { + "trinouser": { + "values": [ + "analyst*" + ], + "isExcludes": false, + "isRecursive": false + } + }, + "policyItems": [ + { + "accesses": [ + { + "type": "impersonate", + "isAllowed": true + } + ], + "users": [ + "trino" + ], + "groups": [], + "roles": [], + "conditions": [], + "delegateAdmin": false + } + ], + "denyPolicyItems":[ + + ], + "allowExceptions":[ + + 
], + "denyExceptions":[ + + ], + "dataMaskPolicyItems":[ + + ], + "rowFilterPolicyItems":[ + + ], + "serviceType":"trino", + "options":{ + + }, + "validitySchedules":[ + + ], + "policyLabels":[ + + ], + "zoneName":"", + "isDenyAllElse":false +} diff --git a/aws_emr_blog_v3/inputdata/ranger-trino-repo.json b/aws_emr_blog_v3/inputdata/ranger-trino-repo.json new file mode 100644 index 0000000..a10e6bc --- /dev/null +++ b/aws_emr_blog_v3/inputdata/ranger-trino-repo.json @@ -0,0 +1,13 @@ +{ + "name": "amazonemrtrino", + "description": "Create EMR Trino Repo using CURL", + "type": "trino", + "configs": { + "username": "policymgr_hive", + "password": "policymgr_hive", + "jdbc.driverClassName": "org.apache.hive.jdbc.HiveDriver", + "jdbc.url": "jdbc:hive2://emr_masternode:10000/default", + "commonNameForCertificate": "*.default_domain" + }, + "isEnabled": true +} diff --git a/aws_emr_blog_v3/inputdata/ranger-trino-s3-policy.json b/aws_emr_blog_v3/inputdata/ranger-trino-s3-policy.json new file mode 100644 index 0000000..224dbb8 --- /dev/null +++ b/aws_emr_blog_v3/inputdata/ranger-trino-s3-policy.json @@ -0,0 +1,61 @@ +{ + "service": "amazonemrs3", + "name": "Trino S3", + "policyType": 0, + "policyPriority": 0, + "description": "", + "isAuditEnabled": true, + "resources": { + "sthreeresource": { + "values": [ + "us-east-1.elasticmapreduce.samples/*", + "us-east-1.elasticmapreduce.samples", + "aws-bigdata-blog", + "aws-bigdata-blog/artifacts/*" + ], + "isExcludes": false, + "isRecursive": true + } + }, + "policyItems": [ + { + "accesses": [ + { + "type": "GetObject", + "isAllowed": true + }, + { + "type": "ListObjects", + "isAllowed": true + }, + { + "type": "PutObjects", + "isAllowed": true + }, + { + "type": "DeleteObjects", + "isAllowed": true + } + ], + "users": [ + "trino" + ], + "groups": [], + "roles": [], + "conditions": [], + "delegateAdmin": false + } + ], + "denyPolicyItems": [], + "allowExceptions": [], + "denyExceptions": [], + "dataMaskPolicyItems": [], + 
"rowFilterPolicyItems": [], + "serviceType": "amazon-emr-emrfs", + "options": {}, + "validitySchedules": [], + "policyLabels": [], + "zoneName": "", + "isDenyAllElse": false, + "isEnabled": true +} diff --git a/aws_emr_blog_v3/inputdata/ranger-users/ranger-hue-user.json b/aws_emr_blog_v3/inputdata/ranger-users/ranger-hue-user.json new file mode 100644 index 0000000..ee9c9b1 --- /dev/null +++ b/aws_emr_blog_v3/inputdata/ranger-users/ranger-hue-user.json @@ -0,0 +1,15 @@ +{ + "name": "hue1", + "firstName": "hue", + "lastName": "", + "loginId": "hue1", + "emailAddress" : null, + "description" : "hue user", + "password" : "user1pass", + "groupIdList": [], + "groupNameList": [], + "status": 1, + "isVisible": 1, + "userRoleList": [ "ROLE_USER" ], + "userSource": 0 +} diff --git a/aws_emr_blog_v3/inputdata/ranger-users/ranger-trino-user.json b/aws_emr_blog_v3/inputdata/ranger-users/ranger-trino-user.json new file mode 100644 index 0000000..f731056 --- /dev/null +++ b/aws_emr_blog_v3/inputdata/ranger-users/ranger-trino-user.json @@ -0,0 +1,15 @@ +{ + "name": "trino", + "firstName": "trino", + "lastName": " ", + "loginId": "trino", + "emailAddress": null, + "description": "trino user", + "password" : "user1pass", + "groupIdList": [], + "groupNameList": [], + "status": 1, + "isVisible": 1, + "userRoleList": ["ROLE_USER"], + "userSource": 0 +} diff --git a/aws_emr_blog_v3/inputdata/redshift-queries.sql b/aws_emr_blog_v3/inputdata/redshift-queries.sql new file mode 100644 index 0000000..6f53be2 --- /dev/null +++ b/aws_emr_blog_v3/inputdata/redshift-queries.sql @@ -0,0 +1,6 @@ +create table public.products (company VARCHAR, link VARCHAR, price FLOAT, product_category VARCHAR, release_date VARCHAR, sku VARCHAR); + +COPY public.products +FROM 's3://aws-bigdata-blog/artifacts/aws-blog-emr-ranger/data/staging/products/' +IAM_ROLE '<>' +FORMAT AS PARQUET; diff --git a/aws_emr_blog_v3/inputdata/redshift.properties b/aws_emr_blog_v3/inputdata/redshift.properties index 65b736f..106bffe 
100644 --- a/aws_emr_blog_v3/inputdata/redshift.properties +++ b/aws_emr_blog_v3/inputdata/redshift.properties @@ -1,5 +1,4 @@ connector.name=redshift -connection-url=jdbc:postgresql://:5439/> -connection-user= -connection-password= -allow-drop-table=true +connection-url=jdbc:redshift://example.net:5439/database +connection-user=root +connection-password=secret diff --git a/aws_emr_blog_v3/inputdata/service-definition/2.0.0/ranger-servicedef-amazon-emr-spark.json b/aws_emr_blog_v3/inputdata/service-definition/2.0.0/ranger-servicedef-amazon-emr-spark.json index 18f42b1..a62a3d2 100644 --- a/aws_emr_blog_v3/inputdata/service-definition/2.0.0/ranger-servicedef-amazon-emr-spark.json +++ b/aws_emr_blog_v3/inputdata/service-definition/2.0.0/ranger-servicedef-amazon-emr-spark.json @@ -1,101 +1,140 @@ { - "name": "amazon-emr-spark", - "implClass": "org.apache.ranger.services.spark.RangerServiceSpark", - "label": "Amazon EMR Spark", - "description": "Amazon EMR Spark", - "guid": "f4707ecc-b5c6-11ea-b8ab-02f5a39015b1", - "resources": - [ - { - "itemId": 1, - "name": "database", - "type": "string", - "level": 10, - "parent": "", - "mandatory": true, - "lookupSupported": false, - "recursiveSupported": false, - "excludesSupported": true, - "matcher": "org.apache.ranger.plugin.resourcematcher.RangerDefaultResourceMatcher", - "matcherOptions": { "wildCard":true, "ignoreCase":true }, - "validationRegEx":"", - "validationMessage": "", - "uiHint":"", - "label": "EMR Spark Database", - "description": "EMR Spark Database" - }, + "name": "amazon-emr-spark", + "implClass": "org.apache.ranger.services.spark.RangerServiceSpark", + "label": "Amazon EMR Spark", + "description": "Amazon EMR Spark", + "guid": "f4707ecc-b5c6-11ea-b8ab-02f5a39015b1", + "resources": + [ + { + "itemId": 1, + "name": "database", + "type": "string", + "level": 10, + "parent": "", + "mandatory": true, + "lookupSupported": false, + "recursiveSupported": false, + "excludesSupported": true, + "matcher": 
"org.apache.ranger.plugin.resourcematcher.RangerDefaultResourceMatcher", + "matcherOptions": { "wildCard":true, "ignoreCase":true }, + "validationRegEx":"", + "validationMessage": "", + "uiHint":"", + "label": "EMR Spark Database", + "description": "EMR Spark Database" + }, - { - "itemId": 2, - "name": "table", - "type": "string", - "level": 20, - "parent": "database", - "mandatory": true, - "lookupSupported": false, - "recursiveSupported": false, - "excludesSupported": true, - "matcher": "org.apache.ranger.plugin.resourcematcher.RangerDefaultResourceMatcher", - "matcherOptions": { "wildCard":true, "ignoreCase":true }, - "validationRegEx":"", - "validationMessage": "", - "uiHint":"", - "label": "EMR Spark Table", - "description": "EMR Spark Table" - }, + { + "itemId": 2, + "name": "table", + "type": "string", + "level": 20, + "parent": "database", + "mandatory": true, + "lookupSupported": false, + "recursiveSupported": false, + "excludesSupported": true, + "matcher": "org.apache.ranger.plugin.resourcematcher.RangerDefaultResourceMatcher", + "matcherOptions": { "wildCard":true, "ignoreCase":true }, + "validationRegEx":"", + "validationMessage": "", + "uiHint":"", + "label": "EMR Spark Table", + "description": "EMR Spark Table" + }, - { - "itemId": 3, - "name": "column", - "type": "string", - "level": 30, - "parent": "table", - "mandatory": true, - "lookupSupported": false, - "recursiveSupported": false, - "excludesSupported": true, - "matcher": "org.apache.ranger.plugin.resourcematcher.RangerDefaultResourceMatcher", - "matcherOptions": { "wildCard":true, "ignoreCase":true }, - "validationRegEx":"", - "validationMessage": "", - "uiHint":"", - "label": "EMR Spark Column", - "description": "EMR Spark Column" - } - ], + { + "itemId": 3, + "name": "column", + "type": "string", + "level": 30, + "parent": "table", + "mandatory": true, + "lookupSupported": false, + "recursiveSupported": false, + "excludesSupported": true, + "matcher": 
"org.apache.ranger.plugin.resourcematcher.RangerDefaultResourceMatcher", + "matcherOptions": { "wildCard":true, "ignoreCase":true }, + "validationRegEx":"", + "validationMessage": "", + "uiHint":"", + "label": "EMR Spark Column", + "description": "EMR Spark Column" + }, - "accessTypes": - [ - { - "itemId": 1, - "name": "select", - "label": "select" - } - ], - "configs": - [ - { - "itemId": 1, - "name": "commonNameForCertificate", - "type": "string", - "mandatory": false, - "validationRegEx":"", - "validationMessage": "", - "uiHint":"", - "label": "Common Name for Certificate" - } - ], + { + "itemId": 4, + "name": "url", + "type": "string", + "level": 10, + "parent": "", + "mandatory": true, + "lookupSupported": false, + "recursiveSupported": true, + "excludesSupported": false, + "matcher": "org.apache.ranger.plugin.resourcematcher.RangerURLResourceMatcher", + "matcherOptions": { "wildCard":true, "ignoreCase":false }, + "validationRegEx":"", + "validationMessage": "", + "uiHint":"", + "label": "EMR Spark Table URL", + "description": "EMR Spark Table URL" + } + ], - "enums": - [ + "accessTypes": + [ + { + "itemId": 1, + "name": "select", + "label": "select" + }, + { + "itemId": 2, + "name": "update", + "label": "update" + }, + { + "itemId": 3, + "name": "alter", + "label": "alter" + }, + { + "itemId": 4, + "name": "read", + "label": "read" + }, + { + "itemId": 5, + "name": "write", + "label": "write" + } + ], + "configs": + [ + { + "itemId": 1, + "name": "commonNameForCertificate", + "type": "string", + "mandatory": false, + "validationRegEx":"", + "validationMessage": "", + "uiHint":"", + "label": "Common Name for Certificate" + } + ], - ], + "enums": + [ - "contextEnrichers": - [ - ], + ], - "policyConditions": - [ - ] -} + "contextEnrichers": + [ + ], + + "policyConditions": + [ + ] +} \ No newline at end of file diff --git a/aws_emr_blog_v3/inputdata/service-definition/2.0.0/ranger-servicedef-trino.json 
b/aws_emr_blog_v3/inputdata/service-definition/2.0.0/ranger-servicedef-trino.json new file mode 100644 index 0000000..4e1475b --- /dev/null +++ b/aws_emr_blog_v3/inputdata/service-definition/2.0.0/ranger-servicedef-trino.json @@ -0,0 +1,516 @@ +{ + "name": "trino", + "displayName": "trino", + "implClass": "", + "label": "Trino", + "description": "Trino", + "guid": "379a9fe5-1b6e-4091-a584-4890e245e6c1", + "resources": [ + { + "itemId": 1, + "name": "catalog", + "type": "string", + "level": 10, + "parent": "", + "mandatory": true, + "isValidLeaf": true, + "lookupSupported": true, + "recursiveSupported": false, + "excludesSupported": true, + "matcher": "org.apache.ranger.plugin.resourcematcher.RangerDefaultResourceMatcher", + "matcherOptions": { + "wildCard": true, + "ignoreCase": true + }, + "validationRegEx": "", + "validationMessage": "", + "uiHint": "", + "label": "Trino Catalog", + "description": "Trino Catalog" + }, + { + "itemId": 2, + "name": "schema", + "type": "string", + "level": 20, + "parent": "catalog", + "mandatory": true, + "isValidLeaf": true, + "lookupSupported": true, + "recursiveSupported": false, + "excludesSupported": true, + "matcher": "org.apache.ranger.plugin.resourcematcher.RangerDefaultResourceMatcher", + "matcherOptions": { + "wildCard": true, + "ignoreCase": true + }, + "validationRegEx": "", + "validationMessage": "", + "uiHint": "", + "label": "Trino Schema", + "description": "Trino Schema" + }, + { + "itemId": 3, + "name": "table", + "type": "string", + "level": 30, + "parent": "schema", + "mandatory": true, + "isValidLeaf": true, + "lookupSupported": true, + "recursiveSupported": false, + "excludesSupported": true, + "matcher": "org.apache.ranger.plugin.resourcematcher.RangerDefaultResourceMatcher", + "matcherOptions": { + "wildCard": true, + "ignoreCase": true + }, + "validationRegEx": "", + "validationMessage": "", + "uiHint": "", + "label": "Trino Table", + "description": "Trino Table" + }, + { + "itemId": 4, + "name": "column", + 
"type": "string", + "level": 40, + "parent": "table", + "mandatory": true, + "lookupSupported": true, + "recursiveSupported": false, + "excludesSupported": true, + "matcher": "org.apache.ranger.plugin.resourcematcher.RangerDefaultResourceMatcher", + "matcherOptions": { + "wildCard": true, + "ignoreCase": true + }, + "validationRegEx": "", + "validationMessage": "", + "uiHint": "", + "label": "Trino Column", + "description": "Trino Column" + }, + { + "itemId": 5, + "name": "trinouser", + "type": "string", + "level": 10, + "parent": "", + "mandatory": true, + "lookupSupported": false, + "recursiveSupported": false, + "excludesSupported": false, + "matcher": "org.apache.ranger.plugin.resourcematcher.RangerDefaultResourceMatcher", + "matcherOptions": { + "wildCard": true, + "ignoreCase": true + }, + "validationRegEx": "", + "validationMessage": "", + "uiHint": "", + "label": "Trino User", + "description": "Trino User", + "accessTypeRestrictions": ["impersonate"] + }, + { + "itemId": 6, + "name": "systemproperty", + "type": "string", + "level": 10, + "parent": "", + "mandatory": true, + "lookupSupported": false, + "recursiveSupported": false, + "excludesSupported": false, + "matcher": "org.apache.ranger.plugin.resourcematcher.RangerDefaultResourceMatcher", + "matcherOptions": { + "wildCard": true, + "ignoreCase": true + }, + "validationRegEx": "", + "validationMessage": "", + "uiHint": "", + "label": "System Property", + "description": "Trino System Property", + "accessTypeRestrictions": ["alter"] + }, + { + "itemId": 7, + "name": "sessionproperty", + "type": "string", + "level": 20, + "parent": "catalog", + "mandatory": true, + "lookupSupported": false, + "recursiveSupported": false, + "excludesSupported": false, + "matcher": "org.apache.ranger.plugin.resourcematcher.RangerDefaultResourceMatcher", + "matcherOptions": { + "wildCard": true, + "ignoreCase": true + }, + "validationRegEx": "", + "validationMessage": "", + "uiHint": "", + "label": "Catalog Session Property", 
+ "description": "Trino Catalog Session Property", + "accessTypeRestrictions": ["alter"] + }, + { + "itemId": 8, + "name": "function", + "type": "string", + "level": 10, + "parent": "", + "mandatory": true, + "lookupSupported": false, + "recursiveSupported": false, + "excludesSupported": false, + "matcher": "org.apache.ranger.plugin.resourcematcher.RangerDefaultResourceMatcher", + "matcherOptions": { + "wildCard": true, + "ignoreCase": true + }, + "validationRegEx": "", + "validationMessage": "", + "uiHint": "", + "label": "Trino Function", + "description": "Trino Function", + "accessTypeRestrictions": ["execute", "grant"] + }, + { + "itemId": 9, + "name": "procedure", + "type": "string", + "level": 30, + "parent": "schema", + "mandatory": true, + "lookupSupported": false, + "recursiveSupported": false, + "excludesSupported": false, + "matcher": "org.apache.ranger.plugin.resourcematcher.RangerDefaultResourceMatcher", + "matcherOptions": { + "wildCard": true, + "ignoreCase": true + }, + "validationRegEx": "", + "validationMessage": "", + "uiHint": "", + "label": "Schema Procedure", + "description": "Schema Procedure", + "accessTypeRestrictions": ["execute", "grant"] + }, + { + "itemId": 10, + "name": "url", + "type": "string", + "level": 10, + "parent": "", + "mandatory": true, + "lookupSupported": false, + "recursiveSupported": true, + "excludesSupported": false, + "matcher": "org.apache.ranger.plugin.resourcematcher.RangerPathResourceMatcher", + "matcherOptions": { + "wildCard": true, + "ignoreCase": false + }, + "validationRegEx": "", + "validationMessage": "", + "uiHint": "", + "label": "URL", + "description": "External URL", + "accessTypeRestrictions": ["read", "write"] + } + ], + "accessTypes": [ + { + "itemId": 1, + "name": "select", + "label": "Select" + }, + { + "itemId": 2, + "name": "insert", + "label": "Insert" + }, + { + "itemId": 3, + "name": "create", + "label": "Create" + }, + { + "itemId": 4, + "name": "drop", + "label": "Drop" + }, + { + "itemId": 
5, + "name": "delete", + "label": "Delete" + }, + { + "itemId": 6, + "name": "use", + "label": "Use" + }, + { + "itemId": 7, + "name": "alter", + "label": "Alter" + }, + { + "itemId": 8, + "name": "grant", + "label": "Grant" + }, + { + "itemId": 9, + "name": "revoke", + "label": "Revoke" + }, + { + "itemId": 10, + "name": "show", + "label": "Show" + }, + { + "itemId": 11, + "name": "impersonate", + "label": "Impersonate" + }, + { + "itemId": 12, + "name": "all", + "label": "All", + "impliedGrants": [ + "select", + "insert", + "create", + "delete", + "drop", + "use", + "alter", + "grant", + "revoke", + "show", + "impersonate", + "execute", + "read", + "write" + ] + }, + { + "itemId": 13, + "name": "execute", + "label": "execute" + }, + { + "itemId": 14, + "name": "read", + "label": "Read" + }, + { + "itemId": 15, + "name": "write", + "label": "Write" + } + ], + "configs": [ + { + "itemId": 1, + "name": "username", + "type": "string", + "mandatory": true, + "validationRegEx": "", + "validationMessage": "", + "uiHint": "", + "label": "Username" + }, + { + "itemId": 2, + "name": "password", + "type": "password", + "mandatory": false, + "validationRegEx": "", + "validationMessage": "", + "uiHint": "", + "label": "Password" + }, + { + "itemId": 3, + "name": "jdbc.driverClassName", + "type": "string", + "mandatory": true, + "validationRegEx": "", + "validationMessage": "", + "uiHint": "", + "defaultValue": "io.trino.jdbc.TrinoDriver" + }, + { + "itemId": 4, + "name": "jdbc.url", + "type": "string", + "mandatory": true, + "defaultValue": "", + "validationRegEx": "", + "validationMessage": "", + "uiHint": "" + }, + { + "itemId": 5, + "name": "commonNameForCertificate", + "type": "string", + "mandatory": false, + "validationRegEx":"", + "validationMessage": "", + "uiHint":"", + "label": "Common Name for Certificate" + } + ], + "enums": [ + ], + "contextEnrichers": [ + ], + "policyConditions": + [ + ], + "dataMaskDef": { + "accessTypes": [ + { + "name": "select" + } + ], + 
"resources": [ + { + "name": "catalog", + "matcherOptions": { + "wildCard": "true" + }, + "lookupSupported": true, + "uiHint":"{ \"singleValue\":true }" + }, + { + "name": "schema", + "matcherOptions": { + "wildCard": "true" + }, + "lookupSupported": true, + "uiHint":"{ \"singleValue\":true }" + }, + { + "name": "table", + "matcherOptions": { + "wildCard": "true" + }, + "lookupSupported": true, + "uiHint":"{ \"singleValue\":true }" + }, + { + "name": "column", + "matcherOptions": { + "wildCard": "true" + }, + "lookupSupported": true, + "uiHint":"{ \"singleValue\":true }" + } + ], + "maskTypes": [ + { + "itemId": 1, + "name": "MASK", + "label": "Redact", + "description": "Replace lowercase with 'x', uppercase with 'X', digits with '0'", + "transformer": "cast(regexp_replace(regexp_replace(regexp_replace({col},'([A-Z])', 'X'),'([a-z])','x'),'([0-9])','0') as {type})", + "dataMaskOptions": { + } + }, + { + "itemId": 2, + "name": "MASK_SHOW_LAST_4", + "label": "Partial mask: show last 4", + "description": "Show last 4 characters; replace rest with 'X'", + "transformer": "cast(regexp_replace({col}, '(.*)(.{4}$)', x -> regexp_replace(x[1], '.', 'X') || x[2]) as {type})" + }, + { + "itemId": 3, + "name": "MASK_SHOW_FIRST_4", + "label": "Partial mask: show first 4", + "description": "Show first 4 characters; replace rest with 'X'", + "transformer": "cast(regexp_replace({col}, '(^.{4})(.*)', x -> x[1] || regexp_replace(x[2], '.', 'X')) as {type})" + }, + { + "itemId": 4, + "name": "MASK_HASH", + "label": "Hash", + "description": "Hash the value of a varchar with sha256", + "transformer": "cast(to_hex(sha256(to_utf8({col}))) as {type})" + }, + { + "itemId": 5, + "name": "MASK_NULL", + "label": "Nullify", + "description": "Replace with NULL" + }, + { + "itemId": 6, + "name": "MASK_NONE", + "label": "Unmasked (retain original value)", + "description": "No masking" + }, + { + "itemId": 12, + "name": "MASK_DATE_SHOW_YEAR", + "label": "Date: show only year", + "description":
"Date: show only year", + "transformer": "date_trunc('year', {col})" + }, + { + "itemId": 13, + "name": "CUSTOM", + "label": "Custom", + "description": "Custom" + } + ] + }, + "rowFilterDef": { + "accessTypes": [ + { + "name": "select" + } + ], + "resources": [ + { + "name": "catalog", + "matcherOptions": { + "wildCard": "true" + }, + "lookupSupported": true, + "mandatory": true, + "uiHint": "{ \"singleValue\":true }" + }, + { + "name": "schema", + "matcherOptions": { + "wildCard": "true" + }, + "lookupSupported": true, + "mandatory": true, + "uiHint": "{ \"singleValue\":true }" + }, + { + "name": "table", + "matcherOptions": { + "wildCard": "true" + }, + "lookupSupported": true, + "mandatory": true, + "uiHint": "{ \"singleValue\":true }" + } + ] + } + +} diff --git a/aws_emr_blog_v3/inputdata/spark-notebook.py b/aws_emr_blog_v3/inputdata/spark-notebook.py new file mode 100644 index 0000000..cc0833e --- /dev/null +++ b/aws_emr_blog_v3/inputdata/spark-notebook.py @@ -0,0 +1,26 @@ +from pyspark import SparkContext +from pyspark import SQLContext + +# Initialize spark SQL context +sqlContext = SQLContext(sparkContext=sc) + +# Spark access allowed by the policy: +spark.sql("select * from tblanalyst1 limit 10").show() +# Spark access that will fail due to permission error: +spark.sql("select * from tblanalyst2 limit 10").show() +# S3 access allowed by the policy: `productsFile` +sqlContext.read.parquet("s3://aws-bigdata-blog/artifacts/aws-blog-emr-ranger/data/staging/products/") +# S3 access that will fail due to permission error: `customersFile` +sqlContext.read.parquet("s3://aws-bigdata-blog/artifacts/aws-blog-emr-ranger/data/staging/customers/") + +# Create Table in Hive +# CREATE EXTERNAL TABLE IF NOT EXISTS students_s3 (name VARCHAR(64), address VARCHAR(64)) +# PARTITIONED BY (student_id INT) +# STORED AS PARQUET +#LOCATION 's3://test-emr-security-ranger-beta-data/students_s3/' +studentsSQL = spark.sql("select * from default.students_s3") + 
+spark.conf.set("hive.exec.dynamic.partition.mode", "nonstrict") +spark.sql("INSERT INTO students_s3 VALUES ('Amy Smith', '123 Park Ave, San Jose', 111111)") + +studentsSQL.show() diff --git a/aws_emr_blog_v3/inputdata/trino-queries.sql b/aws_emr_blog_v3/inputdata/trino-queries.sql new file mode 100644 index 0000000..ea09180 --- /dev/null +++ b/aws_emr_blog_v3/inputdata/trino-queries.sql @@ -0,0 +1,16 @@ +-- This query shows how the Ranger Trino plugin uses column mask to NULL the ad_id and RowFilter to only filter records with page = 'fox.com' +-- To update policies use: https://ranger-ga-1-1772333129.us-east-1.elb.amazonaws.com/index.html#!/service/4/policies/0 +select * from default.tblanalyst1 limit 10; + +-- Query fails as the user does not have access +-- To update policies use: https://ranger-ga-1-1772333129.us-east-1.elb.amazonaws.com/index.html#!/service/4/policies/0 +select * from default.tblanalyst2 limit 10; + +-- Query Redshift data using the Trino Redshift Connector +-- This query shows how the Ranger Trino plugin uses column mask to NULL the 'firstname' and RowFilter to only filter records with city = 'Obaha' +-- To update policies use: https://ranger-ga-1-1772333129.us-east-1.elb.amazonaws.com/index.html#!/service/4/policies/0 +select * from redshift.public.users limit 10; + +-- Query fails as the user does not have access +-- To update policies use: https://ranger-ga-1-1772333129.us-east-1.elb.amazonaws.com/index.html#!/service/4/policies/0 +select * from default.tblanalyst2 limit 10; diff --git a/aws_emr_blog_v3/scripts/download-scripts.sh b/aws_emr_blog_v3/scripts/download-scripts.sh index 802b077..39a674b 100644 --- a/aws_emr_blog_v3/scripts/download-scripts.sh +++ b/aws_emr_blog_v3/scripts/download-scripts.sh @@ -24,6 +24,7 @@ set -x #================================================================ scripts_repo_path=$1 +#sudo yum -y install krb5-workstation krb5-libs krb5-auth-dialog mkdir -p /tmp/aws-blog-emr-ranger/scripts/emr-steps/ cd
/tmp/aws-blog-emr-ranger/scripts/emr-steps/ #sudo yum -y install svn diff --git a/aws_emr_blog_v3/scripts/emr-steps/createHiveTables.sh b/aws_emr_blog_v3/scripts/emr-steps/createHiveTables.sh index 2973cb3..f69f76a 100644 --- a/aws_emr_blog_v3/scripts/emr-steps/createHiveTables.sh +++ b/aws_emr_blog_v3/scripts/emr-steps/createHiveTables.sh @@ -57,5 +57,29 @@ PARTITIONED BY ( STORED AS SEQUENCEFILE LOCATION '$hive_script_data_location/joined_impressions/'; MSCK REPAIR TABLE tblanalyst2; +CREATE EXTERNAL TABLE IF NOT EXISTS impressions ( + requestBeginTime string, adId string, impressionId string, referrer string, + userAgent string, userCookie string, ip string +) +PARTITIONED BY (dt string) + ROW FORMAT + serde 'org.apache.hive.hcatalog.data.JsonSerDe' + with serdeproperties ( 'paths'='requestBeginTime, adId, impressionId, referrer, userAgent, userCookie, ip' ) +LOCATION '$hive_script_data_location/impressions/'; +MSCK REPAIR TABLE impressions; +CREATE EXTERNAL TABLE IF NOT EXISTS clicks ( + impressionId string + ) + partitioned by (dt string) + row format + serde 'org.apache.hive.hcatalog.data.JsonSerDe' + with serdeproperties ( 'paths'='impressionId' ) +location '$hive_script_data_location/clicks/' ; +MSCK REPAIR TABLE clicks; +CREATE TABLE IF NOT EXISTS user_mapping ( + user_name STRING, + page STRING) +STORED AS PARQUET +LOCATION 's3://aws-bigdata-blog/artifacts/aws-blog-emr-ranger/data/user_mapping/'; " >> createTable.hql sudo -u hive hive -f createTable.hql diff --git a/aws_emr_blog_v3/scripts/emr-steps/createdExtendedHiveTables.sh b/aws_emr_blog_v3/scripts/emr-steps/createdExtendedHiveTables.sh new file mode 100644 index 0000000..fad05ea --- /dev/null +++ b/aws_emr_blog_v3/scripts/emr-steps/createdExtendedHiveTables.sh @@ -0,0 +1,55 @@ +#!/bin/bash +set -euo pipefail +set -x +# Define variables +awsregion=$1 +cd /tmp/ +echo " +CREATE DATABASE IF NOT EXISTS staging + COMMENT 'Databse to hold the staging data for retail schema' + WITH DBPROPERTIES 
('creator'='AWS', 'Dept.'='EMR Ranger team'); + +CREATE DATABASE IF NOT EXISTS processed + COMMENT 'Databse to hold the processed data for retail schema' + WITH DBPROPERTIES ('creator'='AWS', 'Dept.'='EMR Ranger team'); + +CREATE EXTERNAL TABLE IF NOT EXISTS staging.orders ( +customer_id string COMMENT 'from deserializer', +order_date string COMMENT 'from deserializer', +price double COMMENT 'from deserializer', +sku string COMMENT 'from deserializer') +STORED AS PARQUET +LOCATION 's3://aws-bigdata-blog/artifacts/aws-blog-emr-ranger/data/staging/orders/'; + +MSCK REPAIR TABLE staging.orders; + + +CREATE EXTERNAL TABLE IF NOT EXISTS staging.customers ( +cbgid bigint COMMENT 'from deserializer', +customer_id string COMMENT 'from deserializer', +education_level string COMMENT 'from deserializer', +first_name string COMMENT 'from deserializer', +last_name string COMMENT 'from deserializer', +marital_status string COMMENT 'from deserializer', +region string COMMENT 'from deserializer', +state string COMMENT 'from deserializer') +STORED AS PARQUET +LOCATION 's3://aws-bigdata-blog/artifacts/aws-blog-emr-ranger/data/staging/customers/'; + +MSCK REPAIR TABLE staging.customers; + + +CREATE EXTERNAL TABLE IF NOT EXISTS staging.products ( +company string COMMENT 'from deserializer', +link string COMMENT 'from deserializer', +price double COMMENT 'from deserializer', +product_category string COMMENT 'from deserializer', +release_date string COMMENT 'from deserializer', +sku string COMMENT 'from deserializer') +STORED AS PARQUET +LOCATION 's3://aws-bigdata-blog/artifacts/aws-blog-emr-ranger/data/staging/products/'; + +MSCK REPAIR TABLE staging.products; + +" > createdExtendedHiveTables.hql +sudo -u hive hive -f /tmp/createdExtendedHiveTables.hql diff --git a/aws_emr_blog_v3/scripts/emr-steps/presto-cli-kerberos_fix.sh b/aws_emr_blog_v3/scripts/emr-steps/presto-cli-kerberos_fix.sh new file mode 100644 index 0000000..b5b6739 --- /dev/null +++ 
b/aws_emr_blog_v3/scripts/emr-steps/presto-cli-kerberos_fix.sh @@ -0,0 +1,48 @@ +#!/bin/bash + +TRUST_STORE_PASS=amazon +KEY_STORE_PASS=amazon + +HUE_INI='/etc/hue/conf.empty/hue.ini' + +sudo bash -c "cat > /usr/bin/trino-cli" <<'EOF' +#!/bin/bash +source /etc/trino/conf/trino-env.sh +TRINO_HOME=/usr/lib/trino +export PATH=$JAVA8_HOME/bin:$PATH + +USER_TICKET_CACHE=$(klist | grep cache | cut -d ':' -f3) +USER_KRB_PRINCIPAL=$(klist | grep Default | cut -d':' -f2 | tr -d ' ') + +$TRINO_HOME/bin/trino-cli-*-executable ${EXTRA_ARGS} --krb5-principal ${USER_KRB_PRINCIPAL} --krb5-credential-cache-path ${USER_TICKET_CACHE} "$@" + +EOF + +hue_trino_config() { + echo "Configuring Hue" + KrbREALM=$(sudo cat /etc/krb5.conf | grep default_realm | cut -d'=' -f2 | tr -d '[:blank:]') + HuetrinoKeytab='/etc/hue-trino.keytab' + KeyStorePath='/usr/lib/trino/etc/trino-client-truststore.jks' + TrustStorePath="$JAVA_HOME/lib/security/cacerts" + + TRINO_JDBC_URL="jdbc:trino://$(hostname -f):7778/hive/default?SSL=true\&SSLKeyStorePath=${KeyStorePath}\&SSLKeyStorePassword=${KEY_STORE_PASS}\&\ +SSLTrustStorePath=${TrustStorePath}\&SSLTrustStorePassword=${TRUST_STORE_PASS}\&KerberosConfigPath=/etc/krb5.conf\&\ +KerberosKeytabPath=${HuetrinoKeytab}\&KerberosPrincipal=trino/$(hostname -f)@${KrbREALM}\&KerberosRemoteServiceName=trino\&KerberosUseCanonicalHostname=false" + + trinoOpts='options='"'"'{"url": "'"${TRINO_JDBC_URL}"'","driver": "io.trino.jdbc.TrinoDriver","user":"","password":""}'"'" + + sudo cp /etc/trino.keytab /etc/hue-trino.keytab + sudo chown hue:hue /etc/hue-trino.keytab + sudo sed -i "s|.*io.trino.jdbc.TrinoDriver.*|$trinoOpts|" ${HUE_INI} + + sudo sed -i "s|.*pam_service=login| pam_service=login|" ${HUE_INI} + sudo sed -i "s|backend.*desktop.auth.backend.AllowFirstUserDjangoBackend.*|backend=desktop.auth.backend.PamBackend|" ${HUE_INI} + + sudo systemctl restart hue +} + +if [ -f "${HUE_INI}" ]; then + hue_trino_config +fi + +exit 0 diff --git 
a/aws_emr_blog_v3/scripts/emr-steps/trino-update-user-mapping.sh b/aws_emr_blog_v3/scripts/emr-steps/trino-update-user-mapping.sh new file mode 100644 index 0000000..dcb484a --- /dev/null +++ b/aws_emr_blog_v3/scripts/emr-steps/trino-update-user-mapping.sh @@ -0,0 +1,18 @@ +#!/bin/bash +set -euo pipefail +set -x + +HUE_INI='/etc/hue/conf.empty/hue.ini' + +## Update Trino user mapping +sudo sed -i "s|http-server.authentication.krb5.user-mapping.pattern.*|http-server.authentication.krb5.user-mapping.pattern=(.*)(/)(.*)|g" /etc/trino/conf/config.properties + +# Update Hue.ini +sudo sed -i 's/"io.trino.jdbc.TrinoDriver"/& , "user" : "", "password" : "" /' /etc/hue/conf/hue.ini + +sudo /opt/aws/puppet/bin/puppet apply -e 'service { "trino-server": ensure => false, }' +sudo /opt/aws/puppet/bin/puppet apply -e 'service { "trino-server": ensure => true, }' + +sudo systemctl restart hue + +exit 0 diff --git a/aws_emr_blog_v3/scripts/emr-tls/create-tls-certs.sh b/aws_emr_blog_v3/scripts/emr-tls/create-tls-certs.sh index 78c39ff..f438399 100644 --- a/aws_emr_blog_v3/scripts/emr-tls/create-tls-certs.sh +++ b/aws_emr_blog_v3/scripts/emr-tls/create-tls-certs.sh @@ -59,7 +59,7 @@ generate_certs() { rm -rf $1 mkdir -p $1 pushd $1 - openssl req -x509 -newkey rsa:4096 -keyout privateKey.pem -out certificateChain.pem -days 365 -nodes -subj ${certs_subject} + openssl req -x509 -newkey rsa:4096 -keyout privateKey.pem -out certificateChain.pem -days 1095 -nodes -subj ${certs_subject} cp certificateChain.pem trustedCertificates.pem zip -r -X ../$1-certs.zip certificateChain.pem privateKey.pem trustedCertificates.pem # rm -rf *.pem diff --git a/aws_emr_blog_v3/scripts/install-ranger-admin-server.sh b/aws_emr_blog_v3/scripts/install-ranger-admin-server.sh index fdf87b5..b09d071 100644 --- a/aws_emr_blog_v3/scripts/install-ranger-admin-server.sh +++ b/aws_emr_blog_v3/scripts/install-ranger-admin-server.sh @@ -372,6 +372,7 @@ rm -rf ${certs_path} sudo /usr/bin/ranger-admin stop || true sudo 
/usr/bin/ranger-admin start +sudo chkconfig ranger-admin on i=0; while ! timeout 1 bash -c "echo > /dev/tcp/$current_hostname/6182"; do sleep 10; @@ -383,6 +384,7 @@ done #Start Ranger Usersync sudo /usr/bin/ranger-usersync stop || true sudo /usr/bin/ranger-usersync start +sudo chkconfig ranger-usersync on #cd $installpath ## Update the Ranger service def @@ -399,7 +401,16 @@ for i in `find . -name "ranger-servicedef-*.json" -type f`; do curl -iv --insecure -u admin:admin -X POST -d @$file_name -H "Accept: application/json" -H "Content-Type: application/json" -k $HTTP_URL/service/public/v2/api/servicedef done +aws s3 cp $s3bucket/${project_version}/inputdata/ranger-users/ . --recursive --exclude "*" --include "*.json" --region us-east-1 +for i in `find . -name "ranger-*-user.json" -type f`; do + file_name=`echo "$i" | cut -c 3-` + echo "$file_name" + curl -iv --insecure -u admin:admin -X POST -d @$file_name -H "Accept: application/json" -H "Content-Type: application/json" -k $HTTP_URL/service/xusers/secure/users +done + # Restart SOLR sudo /opt/solr/ranger_audit_server/scripts/stop_solr.sh || true sudo /opt/solr/ranger_audit_server/scripts/start_solr.sh +sudo cp /opt/solr/bin/init.d/solr /etc/init.d/ || true +sudo chkconfig solr on || true #curl -X POST -H 'Content-Type: application/json' http://localhost:8983/solr/ranger_audits/update?commit=true -d '{ "delete": {"query":"*:*"} }' diff --git a/aws_emr_blog_v3/scripts/remove-yum-package-name-validator.sh b/aws_emr_blog_v3/scripts/remove-yum-package-name-validator.sh new file mode 100644 index 0000000..317e242 --- /dev/null +++ b/aws_emr_blog_v3/scripts/remove-yum-package-name-validator.sh @@ -0,0 +1 @@ +sudo sed -i -e "s#--yum-config-option skip_missing_names_on_install=False ##g" /usr/share/aws/emr/node-provisioner/bin/provision-node diff --git a/aws_emr_blog_v3/scripts/setup-trino-redshift-connector.sh b/aws_emr_blog_v3/scripts/setup-trino-redshift-connector.sh new file mode 100644 index 0000000..ce7a560 ---
/dev/null +++ b/aws_emr_blog_v3/scripts/setup-trino-redshift-connector.sh @@ -0,0 +1,5 @@ +# Write the Redshift catalog file as root: with 'sudo echo ... > file' the redirect is opened by the calling user, so pipe into 'sudo tee' instead. +echo "connector.name=redshift +connection-url=jdbc:redshift://example.net:5439/database +connection-user= +connection-password=" | sudo tee /etc/trino/conf.dist/catalog/redshift.properties > /dev/null From 2ae89474789dbd2aae335096de527f40f1df2f77 Mon Sep 17 00:00:00 2001 From: Varun Rao Bhamidimarri Date: Fri, 15 Jul 2022 11:39:28 -0500 Subject: [PATCH 6/6] Add a new examples folder with sample queries for hive/spark/trino --- aws_emr_blog_v3/{inputdata => examples}/processSalesData.py | 0 aws_emr_blog_v3/{inputdata => examples}/redshift-queries.sql | 0 aws_emr_blog_v3/{inputdata => examples}/spark-notebook.py | 0 aws_emr_blog_v3/{inputdata => examples}/trino-queries.sql | 0 4 files changed, 0 insertions(+), 0 deletions(-) rename aws_emr_blog_v3/{inputdata => examples}/processSalesData.py (100%) rename aws_emr_blog_v3/{inputdata => examples}/redshift-queries.sql (100%) rename aws_emr_blog_v3/{inputdata => examples}/spark-notebook.py (100%) rename aws_emr_blog_v3/{inputdata => examples}/trino-queries.sql (100%) diff --git a/aws_emr_blog_v3/inputdata/processSalesData.py b/aws_emr_blog_v3/examples/processSalesData.py similarity index 100% rename from aws_emr_blog_v3/inputdata/processSalesData.py rename to aws_emr_blog_v3/examples/processSalesData.py diff --git a/aws_emr_blog_v3/inputdata/redshift-queries.sql b/aws_emr_blog_v3/examples/redshift-queries.sql similarity index 100% rename from aws_emr_blog_v3/inputdata/redshift-queries.sql rename to aws_emr_blog_v3/examples/redshift-queries.sql diff --git a/aws_emr_blog_v3/inputdata/spark-notebook.py b/aws_emr_blog_v3/examples/spark-notebook.py similarity index 100% rename from aws_emr_blog_v3/inputdata/spark-notebook.py rename to aws_emr_blog_v3/examples/spark-notebook.py diff --git a/aws_emr_blog_v3/inputdata/trino-queries.sql b/aws_emr_blog_v3/examples/trino-queries.sql similarity index 100% rename from aws_emr_blog_v3/inputdata/trino-queries.sql
rename to aws_emr_blog_v3/examples/trino-queries.sql