
Commit

Changed based on feedback
lusoal committed May 10, 2024
1 parent 96cd130 commit e5a2ef2
Showing 7 changed files with 153 additions and 22 deletions.
@@ -1,14 +1,16 @@
# This manifest runs the spark-consumer application on EKS.
# Replace MY_BUCKET_NAME and MY_KAFKA_BROKERS_ADDRESS to match your environment.
---
apiVersion: "sparkoperator.k8s.io/v1beta2"
kind: SparkApplication
metadata:
name: spark-consumer
-  namespace: spark-operator
+  namespace: spark-team-a
spec:
type: Python
pythonVersion: "3"
mode: cluster
image: "public.ecr.aws/data-on-eks/consumer-spark-streaming-3.3.2-kafka:1"
image: "public.ecr.aws/data-on-eks/consumer-spark-streaming-3.3.2-kafka:1" # You can build your own image using the Dockefile in this folder
mainApplicationFile: "local:///app/app.py"
sparkVersion: "3.3.2"
deps:
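For context (not part of this commit): the truncated remainder of this manifest is where the two placeholders come in. One common way to wire them, sketched below, is via spec.arguments; the app in this repo may instead read them from sparkConf or environment variables, and the exact placeholder spelling in the file may differ. The service-account side of the change is sketched after the spark-team.tf section further down.

  arguments:
    - "MY_KAFKA_BROKERS_ADDRESS"               # hypothetical: Kafka bootstrap servers
    - "s3a://MY_BUCKET_NAME/spark-streaming/"  # hypothetical: S3 path the consumer writes to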
@@ -1,3 +1,5 @@
# This is the producer deployment file; you can adjust the number of replicas to produce more data.
# You will need to change __MY_AWS_REGION__, __MY_KAFKA_BROKERS__, and __MY_PRODUCER_ROLE_ARN__ to match your environment.
apiVersion: v1
kind: ServiceAccount
metadata:
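For context (not part of this commit): a minimal sketch of how the three placeholders are typically wired, assuming the usual IRSA pattern. The service-account name, env-var names, and image reference below are illustrative, not the exact contents of the elided part of this file.

apiVersion: v1
kind: ServiceAccount
metadata:
  name: producer-sa                  # hypothetical name
  annotations:
    eks.amazonaws.com/role-arn: __MY_PRODUCER_ROLE_ARN__
---
apiVersion: apps/v1
kind: Deployment
metadata:
  name: producer
spec:
  replicas: 1                        # increase to produce more data, as the comment above notes
  selector:
    matchLabels:
      app: producer
  template:
    metadata:
      labels:
        app: producer
    spec:
      serviceAccountName: producer-sa
      containers:
        - name: producer
          image: producer:latest     # hypothetical image reference
          env:
            - name: AWS_REGION
              value: __MY_AWS_REGION__
            - name: KAFKA_BROKERS
              value: __MY_KAFKA_BROKERS__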
17 changes: 1 addition & 16 deletions streaming/spark-streaming/terraform/apps.tf
@@ -73,19 +73,4 @@ module "producer_iam_role" {
}
}

module "consumer_iam_role" {
source = "terraform-aws-modules/iam/aws//modules/iam-role-for-service-accounts-eks"

role_name = "consumer-irsa"

role_policy_arns = {
arn = aws_iam_policy.consumer_s3_kafka.arn
}

oidc_providers = {
main = {
provider_arn = module.eks.oidc_provider_arn
namespace_service_accounts = ["spark-operator:consumer-sa"]
}
}
}
# Consumer IAM role and Spark additional components are being managed by spark-team.tf
2 changes: 1 addition & 1 deletion streaming/spark-streaming/terraform/outputs.tf
@@ -66,5 +66,5 @@ output "producer_iam_role_arn" {

output "consumer_iam_role_arn" {
description = "IAM role ARN for the consumer"
-  value       = module.consumer_iam_role.iam_role_arn
+  value       = module.spark_team_a_irsa.iam_role_arn
}
142 changes: 142 additions & 0 deletions streaming/spark-streaming/terraform/spark-team.tf
@@ -0,0 +1,142 @@
locals {
spark_team = "spark-team-a"
}

resource "kubernetes_namespace_v1" "spark_team_a" {
metadata {
name = local.spark_team
}
timeouts {
delete = "15m"
}
}

resource "kubernetes_service_account_v1" "spark_team_a" {
metadata {
name = local.spark_team
namespace = kubernetes_namespace_v1.spark_team_a.metadata[0].name
annotations = { "eks.amazonaws.com/role-arn" : module.spark_team_a_irsa.iam_role_arn }
}

automount_service_account_token = true
}
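
For reference (not part of this commit): the resource above renders to roughly the following Kubernetes object. The role ARN comes from the spark_team_a_irsa module below; the account ID and role name shown are illustrative and assume local.name keeps the default "spark-streaming-doeks".

apiVersion: v1
kind: ServiceAccount
metadata:
  name: spark-team-a
  namespace: spark-team-a
  annotations:
    eks.amazonaws.com/role-arn: arn:aws:iam::<account-id>:role/spark-streaming-doeks-spark-team-a  # illustrative ARN
automountServiceAccountToken: true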

resource "kubernetes_secret_v1" "spark_team_a" {
metadata {
name = "${local.spark_team}-secret"
namespace = kubernetes_namespace_v1.spark_team_a.metadata[0].name
annotations = {
"kubernetes.io/service-account.name" = kubernetes_service_account_v1.spark_team_a.metadata[0].name
"kubernetes.io/service-account.namespace" = kubernetes_namespace_v1.spark_team_a.metadata[0].name
}
}

type = "kubernetes.io/service-account-token"
}

#---------------------------------------------------------------
# IRSA for Spark driver/executor pods for "spark-team-a"
#---------------------------------------------------------------
module "spark_team_a_irsa" {
source = "aws-ia/eks-blueprints-addon/aws"
version = "~> 1.0"

# Disable helm release
create_release = false

# IAM role for service account (IRSA)
create_role = true
role_name = "${local.name}-${local.spark_team}"
create_policy = false
role_policies = {
consumer_s3_kafka_policy = aws_iam_policy.consumer_s3_kafka.arn
}

oidc_providers = {
this = {
provider_arn = module.eks.oidc_provider_arn
namespace = local.spark_team
service_account = local.spark_team
}
}
}

#---------------------------------------------------------------
# Kubernetes ClusterRole for the spark-team-a service account
#---------------------------------------------------------------
resource "kubernetes_cluster_role" "spark_role" {
metadata {
name = "spark-cluster-role"
}

rule {
verbs = ["get", "list", "watch"]
api_groups = [""]
resources = ["namespaces", "nodes", "persistentvolumes"]
}

rule {
verbs = ["list", "watch"]
api_groups = ["storage.k8s.io"]
resources = ["storageclasses"]
}
rule {
verbs = ["get", "list", "watch", "describe", "create", "edit", "delete", "deletecollection", "annotate", "patch", "label"]
api_groups = [""]
resources = ["serviceaccounts", "services", "configmaps", "events", "pods", "pods/log", "persistentvolumeclaims"]
}

rule {
verbs = ["create", "patch", "delete", "watch"]
api_groups = [""]
resources = ["secrets"]
}

rule {
verbs = ["get", "list", "watch", "describe", "create", "edit", "delete", "annotate", "patch", "label"]
api_groups = ["apps"]
resources = ["statefulsets", "deployments"]
}

rule {
verbs = ["get", "list", "watch", "describe", "create", "edit", "delete", "annotate", "patch", "label"]
api_groups = ["batch", "extensions"]
resources = ["jobs"]
}

rule {
verbs = ["get", "list", "watch", "describe", "create", "edit", "delete", "annotate", "patch", "label"]
api_groups = ["extensions"]
resources = ["ingresses"]
}

rule {
verbs = ["get", "list", "watch", "describe", "create", "edit", "delete", "deletecollection", "annotate", "patch", "label"]
api_groups = ["rbac.authorization.k8s.io"]
resources = ["roles", "rolebindings"]
}

depends_on = [module.spark_team_a_irsa]
}
#---------------------------------------------------------------
# Kubernetes ClusterRoleBinding for the spark-team-a service account
#---------------------------------------------------------------
resource "kubernetes_cluster_role_binding" "spark_role_binding" {
metadata {
name = "spark-cluster-role-bind"
}

subject {
kind = "ServiceAccount"
name = local.spark_team
namespace = local.spark_team
}

role_ref {
api_group = "rbac.authorization.k8s.io"
kind = "ClusterRole"
name = kubernetes_cluster_role.spark_role.id
}

depends_on = [module.spark_team_a_irsa]
}
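
Taken together (a sketch rather than the repo's exact manifest, not part of this commit): a SparkApplication submitted to the spark-team-a namespace only needs to reference the service account; the ClusterRoleBinding above supplies the Kubernetes permissions and IRSA supplies the S3/Kafka IAM permissions. Resource sizes and instance counts below are illustrative.

apiVersion: "sparkoperator.k8s.io/v1beta2"
kind: SparkApplication
metadata:
  name: spark-consumer
  namespace: spark-team-a
spec:
  type: Python
  mode: cluster
  image: "public.ecr.aws/data-on-eks/consumer-spark-streaming-3.3.2-kafka:1"
  mainApplicationFile: "local:///app/app.py"
  sparkVersion: "3.3.2"
  driver:
    cores: 1
    memory: "2g"                  # illustrative sizing
    serviceAccount: spark-team-a  # picks up IRSA and the ClusterRoleBinding
  executor:
    cores: 1
    instances: 2                  # illustrative sizing
    memory: "2g"
    serviceAccount: spark-team-a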
4 changes: 2 additions & 2 deletions streaming/spark-streaming/terraform/variables.tf
@@ -1,6 +1,6 @@
variable "name" {
description = "Name of the VPC and EKS Cluster"
default = "spark-operator-doeks"
default = "spark-streaming-doeks"
type = string
}

@@ -12,7 +12,7 @@ variable "region" {

variable "eks_cluster_version" {
description = "EKS Cluster version"
default = "1.28"
default = "1.29"
type = string
}

2 changes: 1 addition & 1 deletion streaming/spark-streaming/terraform/versions.tf
@@ -28,6 +28,6 @@ terraform {
# backend "s3" {
# bucket = "doeks-github-actions-e2e-test-state"
# region = "us-west-2"
-  # key    = "e2e/spark-k8s-operator/terraform.tfstate"
+  # key    = "e2e/spark-streaming-doeks/terraform.tfstate"
# }
}
