diff --git a/README.md b/README.md index 644fe81..ba52de9 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ # oke-flink -Deploy Flink Operator on a Kubernetes cluster on Oracle Cloud Infrastructure. +Deploy a Kubernetes cluster on Oracle Cloud Infrastructure with multiple node pools and add-ons like Apache Flink. [![Deploy to Oracle Cloud][magic_button]][magic_oke_flink_stack] @@ -15,7 +15,8 @@ The OKE cluster template features the following: - Option to use Secrets encryption. - Option to enable Image Validation and Pod Admission Controllers. - Option to install metrics server (required by cluster auto-scaler) -- Opton to install cert-manager (required by Flink Operator) +- Option to install cert-manager (required by Flink Operator) +- Option to install a monitoring stack based on Prometheus and Grafana ## Getting started with Apache Flink Operator @@ -64,6 +65,8 @@ spec: # high-availability.storageDir: s3:///ha rest.flamegraph.enabled: "true" restart-strategy: exponential-delay + metrics.reporters: prom + metrics.reporter.prom.factory.class: org.apache.flink.metrics.prometheus.PrometheusReporterFactory serviceAccount: flink podTemplate: apiVersion: v1 @@ -130,6 +133,19 @@ spec: upgradeMode: stateless # Use savepoint if state management is configuered. `last-state` is not supported. ``` +## Send Flink metrics to Prometheus + +To send Flink metrics to Prometheus, some specific configuration is needed in the Flink deployment. + +Make sure to add the following to your Flink Session or Application deployment: + +```yaml +spec: + flinkConfiguration: + metrics.reporters: prom + metrics.reporter.prom.factory.class: org.apache.flink.metrics.prometheus.PrometheusReporterFactory +``` + ## Use the Terraform template To use the Terraform template locally, configure the OCI Command Line Interface with a Private/Public key pair added to your user. 
diff --git a/add_on_helm_flink.tf b/add_on_helm_flink.tf index 07af728..0b90443 100644 --- a/add_on_helm_flink.tf +++ b/add_on_helm_flink.tf @@ -4,7 +4,7 @@ resource "helm_release" "flink_operator" { count = var.enable_flink ? 1 : 0 name = "flink-operator" - repository = "https://downloads.apache.org/flink/flink-kubernetes-operator-1.3.1/" + repository = "https://downloads.apache.org/flink/flink-kubernetes-operator-1.4.0/" chart = "flink-kubernetes-operator" namespace = "flink" create_namespace = true diff --git a/add_on_monitoring_stack.tf b/add_on_monitoring_stack.tf index 1fff235..0c2f238 100644 --- a/add_on_monitoring_stack.tf +++ b/add_on_monitoring_stack.tf @@ -10,10 +10,6 @@ locals { grafana_plugins = file("${path.module}/templates/grafana.plugins.yaml") } -output dash { - value = local.grafana_dashboards -} - resource "random_password" "grafana_password" { count = local.enable_monitoring_stack ? 1 : 0 length = 20 diff --git a/examples/flink-basic-example.yaml b/examples/flink-basic-example.yaml new file mode 100644 index 0000000..a26b0dc --- /dev/null +++ b/examples/flink-basic-example.yaml @@ -0,0 +1,42 @@ +################################################################################ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +################################################################################ + +apiVersion: flink.apache.org/v1beta1 +kind: FlinkDeployment +metadata: + name: basic-example +spec: + image: flink:1.15 + flinkVersion: v1_15 + flinkConfiguration: + taskmanager.numberOfTaskSlots: "2" + metrics.reporters: prom + metrics.reporter.prom.factory.class: org.apache.flink.metrics.prometheus.PrometheusReporterFactory + serviceAccount: flink + jobManager: + resource: + memory: "2048m" + cpu: 1 + taskManager: + resource: + memory: "2048m" + cpu: 1 + job: + jarURI: local:///opt/flink/examples/streaming/StateMachineExample.jar + parallelism: 2 + upgradeMode: stateless \ No newline at end of file diff --git a/schema.yaml b/schema.yaml index 8ceb52c..f6e4e45 100644 --- a/schema.yaml +++ b/schema.yaml @@ -410,6 +410,7 @@ variables: dependsOn: compartmentId: cluster_compartment_id required: true + default: 4 visible: and: - ge: @@ -442,6 +443,7 @@ variables: dependsOn: compartmentId: cluster_compartment_id required: true + default: 64 visible: and: - ge: @@ -502,7 +504,7 @@ variables: type: number minimum: 1 maximum: 256 - default: 3 + default: 6 title: Maximum Number of Nodes description: Maximum number of nodes the pool can scale to. required: true @@ -700,7 +702,7 @@ variables: type: number minimum: 1 maximum: 256 - default: 3 + default: 6 title: Maximum Number of Nodes description: Maximum number of nodes the pool can scale to. required: true @@ -890,7 +892,7 @@ variables: type: number minimum: 1 maximum: 256 - default: 3 + default: 6 title: Maximum Number of Nodes description: Maximum number of nodes the pool can scale to. 
required: true diff --git a/templates/prometheus_flink.scrapeConfigs.yaml b/templates/prometheus_flink.scrapeConfigs.yaml index b5e91e0..785a1cb 100644 --- a/templates/prometheus_flink.scrapeConfigs.yaml +++ b/templates/prometheus_flink.scrapeConfigs.yaml @@ -3,13 +3,13 @@ scrape_interval: 3s kubernetes_sd_configs: - role: pod - namespaces: - names: - - flink relabel_configs: - source_labels: [__meta_kubernetes_pod_label_component] action: keep regex: '(job|task)manager' + - source_labels: [__meta_kubernetes_namespace] + action: keep + regex: flink - source_labels: [__meta_kubernetes_pod_ip] action: replace target_label: __address__ diff --git a/variables.tf b/variables.tf index 23d5867..19fda8b 100644 --- a/variables.tf +++ b/variables.tf @@ -91,11 +91,11 @@ variable "np1_node_shape" { } variable "np1_ocpus" { - default = 1 + default = 4 } variable "np1_memory_gb" { - default = 4 + default = 64 } variable "np1_image_id" { @@ -147,11 +147,11 @@ variable "np2_node_shape" { } variable "np2_ocpus" { - default = 1 + default = 4 } variable "np2_memory_gb" { - default = 4 + default = 64 } variable "np2_image_id" { @@ -203,11 +203,11 @@ variable "np3_node_shape" { } variable "np3_ocpus" { - default = 1 + default = 4 } variable "np3_memory_gb" { - default = 4 + default = 64 } variable "np3_image_id" {