From 04edd533e5014557a548b48ffbb0fe79bed6a2a8 Mon Sep 17 00:00:00 2001 From: krassowski <5832902+krassowski@users.noreply.github.com> Date: Wed, 21 Feb 2024 13:38:42 +0000 Subject: [PATCH 1/9] Add "Open VS Code" entry in services --- .../services/jupyterhub/files/jupyterlab/overrides.json | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/_nebari/stages/kubernetes_services/template/modules/kubernetes/services/jupyterhub/files/jupyterlab/overrides.json b/src/_nebari/stages/kubernetes_services/template/modules/kubernetes/services/jupyterhub/files/jupyterlab/overrides.json index 357ab1af0c..fd6cafc624 100644 --- a/src/_nebari/stages/kubernetes_services/template/modules/kubernetes/services/jupyterhub/files/jupyterlab/overrides.json +++ b/src/_nebari/stages/kubernetes_services/template/modules/kubernetes/services/jupyterhub/files/jupyterlab/overrides.json @@ -84,6 +84,13 @@ "text": "Argo Workflows", "newBrowserTab": true } + }, + { + "command": "nebari:open-proxy", + "rank": 5, + "args": { + "name": "vscode" + } } ] }, From 529adc113c9e9e70af0469aef907210838844785 Mon Sep 17 00:00:00 2001 From: Marcelo Villa Date: Thu, 22 Feb 2024 14:46:31 -0500 Subject: [PATCH 2/9] Change nebari and nebari workflow controller image tags. 
--- src/_nebari/constants.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/_nebari/constants.py b/src/_nebari/constants.py index 19ed5ce7e8..229d7957a4 100644 --- a/src/_nebari/constants.py +++ b/src/_nebari/constants.py @@ -12,8 +12,8 @@ DEFAULT_GKE_RELEASE_CHANNEL = "UNSPECIFIED" DEFAULT_NEBARI_DASK_VERSION = CURRENT_RELEASE -DEFAULT_NEBARI_IMAGE_TAG = CURRENT_RELEASE -DEFAULT_NEBARI_WORKFLOW_CONTROLLER_IMAGE_TAG = "2024.1.1" +DEFAULT_NEBARI_IMAGE_TAG = "2024.2.1rc2" +DEFAULT_NEBARI_WORKFLOW_CONTROLLER_IMAGE_TAG = "2024.2.1rc2" DEFAULT_CONDA_STORE_IMAGE_TAG = "2024.1.1" From a1e5fd1e7262d5193e9c34a55b1c26a8a6fcca8f Mon Sep 17 00:00:00 2001 From: Marcelo Villa Date: Thu, 22 Feb 2024 14:46:42 -0500 Subject: [PATCH 3/9] Add upgrade message for new version. --- src/_nebari/upgrade.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/src/_nebari/upgrade.py b/src/_nebari/upgrade.py index ef933f48ea..17cb4ef9f0 100644 --- a/src/_nebari/upgrade.py +++ b/src/_nebari/upgrade.py @@ -705,6 +705,20 @@ def _version_specific_upgrade( return config +class Upgrade_2024_2_1(UpgradeStep): + version = "2024.2.1" + + def _version_specific_upgrade( + self, config, start_version, config_filename: Path, *args, **kwargs + ): + rich.print("\n ⚠️ Warning ⚠️") + rich.print( + "-> Please run the [green]rm -rf stages[/green] so that we can regenerate an updated set of Terraform scripts for your deployment." 
+ ) + + return config + + __rounded_version__ = str(rounded_ver_parse(__version__)) # Manually-added upgrade steps must go above this line From 292df05ad9de616a4b316989a20737e8065cfccf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Krassowski?= <5832902+krassowski@users.noreply.github.com> Date: Tue, 5 Mar 2024 14:50:34 +0000 Subject: [PATCH 4/9] Fix syntax error in jupyter-server-config Python file (#2286) --- .../kubernetes/services/jupyterhub/configmaps.tf | 10 ++++++++++ .../jupyter/jupyter_jupyterlab_pioneer_config.py.tpl | 2 +- .../files/jupyter/jupyter_server_config.py.tpl | 4 +--- 3 files changed, 12 insertions(+), 4 deletions(-) diff --git a/src/_nebari/stages/kubernetes_services/template/modules/kubernetes/services/jupyterhub/configmaps.tf b/src/_nebari/stages/kubernetes_services/template/modules/kubernetes/services/jupyterhub/configmaps.tf index 6fe6cd0e5e..4b8f9145b9 100644 --- a/src/_nebari/stages/kubernetes_services/template/modules/kubernetes/services/jupyterhub/configmaps.tf +++ b/src/_nebari/stages/kubernetes_services/template/modules/kubernetes/services/jupyterhub/configmaps.tf @@ -30,11 +30,21 @@ locals { resource "local_file" "jupyter_server_config_py" { content = local.jupyter-notebook-config-py-template filename = "${path.module}/files/jupyter/jupyter_server_config.py" + + provisioner "local-exec" { + # check the syntax of the config file without running it + command = "python -m py_compile ${self.filename}" + } } resource "local_file" "jupyter_jupyterlab_pioneer_config_py" { content = local.jupyter-pioneer-config-py-template filename = "${path.module}/files/jupyter/jupyter_jupyterlab_pioneer_config.py" + + provisioner "local-exec" { + # check the syntax of the config file without running it + command = "python -m py_compile ${self.filename}" + } } diff --git a/src/_nebari/stages/kubernetes_services/template/modules/kubernetes/services/jupyterhub/files/jupyter/jupyter_jupyterlab_pioneer_config.py.tpl 
b/src/_nebari/stages/kubernetes_services/template/modules/kubernetes/services/jupyterhub/files/jupyter/jupyter_jupyterlab_pioneer_config.py.tpl index 2149d298f8..66b653b894 100644 --- a/src/_nebari/stages/kubernetes_services/template/modules/kubernetes/services/jupyterhub/files/jupyter/jupyter_jupyterlab_pioneer_config.py.tpl +++ b/src/_nebari/stages/kubernetes_services/template/modules/kubernetes/services/jupyterhub/files/jupyter/jupyter_jupyterlab_pioneer_config.py.tpl @@ -3,7 +3,7 @@ import json default_log_format = "%(asctime)s %(levelname)9s %(lineno)4s %(module)s: %(message)s" -log_format = ${log_format} +log_format = "${log_format}" logging.basicConfig( level=logging.INFO, diff --git a/src/_nebari/stages/kubernetes_services/template/modules/kubernetes/services/jupyterhub/files/jupyter/jupyter_server_config.py.tpl b/src/_nebari/stages/kubernetes_services/template/modules/kubernetes/services/jupyterhub/files/jupyter/jupyter_server_config.py.tpl index ab5117ee1b..d5e089dfa3 100644 --- a/src/_nebari/stages/kubernetes_services/template/modules/kubernetes/services/jupyterhub/files/jupyter/jupyter_server_config.py.tpl +++ b/src/_nebari/stages/kubernetes_services/template/modules/kubernetes/services/jupyterhub/files/jupyter/jupyter_server_config.py.tpl @@ -4,14 +4,12 @@ # Extra config available at: # https://zero-to-jupyterhub.readthedocs.io/en/1.x/jupyterhub/customizing/user-management.html#culling-user-pods - # Enable Show Hidden Files menu option in View menu c.ContentsManager.allow_hidden = True c.FileContentsManager.allow_hidden = True # Set the preferred path for the frontend to start in -preferred_dir = ${jupyterlab_preferred_dir} -c.FileContentsManager.preferred_dir = preferred_dir if preferred_dir else None +c.FileContentsManager.preferred_dir = "${jupyterlab_preferred_dir}" # Timeout (in seconds) in which a terminal has been inactive and ready to # be culled. 
From 3bd47cb11b6ea440ee34a604305cfb77f9190b01 Mon Sep 17 00:00:00 2001 From: Marcelo Villa Date: Tue, 5 Mar 2024 15:10:06 -0500 Subject: [PATCH 5/9] Update version and warning message. --- src/_nebari/upgrade.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/_nebari/upgrade.py b/src/_nebari/upgrade.py index 17cb4ef9f0..ec0b1227ad 100644 --- a/src/_nebari/upgrade.py +++ b/src/_nebari/upgrade.py @@ -705,15 +705,15 @@ def _version_specific_upgrade( return config -class Upgrade_2024_2_1(UpgradeStep): - version = "2024.2.1" +class Upgrade_2024_3_1(UpgradeStep): + version = "2024.3.1" def _version_specific_upgrade( self, config, start_version, config_filename: Path, *args, **kwargs ): rich.print("\n ⚠️ Warning ⚠️") rich.print( - "-> Please run the [green]rm -rf stages[/green] so that we can regenerate an updated set of Terraform scripts for your deployment." + "-> Please run [green]mv stages stages.old[/green] so that we can keep a backup of the original Terraform scripts and regenerate an updated set of them for your deployment." ) return config From 2abed564104f13447be063a4c56db93107fe00ea Mon Sep 17 00:00:00 2001 From: Marcelo Villa Date: Tue, 5 Mar 2024 17:12:13 -0500 Subject: [PATCH 6/9] Update upgrade message. --- src/_nebari/upgrade.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/src/_nebari/upgrade.py b/src/_nebari/upgrade.py index ec0b1227ad..5c095f04a2 100644 --- a/src/_nebari/upgrade.py +++ b/src/_nebari/upgrade.py @@ -711,10 +711,7 @@ class Upgrade_2024_3_1(UpgradeStep): def _version_specific_upgrade( self, config, start_version, config_filename: Path, *args, **kwargs ): - rich.print("\n ⚠️ Warning ⚠️") - rich.print( - "-> Please run [green]mv stages stages.old[/green] so that we can keep a backup of the original Terraform scripts and regenerate an updated set of them for your deployment." 
- ) + rich.print("Ready to upgrade to Nebari version [green]2024.3.1[/green].") return config From 0210a47a1acbe4940e0f6ff18fb72f630e224230 Mon Sep 17 00:00:00 2001 From: Amit Kumar Date: Fri, 8 Mar 2024 19:15:52 +0000 Subject: [PATCH 7/9] Add Grafana Loki integration (#2156) Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Vinicius D. Cerutti <51954708+viniciusdc@users.noreply.github.com> --- .cirun.yml | 4 +- .github/workflows/test_local_integration.yaml | 13 +- RELEASE.md | 2 + .../stages/kubernetes_services/__init__.py | 22 +++ .../services/monitoring/loki/main.tf | 103 ++++++++++++++ .../services/monitoring/loki/values_loki.yaml | 78 +++++++++++ .../monitoring/loki/values_minio.yaml | 1 + .../monitoring/loki/values_promtail.yaml | 1 + .../services/monitoring/loki/variables.tf | 84 ++++++++++++ .../services/monitoring/values.yaml | 6 + .../template/monitoring.tf | 11 ++ .../kubernetes_services/template/variables.tf | 24 ++++ tests/common/kube_api.py | 40 ++++++ .../tests_deployment/test_loki_deployment.py | 126 ++++++++++++++++++ .../min.happy.monitoring.overrides.yaml | 10 ++ 15 files changed, 521 insertions(+), 4 deletions(-) create mode 100644 src/_nebari/stages/kubernetes_services/template/modules/kubernetes/services/monitoring/loki/main.tf create mode 100644 src/_nebari/stages/kubernetes_services/template/modules/kubernetes/services/monitoring/loki/values_loki.yaml create mode 100644 src/_nebari/stages/kubernetes_services/template/modules/kubernetes/services/monitoring/loki/values_minio.yaml create mode 100644 src/_nebari/stages/kubernetes_services/template/modules/kubernetes/services/monitoring/loki/values_promtail.yaml create mode 100644 src/_nebari/stages/kubernetes_services/template/modules/kubernetes/services/monitoring/loki/variables.tf create mode 100644 tests/common/kube_api.py create mode 100644 tests/tests_deployment/test_loki_deployment.py create mode 100644 
tests/tests_unit/cli_validate/min.happy.monitoring.overrides.yaml diff --git a/.cirun.yml b/.cirun.yml index bdabe6500b..dcc829bb8b 100644 --- a/.cirun.yml +++ b/.cirun.yml @@ -4,8 +4,8 @@ runners: - name: run-k8s-tests # Cloud Provider: AWS cloud: aws - # Instance Type has 4 vcpu, 16 GiB memory, Up to 5 Gbps Network Performance - instance_type: t3a.xlarge + # Instance Type has 8 vcpu, 32 GiB memory, Up to 5 Gbps Network Performance + instance_type: t3a.2xlarge # Custom AMI with docker/cypress/hub pre-installed machine_image: ami-0a388df278199ff52 # Region: Oregon diff --git a/.github/workflows/test_local_integration.yaml b/.github/workflows/test_local_integration.yaml index ac5ff87b46..05dec384b0 100644 --- a/.github/workflows/test_local_integration.yaml +++ b/.github/workflows/test_local_integration.yaml @@ -96,7 +96,6 @@ jobs: sed -i -E 's/(cpu_guarantee):\s+[0-9\.]+/\1: 0.25/g' "nebari-config.yaml" sed -i -E 's/(mem_guarantee):\s+[A-Za-z0-9\.]+/\1: 0.25G/g' "nebari-config.yaml" - # Change default JupyterLab theme cat >> nebari-config.yaml <<- EOM jupyterlab: @@ -105,6 +104,16 @@ jobs: theme: JupyterLab Dark EOM + # Change default value for minio persistence size + cat >> nebari-config.yaml <<- EOM + monitoring: + enabled: true + overrides: + minio: + persistence: + size: 1Gi + EOM + cat nebari-config.yaml - name: Deploy Nebari @@ -115,7 +124,7 @@ jobs: - name: Basic kubectl checks after deployment if: always() run: | - kubectl get all,cm,secret,ing -A + kubectl get all,cm,secret,pv,pvc,ing -A - name: Check github-actions.nebari.dev resolves run: | diff --git a/RELEASE.md b/RELEASE.md index f3f93499ae..076754b3a6 100644 --- a/RELEASE.md +++ b/RELEASE.md @@ -11,6 +11,8 @@ This file is copied to nebari-dev/nebari-docs using a GitHub Action. 
--> ## Upcoming Release +* Added Grafana Loki to aggregate, index and search logs + ## Release 2024.1.1 - January 17, 2024 ### Feature changes and enhancements diff --git a/src/_nebari/stages/kubernetes_services/__init__.py b/src/_nebari/stages/kubernetes_services/__init__.py index 0702a27c52..a9124f41ac 100644 --- a/src/_nebari/stages/kubernetes_services/__init__.py +++ b/src/_nebari/stages/kubernetes_services/__init__.py @@ -199,8 +199,16 @@ class JHubApps(schema.Base): enabled: bool = False +class MonitoringOverrides(schema.Base): + loki: typing.Dict = {} + promtail: typing.Dict = {} + minio: typing.Dict = {} + + class Monitoring(schema.Base): enabled: bool = True + overrides: MonitoringOverrides = MonitoringOverrides() + minio_enabled: bool = True class JupyterLabPioneer(schema.Base): @@ -381,6 +389,12 @@ class DaskGatewayInputVars(schema.Base): class MonitoringInputVars(schema.Base): monitoring_enabled: bool = Field(alias="monitoring-enabled") + minio_enabled: bool = Field(alias="minio-enabled") + grafana_loki_overrides: List[str] = Field(alias="grafana-loki-overrides") + grafana_promtail_overrides: List[str] = Field(alias="grafana-promtail-overrides") + grafana_loki_minio_overrides: List[str] = Field( + alias="grafana-loki-minio-overrides" + ) class TelemetryInputVars(schema.Base): @@ -524,6 +538,14 @@ def input_vars(self, stage_outputs: Dict[str, Dict[str, Any]]): monitoring_vars = MonitoringInputVars( monitoring_enabled=self.config.monitoring.enabled, + minio_enabled=self.config.monitoring.minio_enabled, + grafana_loki_overrides=[json.dumps(self.config.monitoring.overrides.loki)], + grafana_promtail_overrides=[ + json.dumps(self.config.monitoring.overrides.promtail) + ], + grafana_loki_minio_overrides=[ + json.dumps(self.config.monitoring.overrides.minio) + ], ) telemetry_vars = TelemetryInputVars( diff --git a/src/_nebari/stages/kubernetes_services/template/modules/kubernetes/services/monitoring/loki/main.tf 
b/src/_nebari/stages/kubernetes_services/template/modules/kubernetes/services/monitoring/loki/main.tf new file mode 100644 index 0000000000..8180d46fb8 --- /dev/null +++ b/src/_nebari/stages/kubernetes_services/template/modules/kubernetes/services/monitoring/loki/main.tf @@ -0,0 +1,103 @@ +resource "random_password" "minio_root_password" { + length = 32 + special = false +} + +locals { + minio-url = "http://${var.minio-release-name}:${var.minio-port}" + node-selector = { + "${var.node-group.key}" = "${var.node-group.value}" + } +} + +resource "helm_release" "loki-minio" { + count = var.minio-enabled ? 1 : 0 + name = var.minio-release-name + namespace = var.namespace + repository = "https://raw.githubusercontent.com/bitnami/charts/defb094c658024e4aa8245622dab202874880cbc/bitnami" + chart = "minio" + # last release that was Apache-2.0 + version = var.minio-helm-chart-version + + set { + name = "accessKey.password" + value = "admin" + } + + set { + name = "secretKey.password" + value = random_password.minio_root_password.result + } + + set { + name = "defaultBuckets" + value = join(" ", var.buckets) + } + + set { + name = "persistence.size" + value = var.minio-storage + } + + values = concat([ + file("${path.module}/values_minio.yaml"), + jsonencode({ + nodeSelector : local.node-selector + }) + ], var.grafana-loki-minio-overrides) +} + + +resource "helm_release" "grafana-loki" { + name = "nebari-loki" + namespace = var.namespace + repository = "https://grafana.github.io/helm-charts" + chart = "loki" + version = var.loki-helm-chart-version + + values = concat([ + file("${path.module}/values_loki.yaml"), + jsonencode({ + loki : { + storage : { + s3 : { + endpoint : local.minio-url, + accessKeyId : "admin" + secretAccessKey : random_password.minio_root_password.result, + s3ForcePathStyle : true + } + } + } + storageConfig : { + # We configure MinIO by using the AWS config because MinIO implements the S3 API + aws : { + s3 : local.minio-url + s3ForcePathStyle : true + } + 
} + write : { nodeSelector : local.node-selector } + read : { nodeSelector : local.node-selector } + backend : { nodeSelector : local.node-selector } + gateway : { nodeSelector : local.node-selector } + }) + ], var.grafana-loki-overrides) + + depends_on = [helm_release.loki-minio] +} + +resource "helm_release" "grafana-promtail" { + # Promtail ships the contents of logs to Loki instance + name = "nebari-promtail" + namespace = var.namespace + repository = "https://grafana.github.io/helm-charts" + chart = "promtail" + version = var.promtail-helm-chart-version + + values = concat([ + file("${path.module}/values_promtail.yaml"), + jsonencode({ + }) + ], var.grafana-promtail-overrides) + + depends_on = [helm_release.grafana-loki] +} diff --git a/src/_nebari/stages/kubernetes_services/template/modules/kubernetes/services/monitoring/loki/values_loki.yaml b/src/_nebari/stages/kubernetes_services/template/modules/kubernetes/services/monitoring/loki/values_loki.yaml new file mode 100644 index 0000000000..c11ebe5d1f --- /dev/null +++ b/src/_nebari/stages/kubernetes_services/template/modules/kubernetes/services/monitoring/loki/values_loki.yaml @@ -0,0 +1,78 @@ +# https://github.com/grafana/loki/blob/4cae003ecedd474e4c15feab4ea2ef435afff83f/production/helm/loki/values.yaml + +loki: + storage: + type: s3 + commonConfig: + replication_factor: 1 + # Not required as it is inside cluster and not exposed to the public network + auth_enabled: false + + # The Compactor deduplicates index entries and also apply granular retention. + compactor: + # is the directory where marked chunks and temporary tables will be saved. + working_directory: /var/loki/compactor/data/retention + # minio s3 + shared_store: s3 + # how often compaction will happen + compaction_interval: 1h + # should delete old logs after retention delete delay + # ideally we would want to do storage based retention, but this is not + # currently implemented in loki, that's why we're doing time based retention. 
+ retention_enabled: true + # is the delay after which the Compactor will delete marked chunks. + retention_delete_delay: 1h + # specifies the maximum quantity of goroutine workers instantiated to delete chunks. + retention_delete_worker_count: 150 + + limits_config: + # The minimum retention period is 24h. + # This is reasonable in most cases, but if people would like to retain logs for longer + # then they can override this variable from nebari-config.yaml + retention_period: 60d + + schema_config: + configs: + # list of period_configs + # The date of the first day that index buckets should be created. + - from: "2024-03-01" + index: + period: 24h + prefix: loki_index_ + object_store: s3 + schema: v11 + store: boltdb-shipper + storage_config: + boltdb_shipper: + # Directory where ingesters would write index files which would then be + # uploaded by shipper to configured storage + active_index_directory: /var/loki/compactor/data/index + # Cache location for restoring index files from storage for queries + cache_location: /var/loki/compactor/data/boltdb-cache + # Shared store for keeping index files + shared_store: s3 + +# Configuration for the write pod(s) +write: + # -- Number of replicas for the write + # Keeping cost of running Nebari in mind + # We don't need so many replicas, if people need it + # they can always override from nebari-config.yaml + replicas: 1 + +read: + # -- Number of replicas for the read + replicas: 1 + +backend: + # -- Number of replicas for the backend + replicas: 1 + +minio: + # We are deploying minio from bitnami chart separately + enabled: false + +monitoring: + selfMonitoring: + grafanaAgent: + installOperator: false diff --git a/src/_nebari/stages/kubernetes_services/template/modules/kubernetes/services/monitoring/loki/values_minio.yaml b/src/_nebari/stages/kubernetes_services/template/modules/kubernetes/services/monitoring/loki/values_minio.yaml new file mode 100644 index 0000000000..666542bb45 --- /dev/null +++ 
b/src/_nebari/stages/kubernetes_services/template/modules/kubernetes/services/monitoring/loki/values_minio.yaml @@ -0,0 +1 @@ +# https://github.com/bitnami/charts/blob/440ec159c26e4ff0748b9e9866b345d98220c40a/bitnami/minio/values.yaml diff --git a/src/_nebari/stages/kubernetes_services/template/modules/kubernetes/services/monitoring/loki/values_promtail.yaml b/src/_nebari/stages/kubernetes_services/template/modules/kubernetes/services/monitoring/loki/values_promtail.yaml new file mode 100644 index 0000000000..5a18a9bc09 --- /dev/null +++ b/src/_nebari/stages/kubernetes_services/template/modules/kubernetes/services/monitoring/loki/values_promtail.yaml @@ -0,0 +1 @@ +# https://github.com/grafana/helm-charts/blob/3831194ba2abd2a0ca7a14ca00e578f8e9d2abc6/charts/promtail/values.yaml diff --git a/src/_nebari/stages/kubernetes_services/template/modules/kubernetes/services/monitoring/loki/variables.tf b/src/_nebari/stages/kubernetes_services/template/modules/kubernetes/services/monitoring/loki/variables.tf new file mode 100644 index 0000000000..a43695252c --- /dev/null +++ b/src/_nebari/stages/kubernetes_services/template/modules/kubernetes/services/monitoring/loki/variables.tf @@ -0,0 +1,84 @@ +variable "namespace" { + description = "deploy monitoring services on this namespace" + type = string + default = "dev" +} + +variable "loki-helm-chart-version" { + description = "version to deploy for the loki helm chart" + type = string + default = "5.43.3" +} + +variable "promtail-helm-chart-version" { + description = "version to deploy for the promtail helm chart" + type = string + default = "6.15.5" +} + +variable "minio-helm-chart-version" { + description = "version to deploy for the minio helm chart" + type = string + default = "6.7.4" +} + +variable "grafana-loki-overrides" { + description = "Grafana Loki helm chart overrides" + type = list(string) + default = [] +} + +variable "grafana-promtail-overrides" { + description = "Grafana Promtail helm chart overrides" + type = 
list(string) + default = [] +} + +variable "grafana-loki-minio-overrides" { + description = "Grafana Loki minio helm chart overrides" + type = list(string) + default = [] +} + +variable "minio-release-name" { + description = "Grafana Loki minio release name" + type = string + default = "nebari-loki-minio" +} + +variable "minio-port" { + description = "Grafana Loki minio port" + type = number + default = 9000 +} + +variable "buckets" { + description = "Minio buckets" + type = list(string) + default = [ + "chunks", + "ruler", + "admin", + "loki" + ] +} + +variable "minio-storage" { + description = "Minio storage" + type = string + default = "50Gi" +} + +variable "minio-enabled" { + description = "Deploy minio along with loki or not" + type = bool + default = true +} + +variable "node-group" { + description = "Node key value pair for bound resources" + type = object({ + key = string + value = string + }) +} diff --git a/src/_nebari/stages/kubernetes_services/template/modules/kubernetes/services/monitoring/values.yaml b/src/_nebari/stages/kubernetes_services/template/modules/kubernetes/services/monitoring/values.yaml index ada868882f..f3cf47c88d 100644 --- a/src/_nebari/stages/kubernetes_services/template/modules/kubernetes/services/monitoring/values.yaml +++ b/src/_nebari/stages/kubernetes_services/template/modules/kubernetes/services/monitoring/values.yaml @@ -1 +1,7 @@ # https://github.com/prometheus-community/helm-charts/blob/main/charts/kube-prometheus-stack/values.yaml + +grafana: + additionalDataSources: + - name: Loki + type: loki + url: http://loki-gateway.dev diff --git a/src/_nebari/stages/kubernetes_services/template/monitoring.tf b/src/_nebari/stages/kubernetes_services/template/monitoring.tf index ec20a75ba7..39487c4bb1 100644 --- a/src/_nebari/stages/kubernetes_services/template/monitoring.tf +++ b/src/_nebari/stages/kubernetes_services/template/monitoring.tf @@ -14,3 +14,14 @@ module "monitoring" { node-group = var.node_groups.general } + +module 
"grafana-loki" { + count = var.monitoring-enabled ? 1 : 0 + source = "./modules/kubernetes/services/monitoring/loki" + namespace = var.environment + grafana-loki-overrides = var.grafana-loki-overrides + grafana-promtail-overrides = var.grafana-promtail-overrides + grafana-loki-minio-overrides = var.grafana-loki-minio-overrides + node-group = var.node_groups.general + minio-enabled = var.minio-enabled +} diff --git a/src/_nebari/stages/kubernetes_services/template/variables.tf b/src/_nebari/stages/kubernetes_services/template/variables.tf index 4b78f5994e..9e36e65979 100644 --- a/src/_nebari/stages/kubernetes_services/template/variables.tf +++ b/src/_nebari/stages/kubernetes_services/template/variables.tf @@ -63,3 +63,27 @@ variable "cloud-provider" { description = "Name of cloud provider." type = string } + +variable "grafana-loki-overrides" { + description = "Helm chart overrides for loki" + type = list(string) + default = [] +} + +variable "grafana-promtail-overrides" { + description = "Helm chart overrides for promtail" + type = list(string) + default = [] +} + +variable "grafana-loki-minio-overrides" { + description = "Grafana Loki minio helm chart overrides" + type = list(string) + default = [] +} + +variable "minio-enabled" { + description = "Deploy minio along with loki or not" + type = bool + default = true +} diff --git a/tests/common/kube_api.py b/tests/common/kube_api.py new file mode 100644 index 0000000000..eec1d05d7b --- /dev/null +++ b/tests/common/kube_api.py @@ -0,0 +1,40 @@ +import socket +import typing + +from kubernetes import config +from kubernetes.client.api import core_v1_api +from kubernetes.client.models import V1Pod +from kubernetes.stream import portforward + + +def kubernetes_port_forward( + pod_labels: typing.Dict[str, str], port: int, namespace: str = "dev" +) -> V1Pod: + """Given pod labels and port, finds the pod name and port forwards to + the given port. 
+ :param pod_labels: dict of labels, by which to search the pod + :param port: port number to forward + :param namespace: kubernetes namespace name + :return: kubernetes pod object + """ + config.load_kube_config() + core_v1 = core_v1_api.CoreV1Api() + label_selector = ",".join([f"{k}={v}" for k, v in pod_labels.items()]) + pods = core_v1.list_namespaced_pod( + namespace=namespace, label_selector=label_selector + ) + assert pods.items + pod = pods.items[0] + pod_name = pod.metadata.name + + def kubernetes_create_connection(address, *args, **kwargs): + pf = portforward( + core_v1.connect_get_namespaced_pod_portforward, + pod_name, + namespace, + ports=str(port), + ) + return pf.socket(port) + + socket.create_connection = kubernetes_create_connection + return pod diff --git a/tests/tests_deployment/test_loki_deployment.py b/tests/tests_deployment/test_loki_deployment.py new file mode 100644 index 0000000000..59210a8fc3 --- /dev/null +++ b/tests/tests_deployment/test_loki_deployment.py @@ -0,0 +1,126 @@ +import json +import urllib.parse +import urllib.request as urllib_request + +import pytest +from kubernetes.client import V1Pod + +from tests.common.kube_api import kubernetes_port_forward + +LOKI_BACKEND_PORT = 3100 +LOKI_BACKEND_POD_LABELS = { + "app.kubernetes.io/instance": "nebari-loki", + "app.kubernetes.io/component": "backend", +} + +MINIO_PORT = 9000 +MINIO_POD_LABELS = { + "app.kubernetes.io/instance": "nebari-loki-minio", + "app.kubernetes.io/name": "minio", +} + +LOKI_GATEWAY_PORT = 8080 +LOKI_GATEWAY_POD_LABELS = { + "app.kubernetes.io/instance": "nebari-loki", + "app.kubernetes.io/component": "gateway", +} + + +@pytest.fixture(scope="module") +def port_forward_fixture(request): + """Pytest fixture to port forward loki backend pod to make it accessible + on localhost so that we can run some tests on it. 
+ """ + return kubernetes_port_forward( + pod_labels=request.param["labels"], port=request.param["port"] + ) + + +def port_forward(labels, port): + params = {"labels": labels, "port": port} + return pytest.mark.parametrize("port_forward_fixture", [params], indirect=True) + + +@pytest.mark.parametrize( + "endpoint_path", + ( + "metrics", + "services", + "config", + "ready", + "log_level", + ), +) +@port_forward(labels=LOKI_BACKEND_POD_LABELS, port=LOKI_BACKEND_PORT) +def test_loki_endpoint(endpoint_path: str, port_forward_fixture: V1Pod): + """This will hit some endpoints in the loki API and verify that we + get a 200 status code, to make sure Loki is working properly. + :param endpoint_path: a loki api endpoint path + :param port_forward_fixture: pytest fixture to port forward. + :return: + """ + pod_name = port_forward_fixture.metadata.name + url = f"http://{pod_name}.pod.dev.kubernetes:{LOKI_BACKEND_PORT}/{endpoint_path}" + response = urllib_request.urlopen(url) + response.read().decode("utf-8") + assert response.code == 200 + response.close() + + +@port_forward(labels=MINIO_POD_LABELS, port=MINIO_PORT) +def test_minio_accessible(port_forward_fixture: V1Pod): + """This will hit liveness endpoint of minio API and verify that we + get a 200 status code, to make sure minio is up and running. + :param port_forward_fixture: pytest fixture to port forward. + :return: + """ + pod_name = port_forward_fixture.metadata.name + url = f"http://{pod_name}.pod.dev.kubernetes:{MINIO_PORT}/minio/health/live" + response = urllib_request.urlopen(url) + response.read().decode("utf-8") + assert response.code == 200 + response.close() + + +@port_forward(labels=LOKI_GATEWAY_POD_LABELS, port=LOKI_GATEWAY_PORT) +def test_loki_gateway(port_forward_fixture: V1Pod): + """This will hit an endpoint of loki gateway API and verify that we + get a 200 status code, to make sure the Loki gateway is up and running. + :param port_forward_fixture: pytest fixture to port forward. 
+ :return: + """ + pod_name = port_forward_fixture.metadata.name + url = f"http://{pod_name}.pod.dev.kubernetes:{LOKI_BACKEND_PORT}/loki/api/v1/labels" + response = urllib_request.urlopen(url) + response_content = response.read().decode("utf-8") + response_json = json.loads(response_content) + assert response.code == 200 + assert response_json["status"] == "success" + response.close() + + +@port_forward(labels=LOKI_GATEWAY_POD_LABELS, port=LOKI_GATEWAY_PORT) +def test_loki_gateway_fetch_logs(port_forward_fixture: V1Pod): + """This will hit an endpoint of loki gateway API to fetch some logs + and verify logs received. + :param port_forward_fixture: pytest fixture to port forward. + :return: None + """ + pod_name = port_forward_fixture.metadata.name + query_params = { + "limit": "5", + # Fetch logs for jupyterhub app + "query": '{app="jupyterhub"}', + } + + encoded_params = urllib.parse.urlencode(query_params) + path = f"/loki/api/v1/query_range?{encoded_params}" + url = f"http://{pod_name}.pod.dev.kubernetes:{LOKI_BACKEND_PORT}/{path}" + response = urllib_request.urlopen(url) + response_content = response.read().decode("utf-8") + response_json = json.loads(response_content) + assert response.code == 200 + assert response_json["status"] == "success" + # Make sure log lines received + assert len(response_json["data"]["result"][0]["values"]) > 0 + response.close() diff --git a/tests/tests_unit/cli_validate/min.happy.monitoring.overrides.yaml b/tests/tests_unit/cli_validate/min.happy.monitoring.overrides.yaml new file mode 100644 index 0000000000..587c0cf5cb --- /dev/null +++ b/tests/tests_unit/cli_validate/min.happy.monitoring.overrides.yaml @@ -0,0 +1,10 @@ +project_name: test +monitoring: + enabled: true + overrides: + loki: + loki: foobar + promtail: + promtail: foobar + minio: + minio: foobar From 930707029cb12432f63a7f6206c41b0e7ae112ae Mon Sep 17 00:00:00 2001 From: Marcelo Villa Date: Mon, 11 Mar 2024 08:50:55 -0500 Subject: [PATCH 8/9] Update current version 
and image tags. --- src/_nebari/constants.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/_nebari/constants.py b/src/_nebari/constants.py index 229d7957a4..0d49bc6e1b 100644 --- a/src/_nebari/constants.py +++ b/src/_nebari/constants.py @@ -1,4 +1,4 @@ -CURRENT_RELEASE = "2024.1.1" +CURRENT_RELEASE = "2024.3.1" # NOTE: Terraform cannot be upgraded further due to Hashicorp licensing changes # implemented in August 2023. @@ -12,8 +12,8 @@ DEFAULT_GKE_RELEASE_CHANNEL = "UNSPECIFIED" DEFAULT_NEBARI_DASK_VERSION = CURRENT_RELEASE -DEFAULT_NEBARI_IMAGE_TAG = "2024.2.1rc2" -DEFAULT_NEBARI_WORKFLOW_CONTROLLER_IMAGE_TAG = "2024.2.1rc2" +DEFAULT_NEBARI_IMAGE_TAG = CURRENT_RELEASE +DEFAULT_NEBARI_WORKFLOW_CONTROLLER_IMAGE_TAG = CURRENT_RELEASE DEFAULT_CONDA_STORE_IMAGE_TAG = "2024.1.1" From 47fa44d62fdfe7e721de844d427f6648b09d51b2 Mon Sep 17 00:00:00 2001 From: Marcelo Villa Date: Mon, 11 Mar 2024 12:23:24 -0500 Subject: [PATCH 9/9] Add release notes. --- RELEASE.md | 23 ++++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/RELEASE.md b/RELEASE.md index 076754b3a6..41433e9e13 100644 --- a/RELEASE.md +++ b/RELEASE.md @@ -11,7 +11,28 @@ This file is copied to nebari-dev/nebari-docs using a GitHub Action. --> ## Upcoming Release -* Added Grafana Loki to aggregate, index and search logs +## Release 2024.3.1 - March 11, 2024 + +### What's Changed +* Modify Playwright test to account for changes in JupyterLab UI. by @marcelovilla in https://github.com/nebari-dev/nebari/pull/2232 +* Add favicon to jupyterhub theme. by @jbouder in https://github.com/nebari-dev/nebari/pull/2222 +* Set min nodes to 0 for worker and user. 
by @pt247 in https://github.com/nebari-dev/nebari/pull/2168 +* Remove `jhub-client` from pyproject.toml by @pavithraes in https://github.com/nebari-dev/nebari/pull/2242 +* Include permission validation step to programmatically cloned repos by @viniciusdc in https://github.com/nebari-dev/nebari/pull/2258 +* Expose jupyter's preferred dir as a config option by @krassowski in https://github.com/nebari-dev/nebari/pull/2251 +* Allow to configure default settings for JupyterLab (`overrides.json`) by @krassowski in https://github.com/nebari-dev/nebari/pull/2249 +* Feature/jlab menu customization by @marcelovilla in https://github.com/nebari-dev/nebari/pull/2259 +* Add cloud provider to the dask config.json file by @marcelovilla in https://github.com/nebari-dev/nebari/pull/2266 +* Fix syntax error in jupyter-server-config Python file by @krassowski in https://github.com/nebari-dev/nebari/pull/2286 +* Add "Open VS Code" entry in services by @krassowski in https://github.com/nebari-dev/nebari/pull/2267 +* Add Grafana Loki integration by @aktech in https://github.com/nebari-dev/nebari/pull/2156 + +### New Contributors +* @jbouder made their first contribution in https://github.com/nebari-dev/nebari/pull/2222 +* @krassowski made their first contribution in https://github.com/nebari-dev/nebari/pull/2251 + +**Full Changelog**: https://github.com/nebari-dev/nebari/compare/2024.1.1...2024.3.1 + ## Release 2024.1.1 - January 17, 2024