From 1ebc0f1e71a2c6f993d969c3c654fc9d6e0fe196 Mon Sep 17 00:00:00 2001 From: Markus Date: Thu, 11 Jul 2024 07:33:21 +0300 Subject: [PATCH 01/25] take off filesystem metric check from system and related alert --- infrastructure/monitoring/beats/metricbeat.yml | 16 ++++++++-------- .../monitoring/elastalert/rules/alert.yaml | 6 +++--- infrastructure/monitoring/kibana/config.ndjson | 1 - 3 files changed, 11 insertions(+), 12 deletions(-) diff --git a/infrastructure/monitoring/beats/metricbeat.yml b/infrastructure/monitoring/beats/metricbeat.yml index cb4875457..520ed80bd 100644 --- a/infrastructure/monitoring/beats/metricbeat.yml +++ b/infrastructure/monitoring/beats/metricbeat.yml @@ -38,14 +38,14 @@ metricbeat.modules: cpu.metrics: ['percentages'] core.metrics: ['percentages'] - - module: system - period: 1m - metricsets: - - filesystem - - fsstat - processors: - - drop_event.when.regexp: - system.filesystem.mount_point: '^/(sys|cgroup|proc|dev|etc|host|lib)($|/)' + # - module: system + # period: 1m + # metricsets: + # - filesystem + # - fsstat + # processors: + # - drop_event.when.regexp: + # system.filesystem.mount_point: '^/(sys|cgroup|proc|dev|etc|host|lib)($|/)' - module: system period: 15m diff --git a/infrastructure/monitoring/elastalert/rules/alert.yaml b/infrastructure/monitoring/elastalert/rules/alert.yaml index d5bfbcb50..58a74d374 100644 --- a/infrastructure/monitoring/elastalert/rules/alert.yaml +++ b/infrastructure/monitoring/elastalert/rules/alert.yaml @@ -30,9 +30,9 @@ filter: - term: rule.name.keyword: value: 'CPU under heavy load' - - term: - rule.name.keyword: - value: 'Low on available disk space' + # - term: + # rule.name.keyword: + # value: 'Low on available disk space' minimum_should_match: 1 alert: post2 diff --git a/infrastructure/monitoring/kibana/config.ndjson b/infrastructure/monitoring/kibana/config.ndjson index a8f47219d..6c26fe491 100644 --- a/infrastructure/monitoring/kibana/config.ndjson +++ b/infrastructure/monitoring/kibana/config.ndjson @@ -5,6 +5,5 @@ {"attributes":{"anomalyThreshold":50,"description":"","fields":{"container":"container.id","host":"host.name","message":["message","@message"],"pod":"kubernetes.pod.uid","tiebreaker":"_doc","timestamp":"@timestamp"},"inventoryDefaultView":"0","logColumns":[{"timestampColumn":{"id":"5e7f964a-be8a-40d8-88d2-fbcfbdca0e2f"}},{"fieldColumn":{"field":"event.dataset","id":" eb9777a8-fcd3-420e-ba7d-172fff6da7a2"}},{"messageColumn":{"id":"b645d6da-824b-4723-9a2a-e8cece1645c0"}}],"logIndices":{"indexName":"logs-*,filebeat-*,kibana_sample_data_logs*,logstash*","type":"index_name"},"metricAlias":"metrics-*,metricbeat-*","metricsExplorerDefaultView":"0","name":"Default"},"coreMigrationVersion":"7.17.0","id":"default","migrationVersion":{"infrastructure-ui-source":"7.16.2"},"references":[],"sort":[1707273006619,217714],"type":"infrastructure-ui-source","updated_at":"2024-02-07T02:30:06.619Z","version":"WzQ5MzAyNywxOV0="} {"attributes":{"actions":[{"actionRef":"preconfigured:preconfigured-alert-history-es-index","actionTypeId":".index","group":"query matched","params":{"documents":["{\"@timestamp\":\"2023-11-20T10:19:30.521Z\",\"tags\":\"{{rule.tags}}\",\"rule\":{\"id\":\"{{rule.id}}\",\"name\":\"{{rule.name}}\",\"params\":{\"{{rule__type}}\":\"{{params}}\"},\"space\":\"{{rule.spaceId}}\",\"type\":\"{{rule.type}}\"},\"kibana\":{\"alert\":{\"id\":\"{{alert.id}}\",\"context\":{\"{{rule__type}}\":\"{{context}}\"},\"actionGroup\":\"{{alert.actionGroup}}\",\"actionGroupName\":\"{{alert.actionGroupName}}\"}},\"event\":{\"kind\":\"alert\"}}"]}}],"alertTypeId":".es-query","apiKey":null,"apiKeyOwner":null,"consumer":"alerts","createdAt":"2023-11-20T09:12:19.237Z","createdBy":"elastic","enabled":false,"executionStatus":{"error":null,"lastExecutionDate":"2024-02-07T08:28:08.400Z","status":"pending"},"legacyId":null,"meta":{"versionApiKeyLastmodified":"7.17.0"},"muteAll":false,"mutedInstanceIds":[],"name":"Successful SSH login","notifyWhen":"onActionGroupChange","params":{"esQuery":"{ \"query\": { \"bool\": { \"must\": [ \n { \"term\": { \"log.file.path\": \"/var/log/auth.log\" } },\n { \"term\": { \"event.outcome\": \"success\" }}\n ] } } }","index":["filebeat-*"],"size":100,"threshold":[1],"thresholdComparator":">=","timeField":"@timestamp","timeWindowSize":1,"timeWindowUnit":"m"},"schedule":{"interval":"1m"},"scheduledTaskId":null,"tags":[],"throttle":null,"updatedAt":"2024-02-07T02:27:19.537Z","updatedBy":"elastic"},"coreMigrationVersion":"7.17.0","id":"e79aaa90-8784-11ee-b9ba-89bbe73df7ff","migrationVersion":{"alert":"7.16.0"},"references":[],"sort":[1707294457367,232778],"type":"alert","updated_at":"2024-02-07T08:27:37.367Z","version":"WzQ5NTQ0MCwxOV0="} {"attributes":{"actions":[{"actionRef":"preconfigured:preconfigured-alert-history-es-index","actionTypeId":".index","group":"metrics.inventory_threshold.fired","params":{"documents":["{\"@timestamp\":\"2022-06-20T06:16:33.414Z\",\"tags\":\"{{rule.tags}}\",\"rule\":{\"id\":\"{{rule.id}}\",\"name\":\"{{rule.name}}\",\"params\":{\"{{rule__type}}\":\"{{params}}\"},\"space\":\"{{rule.spaceId}}\",\"type\":\"{{rule.type}}\"},\"kibana\":{\"alert\":{\"id\":\"{{alert.id}}\",\"context\":{\"{{rule__type}}\":\"{{context}}\"},\"actionGroup\":\"{{alert.actionGroup}}\",\"actionGroupName\":\"{{alert.actionGroupName}}\"}},\"event\":{\"kind\":\"alert\"}}"]}}],"alertTypeId":"metrics.alert.inventory.threshold","apiKey":null,"apiKeyOwner":null,"consumer":"alerts","createdAt":"2022-05-31T10:10:47.084Z","createdBy":"opencrvs-admin","enabled":false,"executionStatus":{"error":null,"lastExecutionDate":"2024-02-07T08:28:08.400Z","status":"pending"},"legacyId":null,"meta":{"versionApiKeyLastmodified":"7.17.0"},"muteAll":false,"mutedInstanceIds":[],"name":"CPU under heavy load","notifyWhen":"onActionGroupChange","params":{"criteria":[{"comparator":">","customMetric":{"aggregation":"avg","field":"","id":"alert-custom-metric","type":"custom"},"metric":"cpu","threshold":[70],"timeSize":1,"timeUnit":"m"}],"nodeType":"host","sourceId":"default"},"schedule":{"interval":"1m"},"scheduledTaskId":null,"tags":["infra","opencrvs-builtin"],"throttle":null,"updatedAt":"2024-02-07T02:27:30.573Z","updatedBy":"elastic"},"coreMigrationVersion":"7.17.0","id":"f022bee0-e0c9-11ec-99b8-dbfd54551fda","migrationVersion":{"alert":"7.16.0"},"references":[],"sort":[1707294457362,232774],"type":"alert","updated_at":"2024-02-07T08:27:37.362Z","version":"WzQ5NTQzOCwxOV0="} -{"attributes":{"actions":[{"actionRef":"preconfigured:preconfigured-alert-history-es-index","actionTypeId":".index","group":"metrics.inventory_threshold.fired","params":{"documents":["{\"@timestamp\":\"2023-11-17T13:17:52.791Z\",\"tags\":\"{{rule.tags}}\",\"rule\":{\"id\":\"{{rule.id}}\",\"name\":\"{{rule.name}}\",\"params\":{\"{{rule__type}}\":\"{{params}}\"},\"space\":\"{{rule.spaceId}}\",\"type\":\"{{rule.type}}\"},\"kibana\":{\"alert\":{\"id\":\"{{alert.id}}\",\"context\":{\"{{rule__type}}\":\"{{context}}\"},\"actionGroup\":\"{{alert.actionGroup}}\",\"actionGroupName\":\"{{alert.actionGroupName}}\"}},\"event\":{\"kind\":\"alert\"}}"]}}],"alertTypeId":"metrics.alert.inventory.threshold","apiKey":null,"apiKeyOwner":null,"consumer":"infrastructure","createdAt":"2022-05-31T10:10:47.080Z","createdBy":"opencrvs-admin","enabled":false,"executionStatus":{"error":null,"lastExecutionDate":"2024-02-07T08:28:08.400Z","status":"pending"},"legacyId":null,"meta":{"versionApiKeyLastmodified":"7.17.0"},"muteAll":false,"mutedInstanceIds":[],"name":"Low on available disk space","notifyWhen":"onActionGroupChange","params":{"alertOnNoData":true,"criteria":[{"comparator":">","customMetric":{"aggregation":"max","field":"system.filesystem.used.pct","id":"alert-custom-metric","label":"","type":"custom"},"metric":"custom","threshold":[0.7],"timeSize":1,"timeUnit":"h","warningComparator":">","warningThreshold":[0.5]}],"filterQuery":"{\"bool\":{\"should\":[{\"match_phrase\":{\"system.filesystem.device_name\":\"/dev/mapper/cryptfs\"}}],\"minimum_should_match\":1}}","filterQueryText":"system.filesystem.device_name : \"/dev/mapper/cryptfs\"","nodeType":"host","sourceId":"default"},"schedule":{"interval":"1h"},"scheduledTaskId":null,"tags":["infra","opencrvs-builtin"],"throttle":null,"updatedAt":"2024-02-07T02:27:20.542Z","updatedBy":"elastic"},"coreMigrationVersion":"7.17.0","id":"f023d050-e0c9-11ec-99b8-dbfd54551fda","migrationVersion":{"alert":"7.16.0"},"references":[],"sort":[1707294448366,232768],"type":"alert","updated_at":"2024-02-07T08:27:28.366Z","version":"WzQ5NTQzNSwxOV0="} {"attributes":{"actions":[{"actionRef":"preconfigured:preconfigured-alert-history-es-index","actionTypeId":".index","group":"threshold_met","params":{"documents":["{\"@timestamp\":\"2023-06-15T07:57:35.954Z\",\"tags\":\"{{rule.tags}}\",\"rule\":{\"id\":\"{{rule.id}}\",\"name\":\"{{rule.name}}\",\"params\":{\"{{rule__type}}\":\"{{params}}\"},\"space\":\"{{rule.spaceId}}\",\"type\":\"{{rule.type}}\"},\"kibana\":{\"alert\":{\"id\":\"{{alert.id}}\",\"context\":{\"{{rule__type}}\":\"{{context}}\"},\"actionGroup\":\"{{alert.actionGroup}}\",\"actionGroupName\":\"{{alert.actionGroupName}}\"}},\"event\":{\"kind\":\"alert\"}}"],"indexOverride":"kibana-alert-history-services"}}],"alertTypeId":"apm.error_rate","apiKey":null,"apiKeyOwner":null,"consumer":"alerts","createdAt":"2022-05-31T10:10:47.069Z","createdBy":"opencrvs-admin","enabled":false,"executionStatus":{"error":null,"lastExecutionDate":"2024-02-07T08:28:08.400Z","status":"pending"},"legacyId":null,"meta":{"versionApiKeyLastmodified":"7.17.0"},"muteAll":false,"mutedInstanceIds":[],"name":"Error in service","notifyWhen":"onActionGroupChange","params":{"environment":"ENVIRONMENT_ALL","threshold":1,"windowSize":1,"windowUnit":"m"},"schedule":{"interval":"1m"},"scheduledTaskId":null,"tags":[],"throttle":null,"updatedAt":"2024-02-07T02:27:21.551Z","updatedBy":"elastic"},"coreMigrationVersion":"7.17.0","id":"f02b4a60-e0c9-11ec-99b8-dbfd54551fda","migrationVersion":{"alert":"7.16.0"},"references":[],"sort":[1707294457374,232780],"type":"alert","updated_at":"2024-02-07T08:27:37.374Z","version":"WzQ5NTQ0MSwxOV0="} {"excludedObjects":[],"excludedObjectsCount":0,"exportedCount":9,"missingRefCount":0,"missingReferences":[]} \ No newline at end of file From 72c2b8ea4ee6a333f489147304aec1dc316e5001 Mon Sep 17 00:00:00 2001 From: Markus Date: Thu, 11 Jul 2024 10:55:57 +0300 Subject: [PATCH 02/25] take up docker module --- .../monitoring/beats/metricbeat.yml | 37 ++----------------- 1 file changed, 4 insertions(+), 33 deletions(-) diff --git a/infrastructure/monitoring/beats/metricbeat.yml b/infrastructure/monitoring/beats/metricbeat.yml index 520ed80bd..98bf4ffdb 100644 --- a/infrastructure/monitoring/beats/metricbeat.yml +++ b/infrastructure/monitoring/beats/metricbeat.yml @@ -19,39 +19,10 @@ metricbeat.modules: #------------------------------- System Module ------------------------------- - module: system metricsets: - [ - 'cpu', - 'load', - 'memory', - 'network', - 'process', - 'process_summary', - 'core', - 'diskio', - 'socket' - ] - processes: ['.*'] - process.include_top_n: - by_cpu: 5 - by_memory: 5 - period: 10s - cpu.metrics: ['percentages'] - core.metrics: ['percentages'] - - # - module: system - # period: 1m - # metricsets: - # - filesystem - # - fsstat - # processors: - # - drop_event.when.regexp: - # system.filesystem.mount_point: '^/(sys|cgroup|proc|dev|etc|host|lib)($|/)' - - - module: system - period: 15m - metricsets: - - uptime - + - filesystem + processors: + - drop_event.when.regexp: + system.filesystem.mount_point: '^/(sys|cgroup|proc|dev|etc|host|lib)($|/)' #------------------------------- Docker Module ------------------------------- - module: docker metricsets: From 619c19bd663b2bc0c395a84dd8ef078aa7cc85ef Mon Sep 17 00:00:00 2001 From: Markus Date: Thu, 11 Jul 2024 16:37:10 +0300 Subject: [PATCH 03/25] Use kibana access token for connections --- infrastructure/docker-compose.deploy.yml | 6 ++++-- infrastructure/monitoring/kibana/kibana.yml | 2 +- infrastructure/monitoring/kibana/setup-config.sh | 15 ++++++++------- 3 files changed, 13 insertions(+), 10 deletions(-) diff --git a/infrastructure/docker-compose.deploy.yml b/infrastructure/docker-compose.deploy.yml index 4d8a8473f..85ecfc2c5 100644 --- a/infrastructure/docker-compose.deploy.yml +++ b/infrastructure/docker-compose.deploy.yml @@ -77,6 +77,7 @@ services: - ELASTICSEARCH_HOST=elasticsearch:9200 - ELASTICSEARCH_USERNAME=elastic - ELASTICSEARCH_PASSWORD=${ELASTICSEARCH_SUPERUSER_PASSWORD} + - KIBANA_ACCESS_TOKEN=${KIBANA_ACCESS_TOKEN} - KIBANA_HOST=kibana:5601 command: ['--strict.perms=false'] deploy: @@ -127,8 +128,8 @@ services: entrypoint: [ 'curl', - '-u', - 'elastic:${ELASTICSEARCH_SUPERUSER_PASSWORD}', + '-H', + 'Authorization: Bearer ${KIBANA_ACCESS_TOKEN}', '-X', 'POST', 'http://kibana:5601/api/saved_objects/_import?overwrite=true', @@ -175,6 +176,7 @@ services: environment: - ELASTICSEARCH_USERNAME=elastic - ELASTICSEARCH_PASSWORD=${ELASTICSEARCH_SUPERUSER_PASSWORD} + - KIBANA_ACCESS_TOKEN=${KIBANA_ACCESS_TOKEN} configs: - source: kibana.{{ts}} target: /usr/share/kibana/config/kibana.yml diff --git a/infrastructure/monitoring/kibana/kibana.yml b/infrastructure/monitoring/kibana/kibana.yml index 90404ecab..1530fc336 100644 --- a/infrastructure/monitoring/kibana/kibana.yml +++ b/infrastructure/monitoring/kibana/kibana.yml @@ -64,7 +64,7 @@ xpack.infra.sources.default.logAlias: 'logs-*,filebeat-*,kibana_sample_data_logs # Kibana can also authenticate to Elasticsearch via "service account tokens". # Service account tokens are Bearer style tokens that replace the traditional username/password based configuration. # Use this token instead of a username/password. -# elasticsearch.serviceAccountToken: "my_token" +elasticsearch.serviceAccountToken: '{{KIBANA_ACCESS_TOKEN}}' # Time in milliseconds to wait for Elasticsearch to respond to pings. Defaults to the value of # the elasticsearch.requestTimeout setting. diff --git a/infrastructure/monitoring/kibana/setup-config.sh b/infrastructure/monitoring/kibana/setup-config.sh index a2f5cca78..34fc10bd3 100755 --- a/infrastructure/monitoring/kibana/setup-config.sh +++ b/infrastructure/monitoring/kibana/setup-config.sh @@ -15,7 +15,7 @@ kibana_alerting_api_url="http://kibana:5601/api/alerting/rules/_find?page=1&per_ docker_command="docker run --rm -v /opt/opencrvs/infrastructure/monitoring/kibana/config.ndjson:/config.ndjson --network=opencrvs_overlay_net curlimages/curl" # Initial API status check to ensure Kibana is ready -status_code=$($docker_command --connect-timeout 60 -u elastic:$ELASTICSEARCH_SUPERUSER_PASSWORD -o /dev/null -w '%{http_code}' "$kibana_alerting_api_url") +status_code=$($docker_command --connect-timeout 60 -H "Authorization: Bearer $KIBANA_ACCESS_TOKEN" -o /dev/null -w '%{http_code}' "$kibana_alerting_api_url") if [ "$status_code" -ne 200 ]; then echo "Kibana is not ready. API returned status code: $status_code" @@ -23,15 +23,16 @@ if [ "$status_code" -ne 200 ]; then fi # Delete all alerts -$docker_command --connect-timeout 60 -u elastic:$ELASTICSEARCH_SUPERUSER_PASSWORD "$kibana_alerting_api_url" | docker run --rm -i --network=opencrvs_overlay_net ghcr.io/jqlang/jq -r '.data[].id' | while read -r id; do - $docker_command --connect-timeout 60 -X DELETE -H 'kbn-xsrf: true' -u elastic:$ELASTICSEARCH_SUPERUSER_PASSWORD "http://kibana:5601/api/alerting/rule/$id" +$docker_command --connect-timeout 60 -H "Authorization: Bearer $KIBANA_ACCESS_TOKEN" "$kibana_alerting_api_url" | docker run --rm -i --network=opencrvs_overlay_net ghcr.io/jqlang/jq -r '.data[].id' | while read -r id; do + $docker_command --connect-timeout 60 -X DELETE -H 'kbn-xsrf: true' -H "Authorization: Bearer $KIBANA_ACCESS_TOKEN" "http://kibana:5601/api/alerting/rule/$id" done # Import configuration -$docker_command --connect-timeout 60 -u elastic:$ELASTICSEARCH_SUPERUSER_PASSWORD -X POST "http://kibana:5601/api/saved_objects/_import?overwrite=true" -H 'kbn-xsrf: true' --form file=@/config.ndjson > /dev/null +$docker_command --connect-timeout 60 -H "Authorization: Bearer $KIBANA_ACCESS_TOKEN" -X POST "http://kibana:5601/api/saved_objects/_import?overwrite=true" -H 'kbn-xsrf: true' --form file=@/config.ndjson > /dev/null # Re-enable all alerts -$docker_command --connect-timeout 60 -u elastic:$ELASTICSEARCH_SUPERUSER_PASSWORD "$kibana_alerting_api_url" | docker run --rm -i --network=opencrvs_overlay_net ghcr.io/jqlang/jq -r '.data[].id' | while read -r id; do - $docker_command --connect-timeout 60 -X POST -H 'kbn-xsrf: true' -u elastic:$ELASTICSEARCH_SUPERUSER_PASSWORD "http://kibana:5601/api/alerting/rule/$id/_disable" - $docker_command --connect-timeout 60 -X POST -H 'kbn-xsrf: true' -u elastic:$ELASTICSEARCH_SUPERUSER_PASSWORD "http://kibana:5601/api/alerting/rule/$id/_enable" +$docker_command --connect-timeout 60 -H "Authorization: Bearer $KIBANA_ACCESS_TOKEN" "$kibana_alerting_api_url" | docker run --rm -i --network=opencrvs_overlay_net ghcr.io/jqlang/jq -r '.data[].id' | while read -r id; do + $docker_command --connect-timeout 60 -X POST -H 'kbn-xsrf: true' -H "Authorization: Bearer $KIBANA_ACCESS_TOKEN" "http://kibana:5601/api/alerting/rule/$id/_disable" + $docker_command --connect-timeout 60 -X POST -H 'kbn-xsrf: true' -H "Authorization: Bearer $KIBANA_ACCESS_TOKEN" "http://kibana:5601/api/alerting/rule/$id/_enable" done + From d02723285acdea5120dee041ef07f5f3b4a4fd2a Mon Sep 17 00:00:00 2001 From: Markus Date: Fri, 12 Jul 2024 10:08:28 +0300 Subject: [PATCH 04/25] use kibana_system account instead of elastic --- infrastructure/deployment/deploy.sh | 3 +++ infrastructure/docker-compose.deploy.yml | 11 +++++------ infrastructure/elasticsearch/setup-users.sh | 1 + infrastructure/monitoring/kibana/kibana.yml | 1 - infrastructure/monitoring/kibana/setup-config.sh | 15 +++++++-------- 5 files changed, 16 insertions(+), 15 deletions(-) diff --git a/infrastructure/deployment/deploy.sh b/infrastructure/deployment/deploy.sh index 5d8725915..9fe757fad 100755 --- a/infrastructure/deployment/deploy.sh +++ b/infrastructure/deployment/deploy.sh @@ -331,6 +331,9 @@ export ROTATING_METRICBEAT_ELASTIC_PASSWORD=`generate_password` # Used by APM for writing data to ElasticSearch export ROTATING_APM_ELASTIC_PASSWORD=`generate_password` +# Used by Kibana for writing data to ElasticSearch +export ROTATING_KIBANA_ELASTIC_PASSWORD=`generate_password` + # Download core compose files to /tmp/ for compose_file in ${COMPOSE_FILES_DOWNLOADED_FROM_CORE[@]}; do if [ ! -f $compose_file ]; then diff --git a/infrastructure/docker-compose.deploy.yml b/infrastructure/docker-compose.deploy.yml index 85ecfc2c5..78580d51b 100644 --- a/infrastructure/docker-compose.deploy.yml +++ b/infrastructure/docker-compose.deploy.yml @@ -77,7 +77,6 @@ services: - ELASTICSEARCH_HOST=elasticsearch:9200 - ELASTICSEARCH_USERNAME=elastic - ELASTICSEARCH_PASSWORD=${ELASTICSEARCH_SUPERUSER_PASSWORD} - - KIBANA_ACCESS_TOKEN=${KIBANA_ACCESS_TOKEN} - KIBANA_HOST=kibana:5601 command: ['--strict.perms=false'] deploy: @@ -128,8 +127,8 @@ services: entrypoint: [ 'curl', - '-H', - 'Authorization: Bearer ${KIBANA_ACCESS_TOKEN}', + '-u', + 'kibana_system:${ROTATING_KIBANA_ELASTIC_PASSWORD}', '-X', 'POST', 'http://kibana:5601/api/saved_objects/_import?overwrite=true', @@ -174,9 +173,8 @@ services: networks: - overlay_net environment: - - ELASTICSEARCH_USERNAME=elastic - - ELASTICSEARCH_PASSWORD=${ELASTICSEARCH_SUPERUSER_PASSWORD} - - KIBANA_ACCESS_TOKEN=${KIBANA_ACCESS_TOKEN} + - ELASTICSEARCH_USERNAME=kibana_system + - ELASTICSEARCH_PASSWORD=${ROTATING_KIBANA_ELASTIC_PASSWORD} configs: - source: kibana.{{ts}} target: /usr/share/kibana/config/kibana.yml @@ -367,6 +365,7 @@ services: - APM_ELASTIC_PASSWORD=${ROTATING_APM_ELASTIC_PASSWORD} - SEARCH_ELASTIC_USERNAME=search-user - SEARCH_ELASTIC_PASSWORD=${ROTATING_SEARCH_ELASTIC_PASSWORD} + - KIBANA_ELASTIC_PASSWORD=${ROTATING_KIBANA_ELASTIC_PASSWORD} - KIBANA_USERNAME=${KIBANA_USERNAME} - KIBANA_PASSWORD=${KIBANA_PASSWORD} volumes: diff --git a/infrastructure/elasticsearch/setup-users.sh b/infrastructure/elasticsearch/setup-users.sh index 366090da1..05f6aae0f 100755 --- a/infrastructure/elasticsearch/setup-users.sh +++ b/infrastructure/elasticsearch/setup-users.sh @@ -25,6 +25,7 @@ users_passwords=( [beats_system]="${METRICBEAT_ELASTIC_PASSWORD:-}" [apm_system]="${APM_ELASTIC_PASSWORD:-}" [$KIBANA_USERNAME]="${KIBANA_PASSWORD:-}" + [kibana_system]="${KIBANA_ELASTIC_PASSWORD:-}" ) # ------------------------------------- diff --git a/infrastructure/monitoring/kibana/kibana.yml b/infrastructure/monitoring/kibana/kibana.yml index 1530fc336..7b2f480c1 100644 --- a/infrastructure/monitoring/kibana/kibana.yml +++ b/infrastructure/monitoring/kibana/kibana.yml @@ -64,7 +64,6 @@ xpack.infra.sources.default.logAlias: 'logs-*,filebeat-*,kibana_sample_data_logs # Kibana can also authenticate to Elasticsearch via "service account tokens". # Service account tokens are Bearer style tokens that replace the traditional username/password based configuration. # Use this token instead of a username/password. -elasticsearch.serviceAccountToken: '{{KIBANA_ACCESS_TOKEN}}' # Time in milliseconds to wait for Elasticsearch to respond to pings. Defaults to the value of # the elasticsearch.requestTimeout setting. diff --git a/infrastructure/monitoring/kibana/setup-config.sh b/infrastructure/monitoring/kibana/setup-config.sh index 34fc10bd3..e6f8d864f 100755 --- a/infrastructure/monitoring/kibana/setup-config.sh +++ b/infrastructure/monitoring/kibana/setup-config.sh @@ -15,7 +15,7 @@ kibana_alerting_api_url="http://kibana:5601/api/alerting/rules/_find?page=1&per_ docker_command="docker run --rm -v /opt/opencrvs/infrastructure/monitoring/kibana/config.ndjson:/config.ndjson --network=opencrvs_overlay_net curlimages/curl" # Initial API status check to ensure Kibana is ready -status_code=$($docker_command --connect-timeout 60 -H "Authorization: Bearer $KIBANA_ACCESS_TOKEN" -o /dev/null -w '%{http_code}' "$kibana_alerting_api_url") +status_code=$($docker_command --connect-timeout 60 -u kibana_system:${ROTATING_KIBANA_ELASTIC_PASSWORD} -o /dev/null -w '%{http_code}' "$kibana_alerting_api_url") if [ "$status_code" -ne 200 ]; then echo "Kibana is not ready. API returned status code: $status_code" @@ -23,16 +23,15 @@ if [ "$status_code" -ne 200 ]; then fi # Delete all alerts -$docker_command --connect-timeout 60 -H "Authorization: Bearer $KIBANA_ACCESS_TOKEN" "$kibana_alerting_api_url" | docker run --rm -i --network=opencrvs_overlay_net ghcr.io/jqlang/jq -r '.data[].id' | while read -r id; do - $docker_command --connect-timeout 60 -X DELETE -H 'kbn-xsrf: true' -H "Authorization: Bearer $KIBANA_ACCESS_TOKEN" "http://kibana:5601/api/alerting/rule/$id" +$docker_command --connect-timeout 60 -u kibana_system:${ROTATING_KIBANA_ELASTIC_PASSWORD} "$kibana_alerting_api_url" | docker run --rm -i --network=opencrvs_overlay_net ghcr.io/jqlang/jq -r '.data[].id' | while read -r id; do + $docker_command --connect-timeout 60 -X DELETE -H 'kbn-xsrf: true' -u kibana_system:${ROTATING_KIBANA_ELASTIC_PASSWORD} "http://kibana:5601/api/alerting/rule/$id" done # Import configuration -$docker_command --connect-timeout 60 -H "Authorization: Bearer $KIBANA_ACCESS_TOKEN" -X POST "http://kibana:5601/api/saved_objects/_import?overwrite=true" -H 'kbn-xsrf: true' --form file=@/config.ndjson > /dev/null +$docker_command --connect-timeout 60 -u kibana_system:${ROTATING_KIBANA_ELASTIC_PASSWORD} -X POST "http://kibana:5601/api/saved_objects/_import?overwrite=true" -H 'kbn-xsrf: true' --form file=@/config.ndjson > /dev/null # Re-enable all alerts -$docker_command --connect-timeout 60 -H "Authorization: Bearer $KIBANA_ACCESS_TOKEN" "$kibana_alerting_api_url" | docker run --rm -i --network=opencrvs_overlay_net ghcr.io/jqlang/jq -r '.data[].id' | while read -r id; do - $docker_command --connect-timeout 60 -X POST -H 'kbn-xsrf: true' -H "Authorization: Bearer $KIBANA_ACCESS_TOKEN" "http://kibana:5601/api/alerting/rule/$id/_disable" - $docker_command --connect-timeout 60 -X POST -H 'kbn-xsrf: true' -H "Authorization: Bearer $KIBANA_ACCESS_TOKEN" "http://kibana:5601/api/alerting/rule/$id/_enable" +$docker_command --connect-timeout 60 -u kibana_system:${ROTATING_KIBANA_ELASTIC_PASSWORD} "$kibana_alerting_api_url" | docker run --rm -i --network=opencrvs_overlay_net ghcr.io/jqlang/jq -r '.data[].id' | while read -r id; do + $docker_command --connect-timeout 60 -X POST -H 'kbn-xsrf: true' -u kibana_system:${ROTATING_KIBANA_ELASTIC_PASSWORD} "http://kibana:5601/api/alerting/rule/$id/_disable" + $docker_command --connect-timeout 60 -X POST -H 'kbn-xsrf: true' -u kibana_system:${ROTATING_KIBANA_ELASTIC_PASSWORD} "http://kibana:5601/api/alerting/rule/$id/_enable" done - From eabdf4623e3c26b1f8f5ad64f900e32ffa469b48 Mon Sep 17 00:00:00 2001 From: Markus Date: Fri, 12 Jul 2024 11:29:08 +0300 Subject: [PATCH 05/25] add kibana_system_password to top level env --- infrastructure/deployment/deploy.sh | 2 -- infrastructure/docker-compose.deploy.yml | 6 ++--- infrastructure/elasticsearch/setup-users.sh | 2 +- .../environments/setup-environment.ts | 24 +++++++++++++++++++ .../monitoring/kibana/setup-config.sh | 14 +++++------ 5 files changed, 35 insertions(+), 13 deletions(-) diff --git a/infrastructure/deployment/deploy.sh b/infrastructure/deployment/deploy.sh index 9fe757fad..e1e58fe10 100755 --- a/infrastructure/deployment/deploy.sh +++ b/infrastructure/deployment/deploy.sh @@ -331,8 +331,6 @@ export ROTATING_METRICBEAT_ELASTIC_PASSWORD=`generate_password` # Used by APM for writing data to ElasticSearch export ROTATING_APM_ELASTIC_PASSWORD=`generate_password` -# Used by Kibana for writing data to ElasticSearch -export ROTATING_KIBANA_ELASTIC_PASSWORD=`generate_password` # Download core compose files to /tmp/ for compose_file in ${COMPOSE_FILES_DOWNLOADED_FROM_CORE[@]}; do diff --git a/infrastructure/docker-compose.deploy.yml b/infrastructure/docker-compose.deploy.yml index 78580d51b..ef35f1763 100644 --- a/infrastructure/docker-compose.deploy.yml +++ b/infrastructure/docker-compose.deploy.yml @@ -128,7 +128,7 @@ services: [ 'curl', '-u', - 'kibana_system:${ROTATING_KIBANA_ELASTIC_PASSWORD}', + 'kibana_system:${KIBANA_SYSTEM_PASSWORD}', '-X', 'POST', 'http://kibana:5601/api/saved_objects/_import?overwrite=true', @@ -174,7 +174,7 @@ services: - overlay_net environment: - ELASTICSEARCH_USERNAME=kibana_system - - ELASTICSEARCH_PASSWORD=${ROTATING_KIBANA_ELASTIC_PASSWORD} + - ELASTICSEARCH_PASSWORD=${KIBANA_SYSTEM_PASSWORD} configs: - source: kibana.{{ts}} target: /usr/share/kibana/config/kibana.yml @@ -365,7 +365,7 @@ services: - APM_ELASTIC_PASSWORD=${ROTATING_APM_ELASTIC_PASSWORD} - SEARCH_ELASTIC_USERNAME=search-user - SEARCH_ELASTIC_PASSWORD=${ROTATING_SEARCH_ELASTIC_PASSWORD} - - KIBANA_ELASTIC_PASSWORD=${ROTATING_KIBANA_ELASTIC_PASSWORD} + - KIBANA_SYSTEM_PASSWORD=${KIBANA_SYSTEM_PASSWORD} - KIBANA_USERNAME=${KIBANA_USERNAME} - KIBANA_PASSWORD=${KIBANA_PASSWORD} volumes: diff --git a/infrastructure/elasticsearch/setup-users.sh b/infrastructure/elasticsearch/setup-users.sh index 05f6aae0f..16c7534dd 100755 --- a/infrastructure/elasticsearch/setup-users.sh +++ b/infrastructure/elasticsearch/setup-users.sh @@ -25,7 +25,7 @@ users_passwords=( [beats_system]="${METRICBEAT_ELASTIC_PASSWORD:-}" [apm_system]="${APM_ELASTIC_PASSWORD:-}" [$KIBANA_USERNAME]="${KIBANA_PASSWORD:-}" - [kibana_system]="${KIBANA_ELASTIC_PASSWORD:-}" + [kibana_system]="${KIBANA_SYSTEM_PASSWORD:-}" ) # ------------------------------------- diff --git a/infrastructure/environments/setup-environment.ts b/infrastructure/environments/setup-environment.ts index 83a0642bf..aa5d3dd41 100644 --- a/infrastructure/environments/setup-environment.ts +++ b/infrastructure/environments/setup-environment.ts @@ -653,6 +653,13 @@ const derivedVariables = [ type: 'disabled', scope: 'ENVIRONMENT' }, + { + name: 'KIBANA_SYSTEM_PASSWORD', + valueLabel: 'KIBANA_SYSTEM_PASSWORD', + valueType: 'SECRET', + type: 'disabled', + scope: 'ENVIRONMENT' + }, { name: 'MINIO_ROOT_USER', valueLabel: 'MINIO_ROOT_USER', @@ -1103,6 +1110,23 @@ const SPECIAL_NON_APPLICATION_ENVIRONMENTS = ['jump', 'backup'] ), scope: 'ENVIRONMENT' as const }, + { + name: 'KIBANA_SYSTEM_PASSWORD', + type: 'SECRET' as const, + didExist: findExistingValue( + 'KIBANA_SYSTEM_PASSWORD', + 'SECRET', + 'ENVIRONMENT', + existingValues + ), + value: findExistingOrDefine( + 'KIBANA_SYSTEM_PASSWORD', + 'SECRET', + 'ENVIRONMENT', + generateLongPassword() + ), + scope: 'ENVIRONMENT' as const + }, { name: 'MINIO_ROOT_USER', type: 'SECRET' as const, diff --git a/infrastructure/monitoring/kibana/setup-config.sh b/infrastructure/monitoring/kibana/setup-config.sh index e6f8d864f..e3876ae68 100755 --- a/infrastructure/monitoring/kibana/setup-config.sh +++ b/infrastructure/monitoring/kibana/setup-config.sh @@ -15,7 +15,7 @@ kibana_alerting_api_url="http://kibana:5601/api/alerting/rules/_find?page=1&per_ docker_command="docker run --rm -v /opt/opencrvs/infrastructure/monitoring/kibana/config.ndjson:/config.ndjson --network=opencrvs_overlay_net curlimages/curl" # Initial API status check to ensure Kibana is ready -status_code=$($docker_command --connect-timeout 60 -u kibana_system:${ROTATING_KIBANA_ELASTIC_PASSWORD} -o /dev/null -w '%{http_code}' "$kibana_alerting_api_url") +status_code=$($docker_command --connect-timeout 60 -u kibana_system:${KIBANA_SYSTEM_PASSWORD} -o /dev/null -w '%{http_code}' "$kibana_alerting_api_url") if [ "$status_code" -ne 200 ]; then echo "Kibana is not ready. API returned status code: $status_code" @@ -23,15 +23,15 @@ if [ "$status_code" -ne 200 ]; then fi # Delete all alerts -$docker_command --connect-timeout 60 -u kibana_system:${ROTATING_KIBANA_ELASTIC_PASSWORD} "$kibana_alerting_api_url" | docker run --rm -i --network=opencrvs_overlay_net ghcr.io/jqlang/jq -r '.data[].id' | while read -r id; do - $docker_command --connect-timeout 60 -X DELETE -H 'kbn-xsrf: true' -u kibana_system:${ROTATING_KIBANA_ELASTIC_PASSWORD} "http://kibana:5601/api/alerting/rule/$id" +$docker_command --connect-timeout 60 -u kibana_system:${KIBANA_SYSTEM_PASSWORD} "$kibana_alerting_api_url" | docker run --rm -i --network=opencrvs_overlay_net ghcr.io/jqlang/jq -r '.data[].id' | while read -r id; do + $docker_command --connect-timeout 60 -X DELETE -H 'kbn-xsrf: true' -u kibana_system:${KIBANA_SYSTEM_PASSWORD} "http://kibana:5601/api/alerting/rule/$id" done # Import configuration -$docker_command --connect-timeout 60 -u kibana_system:${ROTATING_KIBANA_ELASTIC_PASSWORD} -X POST "http://kibana:5601/api/saved_objects/_import?overwrite=true" -H 'kbn-xsrf: true' --form file=@/config.ndjson > /dev/null +$docker_command --connect-timeout 60 -u kibana_system:${KIBANA_SYSTEM_PASSWORD} -X POST "http://kibana:5601/api/saved_objects/_import?overwrite=true" -H 'kbn-xsrf: true' --form file=@/config.ndjson > /dev/null # Re-enable all alerts -$docker_command --connect-timeout 60 -u kibana_system:${ROTATING_KIBANA_ELASTIC_PASSWORD} "$kibana_alerting_api_url" | docker run --rm -i --network=opencrvs_overlay_net ghcr.io/jqlang/jq -r '.data[].id' | while read -r id; do - $docker_command --connect-timeout 60 -X POST -H 'kbn-xsrf: true' -u kibana_system:${ROTATING_KIBANA_ELASTIC_PASSWORD} "http://kibana:5601/api/alerting/rule/$id/_disable" - $docker_command --connect-timeout 60 -X POST -H 'kbn-xsrf: true' -u kibana_system:${ROTATING_KIBANA_ELASTIC_PASSWORD} "http://kibana:5601/api/alerting/rule/$id/_enable" +$docker_command --connect-timeout 60 -u kibana_system:${KIBANA_SYSTEM_PASSWORD} "$kibana_alerting_api_url" | docker run --rm -i --network=opencrvs_overlay_net ghcr.io/jqlang/jq -r '.data[].id' | while read -r id; do + $docker_command --connect-timeout 60 -X POST -H 'kbn-xsrf: true' -u kibana_system:${KIBANA_SYSTEM_PASSWORD} "http://kibana:5601/api/alerting/rule/$id/_disable" + $docker_command --connect-timeout 60 -X POST -H 'kbn-xsrf: true' -u kibana_system:${KIBANA_SYSTEM_PASSWORD} "http://kibana:5601/api/alerting/rule/$id/_enable" done From 55d65625d421a060350097069e0a921b4e7839ae Mon Sep 17 00:00:00 2001 From: Markus Date: Fri, 12 Jul 2024 11:39:37 +0300 Subject: [PATCH 06/25] remove reference to kibana_system from setup-users.sh --- infrastructure/elasticsearch/setup-users.sh | 1 - 1 file changed, 1 deletion(-) diff --git a/infrastructure/elasticsearch/setup-users.sh b/infrastructure/elasticsearch/setup-users.sh index 16c7534dd..366090da1 100755 --- a/infrastructure/elasticsearch/setup-users.sh +++ b/infrastructure/elasticsearch/setup-users.sh @@ -25,7 +25,6 @@ users_passwords=( [beats_system]="${METRICBEAT_ELASTIC_PASSWORD:-}" [apm_system]="${APM_ELASTIC_PASSWORD:-}" [$KIBANA_USERNAME]="${KIBANA_PASSWORD:-}" - [kibana_system]="${KIBANA_SYSTEM_PASSWORD:-}" ) # ------------------------------------- From 07ebadd6f8dad63cd4786dc3d91e940fc2fee56b Mon Sep 17 00:00:00 2001 From: Markus Date: Fri, 12 Jul 2024 12:30:14 +0300 Subject: [PATCH 07/25] use elastic user on setup-config kibana_system does not have access to kibana --- infrastructure/monitoring/kibana/setup-config.sh | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/infrastructure/monitoring/kibana/setup-config.sh b/infrastructure/monitoring/kibana/setup-config.sh index e3876ae68..b94cab21f 100755 --- a/infrastructure/monitoring/kibana/setup-config.sh +++ b/infrastructure/monitoring/kibana/setup-config.sh @@ -15,7 +15,7 @@ kibana_alerting_api_url="http://kibana:5601/api/alerting/rules/_find?page=1&per_ docker_command="docker run --rm -v /opt/opencrvs/infrastructure/monitoring/kibana/config.ndjson:/config.ndjson --network=opencrvs_overlay_net curlimages/curl" # Initial API status check to ensure Kibana is ready -status_code=$($docker_command --connect-timeout 60 -u kibana_system:${KIBANA_SYSTEM_PASSWORD} -o /dev/null -w '%{http_code}' "$kibana_alerting_api_url") +status_code=$($docker_command --connect-timeout 60 -u elastic:$ELASTICSEARCH_SUPERUSER_PASSWORD -o /dev/null -w '%{http_code}' "$kibana_alerting_api_url") if [ "$status_code" -ne 200 ]; then echo "Kibana is not ready. API returned status code: $status_code" @@ -23,15 +23,15 @@ if [ "$status_code" -ne 200 ]; then fi # Delete all alerts -$docker_command --connect-timeout 60 -u kibana_system:${KIBANA_SYSTEM_PASSWORD} "$kibana_alerting_api_url" | docker run --rm -i --network=opencrvs_overlay_net ghcr.io/jqlang/jq -r '.data[].id' | while read -r id; do - $docker_command --connect-timeout 60 -X DELETE -H 'kbn-xsrf: true' -u kibana_system:${KIBANA_SYSTEM_PASSWORD} "http://kibana:5601/api/alerting/rule/$id" +$docker_command --connect-timeout 60 -u elastic:$ELASTICSEARCH_SUPERUSER_PASSWORD "$kibana_alerting_api_url" | docker run --rm -i --network=opencrvs_overlay_net ghcr.io/jqlang/jq -r '.data[].id' | while read -r id; do + $docker_command --connect-timeout 60 -X DELETE -H 'kbn-xsrf: true' -u elastic:$ELASTICSEARCH_SUPERUSER_PASSWORD "http://kibana:5601/api/alerting/rule/$id" done # Import configuration -$docker_command --connect-timeout 60 -u kibana_system:${KIBANA_SYSTEM_PASSWORD} -X POST "http://kibana:5601/api/saved_objects/_import?overwrite=true" -H 'kbn-xsrf: true' --form file=@/config.ndjson > /dev/null +$docker_command --connect-timeout 60 -u elastic:$ELASTICSEARCH_SUPERUSER_PASSWORD -X POST "http://kibana:5601/api/saved_objects/_import?overwrite=true" -H 'kbn-xsrf: true' --form file=@/config.ndjson > /dev/null # Re-enable all alerts -$docker_command --connect-timeout 60 -u kibana_system:${KIBANA_SYSTEM_PASSWORD} "$kibana_alerting_api_url" | docker run --rm -i --network=opencrvs_overlay_net ghcr.io/jqlang/jq -r '.data[].id' | while read -r id; do - $docker_command --connect-timeout 60 -X POST -H 'kbn-xsrf: true' -u kibana_system:${KIBANA_SYSTEM_PASSWORD} "http://kibana:5601/api/alerting/rule/$id/_disable" - $docker_command --connect-timeout 60 -X POST -H 'kbn-xsrf: true' -u kibana_system:${KIBANA_SYSTEM_PASSWORD} "http://kibana:5601/api/alerting/rule/$id/_enable" -done +$docker_command --connect-timeout 60 -u elastic:$ELASTICSEARCH_SUPERUSER_PASSWORD "$kibana_alerting_api_url" | docker run --rm -i --network=opencrvs_overlay_net ghcr.io/jqlang/jq -r '.data[].id' | while read -r id; do + $docker_command --connect-timeout 60 -X POST -H 'kbn-xsrf: true' -u elastic:$ELASTICSEARCH_SUPERUSER_PASSWORD "http://kibana:5601/api/alerting/rule/$id/_disable" + $docker_command --connect-timeout 60 -X POST -H 'kbn-xsrf: true' -u elastic:$ELASTICSEARCH_SUPERUSER_PASSWORD "http://kibana:5601/api/alerting/rule/$id/_enable" +done \ No newline at end of file From 1f7462875df7e7a3ed753736ef83d1df6cd79a2c Mon Sep 17 00:00:00 2001 From: Markus Date: Fri, 12 Jul 2024 12:41:04 +0300 Subject: [PATCH 08/25] remove deprecated dependencies from elastic and kibana --- infrastructure/docker-compose.deploy.yml | 1 - infrastructure/monitoring/kibana/kibana.yml | 3 +-- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/infrastructure/docker-compose.deploy.yml b/infrastructure/docker-compose.deploy.yml index ef35f1763..69d30bb7b 100644 --- a/infrastructure/docker-compose.deploy.yml +++ b/infrastructure/docker-compose.deploy.yml @@ -282,7 +282,6 @@ services: - path.repo=/data/backups/elasticsearch - cluster.name=docker-cluster - network.host=0.0.0.0 - - discovery.zen.minimum_master_nodes=1 - discovery.type=single-node - xpack.security.enabled=true - xpack.security.authc.api_key.enabled=true diff --git a/infrastructure/monitoring/kibana/kibana.yml b/infrastructure/monitoring/kibana/kibana.yml index 7b2f480c1..ba87306dc 100644 --- a/infrastructure/monitoring/kibana/kibana.yml +++ b/infrastructure/monitoring/kibana/kibana.yml @@ -53,7 +53,6 @@ monitoring.ui.container.elasticsearch.enabled: true xpack.encryptedSavedObjects.encryptionKey: '{{KIBANA_ENCRYPTION_KEY}}' xpack.reporting.encryptionKey: '{{KIBANA_ENCRYPTION_KEY}}' xpack.actions.preconfiguredAlertHistoryEsIndex: true -xpack.infra.sources.default.logAlias: 'logs-*,filebeat-*,kibana_sample_data_logs*,logstash*' # If your Elasticsearch is protected with basic authentication, these settings provide # the username and password that the Kibana server uses to perform maintenance on the Kibana # index at startup. Your Kibana users still need to authenticate with Elasticsearch, which @@ -103,7 +102,7 @@ xpack.infra.sources.default.logAlias: 'logs-*,filebeat-*,kibana_sample_data_logs # =================== System: Logging =================== # Set the value of this setting to off to suppress all logging output, or to debug to log everything. Defaults to 'info' -logging.root.level: error +logging.root.level: debug # Enables you to specify a file where Kibana stores log output. #logging.appenders.default: # type: file From 1c294176b6db73b999e92ef1a0d3cf2ac500605a Mon Sep 17 00:00:00 2001 From: Markus Date: Mon, 15 Jul 2024 13:36:45 +0300 Subject: [PATCH 09/25] upgrade kibana to 8.14.3 --- infrastructure/docker-compose.deploy.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/infrastructure/docker-compose.deploy.yml b/infrastructure/docker-compose.deploy.yml index 69d30bb7b..cde5be71d 100644 --- a/infrastructure/docker-compose.deploy.yml +++ b/infrastructure/docker-compose.deploy.yml @@ -156,7 +156,7 @@ services: gelf-address: 'udp://127.0.0.1:12201' tag: 'setup-kibana-config' kibana: - image: docker.elastic.co/kibana/kibana:7.17.0 + image: docker.elastic.co/kibana/kibana:8.14.3 restart: always deploy: labels: From 08e2850b96855fd2d3374649ed033756d81a4dae Mon Sep 17 00:00:00 2001 From: Markus Date: Mon, 15 Jul 2024 13:57:48 +0300 Subject: [PATCH 10/25] upgrade logstash to 8.14.3 --- infrastructure/docker-compose.deploy.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/infrastructure/docker-compose.deploy.yml b/infrastructure/docker-compose.deploy.yml index cde5be71d..152e48796 100644 --- a/infrastructure/docker-compose.deploy.yml +++ b/infrastructure/docker-compose.deploy.yml @@ -408,7 +408,7 @@ services: tag: 'elastalert' logstash: - image: logstash:7.17.0 + image: logstash:8.14.3 command: logstash -f /etc/logstash/logstash.conf --verbose ports: - '12201:12201' From 8855e07765fa2985646f61391bf536a41692181b Mon Sep 17 00:00:00 2001 From: Markus Date: Mon, 15 Jul 2024 14:05:46 +0300 Subject: [PATCH 11/25] upgrade beats to 8.14.3 use monitoring instead of deprecated xpack.monitoring --- infrastructure/docker-compose.deploy.yml | 4 ++-- infrastructure/monitoring/beats/metricbeat.yml | 12 ++++++------ infrastructure/monitoring/filebeat/filebeat.yml | 2 +- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/infrastructure/docker-compose.deploy.yml b/infrastructure/docker-compose.deploy.yml index 152e48796..e72e52d63 100644 --- a/infrastructure/docker-compose.deploy.yml +++ b/infrastructure/docker-compose.deploy.yml @@ -60,7 +60,7 @@ services: - overlay_net filebeat: - image: docker.elastic.co/beats/filebeat:7.17.0 + image: docker.elastic.co/beats/filebeat:8.14.3 user: root networks: - overlay_net @@ -85,7 +85,7 @@ services: - 'traefik.enable=false' metricbeat: - image: docker.elastic.co/beats/metricbeat:7.17.13 + image: docker.elastic.co/beats/metricbeat:8.14.3 user: root cap_add: - SYS_PTRACE diff --git a/infrastructure/monitoring/beats/metricbeat.yml b/infrastructure/monitoring/beats/metricbeat.yml index 98bf4ffdb..bb0f9d01b 100644 --- a/infrastructure/monitoring/beats/metricbeat.yml +++ b/infrastructure/monitoring/beats/metricbeat.yml @@ -24,11 +24,11 @@ metricbeat.modules: - drop_event.when.regexp: system.filesystem.mount_point: '^/(sys|cgroup|proc|dev|etc|host|lib)($|/)' #------------------------------- Docker Module ------------------------------- - - module: docker - metricsets: - ['container', 'cpu', 'diskio', 'healthcheck', 'info', 'memory', 'network'] - hosts: ['unix:///var/run/docker.sock'] - period: 10s + # - module: docker + # metricsets: + # ['container', 'cpu', 'diskio', 'healthcheck', 'info', 'memory', 'network'] + # hosts: ['unix:///var/run/docker.sock'] + # period: 10s #================================ Processors =================================== processors: @@ -56,7 +56,7 @@ setup.kibana: password: ${KIBANA_PASSWORD} #============================== Xpack Monitoring =============================== -xpack.monitoring: +monitoring: enabled: true elasticsearch: username: ${BEATS_USERNAME} diff --git a/infrastructure/monitoring/filebeat/filebeat.yml b/infrastructure/monitoring/filebeat/filebeat.yml index d2ac41bcb..8f75ec4b9 100644 --- a/infrastructure/monitoring/filebeat/filebeat.yml +++ b/infrastructure/monitoring/filebeat/filebeat.yml @@ -60,7 +60,7 @@ setup.kibana: password: ${ELASTICSEARCH_PASSWORD} #============================== Xpack Monitoring =============================== -xpack.monitoring: +monitoring: enabled: true elasticsearch: From 449c858e023a747ad35bdece92f321f450d5d05b Mon Sep 17 00:00:00 2001 From: Markus Date: Mon, 15 Jul 2024 14:42:18 +0300 Subject: [PATCH 12/25] upgrade apm server to 7.17.22 --- infrastructure/docker-compose.deploy.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/infrastructure/docker-compose.deploy.yml b/infrastructure/docker-compose.deploy.yml index e72e52d63..fdd49d7f4 100644 --- a/infrastructure/docker-compose.deploy.yml +++ b/infrastructure/docker-compose.deploy.yml @@ -431,7 +431,7 @@ services: - 'traefik.enable=false' replicas: 1 apm-server: - image: docker.elastic.co/apm/apm-server:7.15.2 + image: docker.elastic.co/apm/apm-server:7.17.22 cap_add: ['CHOWN', 'DAC_OVERRIDE', 'SETGID', 'SETUID'] cap_drop: ['ALL'] restart: always From 4548fb5476c3dc5bcb9e284ed2254e433eb73c4a Mon Sep 17 00:00:00 2001 From: Markus Date: Mon, 15 Jul 2024 15:16:03 +0300 Subject: [PATCH 13/25] revert log changes after debugging --- .../monitoring/beats/metricbeat.yml | 39 ++++++++++++++++--- infrastructure/monitoring/kibana/kibana.yml | 2 +- 2 files changed, 35 insertions(+), 6 deletions(-) diff --git a/infrastructure/monitoring/beats/metricbeat.yml b/infrastructure/monitoring/beats/metricbeat.yml index bb0f9d01b..78bd940a7 100644 --- a/infrastructure/monitoring/beats/metricbeat.yml +++ b/infrastructure/monitoring/beats/metricbeat.yml @@ -18,17 +18,46 @@ metricbeat.autodiscover: metricbeat.modules: #------------------------------- System Module ------------------------------- - module: system + metricsets: + [ + 'cpu', + 'load', + 'memory', + 'network', + 'process', + 'process_summary', + 'core', + 'diskio', + 'socket' + ] + processes: ['.*'] + process.include_top_n: + by_cpu: 5 + by_memory: 5 + period: 10s + cpu.metrics: ['percentages'] + core.metrics: ['percentages'] + + - module: system + period: 1m metricsets: - filesystem + - fsstat processors: - drop_event.when.regexp: system.filesystem.mount_point: '^/(sys|cgroup|proc|dev|etc|host|lib)($|/)' + + - module: system + period: 15m + metricsets: + - uptime + #------------------------------- Docker Module ------------------------------- - # - module: docker - # metricsets: - # ['container', 'cpu', 'diskio', 'healthcheck', 'info', 'memory', 'network'] - # hosts: ['unix:///var/run/docker.sock'] - # period: 10s + - module: docker + metricsets: + ['container', 'cpu', 'diskio', 'healthcheck', 'info', 'memory', 'network'] + hosts: ['unix:///var/run/docker.sock'] + period: 10s #================================ Processors =================================== processors: diff --git a/infrastructure/monitoring/kibana/kibana.yml b/infrastructure/monitoring/kibana/kibana.yml index ba87306dc..2cfdd4c66 100644 --- a/infrastructure/monitoring/kibana/kibana.yml +++ b/infrastructure/monitoring/kibana/kibana.yml @@ -102,7 +102,7 @@ xpack.actions.preconfiguredAlertHistoryEsIndex: true # =================== System: Logging =================== # Set the value of this setting to off to suppress all logging output, or to debug to log everything. Defaults to 'info' -logging.root.level: debug +logging.root.level: error # Enables you to specify a file where Kibana stores log output. #logging.appenders.default: # type: file From f51e42a702ebe55b3a5439fa90cd0e8312bc4e30 Mon Sep 17 00:00:00 2001 From: Markus Date: Tue, 16 Jul 2024 16:20:39 +0300 Subject: [PATCH 14/25] widen search user privileges to accommodate reindex job --- infrastructure/elasticsearch/roles/search_user.json | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/infrastructure/elasticsearch/roles/search_user.json b/infrastructure/elasticsearch/roles/search_user.json index b7be198b5..da60cdeb8 100644 --- a/infrastructure/elasticsearch/roles/search_user.json +++ b/infrastructure/elasticsearch/roles/search_user.json @@ -1,8 +1,17 @@ { + "cluster": ["manage"], "indices": [ { - "names": ["ocrvs"], - "privileges": ["write", "create", "create_index", "delete", "delete_index", "read"] + "names": ["ocrvs", "ocrvs-*"], + "privileges": [ + "write", + "create", + "create_index", + "delete", + "delete_index", + "read", + "manage" + ] } ] } From 7681ad414467e7016caab8d232133675cfcfc066 Mon Sep 17 00:00:00 2001 From: Markus Date: Tue, 16 Jul 2024 16:21:01 +0300 Subject: [PATCH 15/25] upgrade elastalert to 2.19 2.4 onwards elastic8 is supported --- infrastructure/docker-compose.deploy.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/infrastructure/docker-compose.deploy.yml b/infrastructure/docker-compose.deploy.yml index fdd49d7f4..3af57d9e6 100644 --- a/infrastructure/docker-compose.deploy.yml +++ b/infrastructure/docker-compose.deploy.yml @@ -384,7 +384,7 @@ services: gelf-address: 'udp://127.0.0.1:12201' tag: 'setup-elasticsearch-users' elastalert: - image: jertel/elastalert2:2.3.0 + image: jertel/elastalert2:2.19.0 restart: unless-stopped environment: - ES_USERNAME=elastic From 96886c924279a455b370f65bde7147ad0b4e4697 Mon Sep 17 00:00:00 2001 From: Markus Date: Tue, 16 Jul 2024 17:05:09 +0300 Subject: [PATCH 16/25] use minimal access for search user --- infrastructure/elasticsearch/roles/search_user.json | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/infrastructure/elasticsearch/roles/search_user.json b/infrastructure/elasticsearch/roles/search_user.json index da60cdeb8..1c84a4173 100644 --- a/infrastructure/elasticsearch/roles/search_user.json +++ b/infrastructure/elasticsearch/roles/search_user.json @@ -1,5 +1,5 @@ { - "cluster": ["manage"], + "cluster": ["monitoring"], "indices": [ { "names": ["ocrvs", "ocrvs-*"], @@ -9,8 +9,7 @@ "create_index", "delete", "delete_index", - "read", - "manage" + "read" ] } ] From 5cd3377683ac4ef630875c6f4f5ea5669d063e36 Mon Sep 17 00:00:00 2001 From: Markus Date: Tue, 16 Jul 2024 17:26:26 +0300 Subject: [PATCH 17/25] update index creation error check match against v8 error message https://github.com/elastic/elasticsearch/blob/e64aab1b08f03a0af8a2845ff0cef226bde363af/server/src/main/java/org/elasticsearch/cluster/metadata/MetadataCreateIndexService.java#L186 --- infrastructure/elasticsearch/setup-helpers.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/infrastructure/elasticsearch/setup-helpers.sh b/infrastructure/elasticsearch/setup-helpers.sh index c6a25e874..38caab50a 100755 --- a/infrastructure/elasticsearch/setup-helpers.sh +++ b/infrastructure/elasticsearch/setup-helpers.sh @@ -253,7 +253,7 @@ function create_elastic_index { echo "${output}" - if [[ "${output: -3}" -eq 200 || $output == *"resource_already_exists"* ]]; then + if [[ "${output: -3}" -eq 200 || $output == *"already exists as alias"* ]]; then result=0 fi From 0cd875137b48db13c37113056d0293ae14f77f1d Mon Sep 17 00:00:00 2001 From: Markus Date: Tue, 16 Jul 2024 17:36:25 +0300 Subject: [PATCH 18/25] add manage privilege back to search user --- infrastructure/elasticsearch/roles/search_user.json | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/infrastructure/elasticsearch/roles/search_user.json b/infrastructure/elasticsearch/roles/search_user.json index 1c84a4173..da60cdeb8 100644 --- a/infrastructure/elasticsearch/roles/search_user.json +++ b/infrastructure/elasticsearch/roles/search_user.json @@ -1,5 +1,5 @@ { - "cluster": ["monitoring"], + "cluster": ["manage"], "indices": [ { "names": ["ocrvs", "ocrvs-*"], @@ -9,7 +9,8 @@ "create_index", "delete", "delete_index", - "read" + "read", + "manage" ] } ] From 5fa77fafb2a4131f5a4ddc837e884dd1169a13e3 Mon Sep 17 00:00:00 2001 From: Markus Date: Wed, 17 Jul 2024 08:42:07 +0300 Subject: [PATCH 19/25] add comment for error message check --- infrastructure/elasticsearch/setup-helpers.sh | 2 ++ 1 file changed, 2 insertions(+) diff --git a/infrastructure/elasticsearch/setup-helpers.sh b/infrastructure/elasticsearch/setup-helpers.sh index 38caab50a..5df149fbb 100755 --- a/infrastructure/elasticsearch/setup-helpers.sh +++ b/infrastructure/elasticsearch/setup-helpers.sh @@ -253,6 +253,8 @@ function create_elastic_index { echo "${output}" + # @TODO: Preferably check whether the index already exists before creating it. + # Error message might change in the future so we should not depend on it. if [[ "${output: -3}" -eq 200 || $output == *"already exists as alias"* ]]; then result=0 fi From af3942374b1dd5f74d9d6477b75483fc95a0b3e0 Mon Sep 17 00:00:00 2001 From: Markus Date: Wed, 17 Jul 2024 13:39:05 +0300 Subject: [PATCH 20/25] fix cherry-pick inconsistencies Changes were initially ran against test country config --- infrastructure/deployment/deploy.sh | 1 - infrastructure/monitoring/elastalert/rules/alert.yaml | 6 +++--- infrastructure/monitoring/kibana/config.ndjson | 1 + infrastructure/monitoring/kibana/kibana.yml | 1 + 4 files changed, 5 insertions(+), 4 deletions(-) diff --git a/infrastructure/deployment/deploy.sh b/infrastructure/deployment/deploy.sh index e1e58fe10..5d8725915 100755 --- a/infrastructure/deployment/deploy.sh +++ b/infrastructure/deployment/deploy.sh @@ -331,7 +331,6 @@ export ROTATING_METRICBEAT_ELASTIC_PASSWORD=`generate_password` # Used by APM for writing data to ElasticSearch export ROTATING_APM_ELASTIC_PASSWORD=`generate_password` - # Download core compose files to /tmp/ for compose_file in ${COMPOSE_FILES_DOWNLOADED_FROM_CORE[@]}; do if [ ! -f $compose_file ]; then diff --git a/infrastructure/monitoring/elastalert/rules/alert.yaml b/infrastructure/monitoring/elastalert/rules/alert.yaml index 58a74d374..d5bfbcb50 100644 --- a/infrastructure/monitoring/elastalert/rules/alert.yaml +++ b/infrastructure/monitoring/elastalert/rules/alert.yaml @@ -30,9 +30,9 @@ filter: - term: rule.name.keyword: value: 'CPU under heavy load' - # - term: - # rule.name.keyword: - # value: 'Low on available disk space' + - term: + rule.name.keyword: + value: 'Low on available disk space' minimum_should_match: 1 alert: post2 diff --git a/infrastructure/monitoring/kibana/config.ndjson b/infrastructure/monitoring/kibana/config.ndjson index 6c26fe491..a8f47219d 100644 --- a/infrastructure/monitoring/kibana/config.ndjson +++ b/infrastructure/monitoring/kibana/config.ndjson @@ -5,5 +5,6 @@ {"attributes":{"anomalyThreshold":50,"description":"","fields":{"container":"container.id","host":"host.name","message":["message","@message"],"pod":"kubernetes.pod.uid","tiebreaker":"_doc","timestamp":"@timestamp"},"inventoryDefaultView":"0","logColumns":[{"timestampColumn":{"id":"5e7f964a-be8a-40d8-88d2-fbcfbdca0e2f"}},{"fieldColumn":{"field":"event.dataset","id":" eb9777a8-fcd3-420e-ba7d-172fff6da7a2"}},{"messageColumn":{"id":"b645d6da-824b-4723-9a2a-e8cece1645c0"}}],"logIndices":{"indexName":"logs-*,filebeat-*,kibana_sample_data_logs*,logstash*","type":"index_name"},"metricAlias":"metrics-*,metricbeat-*","metricsExplorerDefaultView":"0","name":"Default"},"coreMigrationVersion":"7.17.0","id":"default","migrationVersion":{"infrastructure-ui-source":"7.16.2"},"references":[],"sort":[1707273006619,217714],"type":"infrastructure-ui-source","updated_at":"2024-02-07T02:30:06.619Z","version":"WzQ5MzAyNywxOV0="} {"attributes":{"actions":[{"actionRef":"preconfigured:preconfigured-alert-history-es-index","actionTypeId":".index","group":"query matched","params":{"documents":["{\"@timestamp\":\"2023-11-20T10:19:30.521Z\",\"tags\":\"{{rule.tags}}\",\"rule\":{\"id\":\"{{rule.id}}\",\"name\":\"{{rule.name}}\",\"params\":{\"{{rule__type}}\":\"{{params}}\"},\"space\":\"{{rule.spaceId}}\",\"type\":\"{{rule.type}}\"},\"kibana\":{\"alert\":{\"id\":\"{{alert.id}}\",\"context\":{\"{{rule__type}}\":\"{{context}}\"},\"actionGroup\":\"{{alert.actionGroup}}\",\"actionGroupName\":\"{{alert.actionGroupName}}\"}},\"event\":{\"kind\":\"alert\"}}"]}}],"alertTypeId":".es-query","apiKey":null,"apiKeyOwner":null,"consumer":"alerts","createdAt":"2023-11-20T09:12:19.237Z","createdBy":"elastic","enabled":false,"executionStatus":{"error":null,"lastExecutionDate":"2024-02-07T08:28:08.400Z","status":"pending"},"legacyId":null,"meta":{"versionApiKeyLastmodified":"7.17.0"},"muteAll":false,"mutedInstanceIds":[],"name":"Successful SSH login","notifyWhen":"onActionGroupChange","params":{"esQuery":"{ \"query\": { \"bool\": { \"must\": [ \n { \"term\": { \"log.file.path\": \"/var/log/auth.log\" } },\n { \"term\": { \"event.outcome\": \"success\" }}\n ] } } }","index":["filebeat-*"],"size":100,"threshold":[1],"thresholdComparator":">=","timeField":"@timestamp","timeWindowSize":1,"timeWindowUnit":"m"},"schedule":{"interval":"1m"},"scheduledTaskId":null,"tags":[],"throttle":null,"updatedAt":"2024-02-07T02:27:19.537Z","updatedBy":"elastic"},"coreMigrationVersion":"7.17.0","id":"e79aaa90-8784-11ee-b9ba-89bbe73df7ff","migrationVersion":{"alert":"7.16.0"},"references":[],"sort":[1707294457367,232778],"type":"alert","updated_at":"2024-02-07T08:27:37.367Z","version":"WzQ5NTQ0MCwxOV0="} {"attributes":{"actions":[{"actionRef":"preconfigured:preconfigured-alert-history-es-index","actionTypeId":".index","group":"metrics.inventory_threshold.fired","params":{"documents":["{\"@timestamp\":\"2022-06-20T06:16:33.414Z\",\"tags\":\"{{rule.tags}}\",\"rule\":{\"id\":\"{{rule.id}}\",\"name\":\"{{rule.name}}\",\"params\":{\"{{rule__type}}\":\"{{params}}\"},\"space\":\"{{rule.spaceId}}\",\"type\":\"{{rule.type}}\"},\"kibana\":{\"alert\":{\"id\":\"{{alert.id}}\",\"context\":{\"{{rule__type}}\":\"{{context}}\"},\"actionGroup\":\"{{alert.actionGroup}}\",\"actionGroupName\":\"{{alert.actionGroupName}}\"}},\"event\":{\"kind\":\"alert\"}}"]}}],"alertTypeId":"metrics.alert.inventory.threshold","apiKey":null,"apiKeyOwner":null,"consumer":"alerts","createdAt":"2022-05-31T10:10:47.084Z","createdBy":"opencrvs-admin","enabled":false,"executionStatus":{"error":null,"lastExecutionDate":"2024-02-07T08:28:08.400Z","status":"pending"},"legacyId":null,"meta":{"versionApiKeyLastmodified":"7.17.0"},"muteAll":false,"mutedInstanceIds":[],"name":"CPU under heavy load","notifyWhen":"onActionGroupChange","params":{"criteria":[{"comparator":">","customMetric":{"aggregation":"avg","field":"","id":"alert-custom-metric","type":"custom"},"metric":"cpu","threshold":[70],"timeSize":1,"timeUnit":"m"}],"nodeType":"host","sourceId":"default"},"schedule":{"interval":"1m"},"scheduledTaskId":null,"tags":["infra","opencrvs-builtin"],"throttle":null,"updatedAt":"2024-02-07T02:27:30.573Z","updatedBy":"elastic"},"coreMigrationVersion":"7.17.0","id":"f022bee0-e0c9-11ec-99b8-dbfd54551fda","migrationVersion":{"alert":"7.16.0"},"references":[],"sort":[1707294457362,232774],"type":"alert","updated_at":"2024-02-07T08:27:37.362Z","version":"WzQ5NTQzOCwxOV0="} +{"attributes":{"actions":[{"actionRef":"preconfigured:preconfigured-alert-history-es-index","actionTypeId":".index","group":"metrics.inventory_threshold.fired","params":{"documents":["{\"@timestamp\":\"2023-11-17T13:17:52.791Z\",\"tags\":\"{{rule.tags}}\",\"rule\":{\"id\":\"{{rule.id}}\",\"name\":\"{{rule.name}}\",\"params\":{\"{{rule__type}}\":\"{{params}}\"},\"space\":\"{{rule.spaceId}}\",\"type\":\"{{rule.type}}\"},\"kibana\":{\"alert\":{\"id\":\"{{alert.id}}\",\"context\":{\"{{rule__type}}\":\"{{context}}\"},\"actionGroup\":\"{{alert.actionGroup}}\",\"actionGroupName\":\"{{alert.actionGroupName}}\"}},\"event\":{\"kind\":\"alert\"}}"]}}],"alertTypeId":"metrics.alert.inventory.threshold","apiKey":null,"apiKeyOwner":null,"consumer":"infrastructure","createdAt":"2022-05-31T10:10:47.080Z","createdBy":"opencrvs-admin","enabled":false,"executionStatus":{"error":null,"lastExecutionDate":"2024-02-07T08:28:08.400Z","status":"pending"},"legacyId":null,"meta":{"versionApiKeyLastmodified":"7.17.0"},"muteAll":false,"mutedInstanceIds":[],"name":"Low on available disk space","notifyWhen":"onActionGroupChange","params":{"alertOnNoData":true,"criteria":[{"comparator":">","customMetric":{"aggregation":"max","field":"system.filesystem.used.pct","id":"alert-custom-metric","label":"","type":"custom"},"metric":"custom","threshold":[0.7],"timeSize":1,"timeUnit":"h","warningComparator":">","warningThreshold":[0.5]}],"filterQuery":"{\"bool\":{\"should\":[{\"match_phrase\":{\"system.filesystem.device_name\":\"/dev/mapper/cryptfs\"}}],\"minimum_should_match\":1}}","filterQueryText":"system.filesystem.device_name : \"/dev/mapper/cryptfs\"","nodeType":"host","sourceId":"default"},"schedule":{"interval":"1h"},"scheduledTaskId":null,"tags":["infra","opencrvs-builtin"],"throttle":null,"updatedAt":"2024-02-07T02:27:20.542Z","updatedBy":"elastic"},"coreMigrationVersion":"7.17.0","id":"f023d050-e0c9-11ec-99b8-dbfd54551fda","migrationVersion":{"alert":"7.16.0"},"references":[],"sort":[1707294448366,232768],"type":"alert","updated_at":"2024-02-07T08:27:28.366Z","version":"WzQ5NTQzNSwxOV0="} {"attributes":{"actions":[{"actionRef":"preconfigured:preconfigured-alert-history-es-index","actionTypeId":".index","group":"threshold_met","params":{"documents":["{\"@timestamp\":\"2023-06-15T07:57:35.954Z\",\"tags\":\"{{rule.tags}}\",\"rule\":{\"id\":\"{{rule.id}}\",\"name\":\"{{rule.name}}\",\"params\":{\"{{rule__type}}\":\"{{params}}\"},\"space\":\"{{rule.spaceId}}\",\"type\":\"{{rule.type}}\"},\"kibana\":{\"alert\":{\"id\":\"{{alert.id}}\",\"context\":{\"{{rule__type}}\":\"{{context}}\"},\"actionGroup\":\"{{alert.actionGroup}}\",\"actionGroupName\":\"{{alert.actionGroupName}}\"}},\"event\":{\"kind\":\"alert\"}}"],"indexOverride":"kibana-alert-history-services"}}],"alertTypeId":"apm.error_rate","apiKey":null,"apiKeyOwner":null,"consumer":"alerts","createdAt":"2022-05-31T10:10:47.069Z","createdBy":"opencrvs-admin","enabled":false,"executionStatus":{"error":null,"lastExecutionDate":"2024-02-07T08:28:08.400Z","status":"pending"},"legacyId":null,"meta":{"versionApiKeyLastmodified":"7.17.0"},"muteAll":false,"mutedInstanceIds":[],"name":"Error in service","notifyWhen":"onActionGroupChange","params":{"environment":"ENVIRONMENT_ALL","threshold":1,"windowSize":1,"windowUnit":"m"},"schedule":{"interval":"1m"},"scheduledTaskId":null,"tags":[],"throttle":null,"updatedAt":"2024-02-07T02:27:21.551Z","updatedBy":"elastic"},"coreMigrationVersion":"7.17.0","id":"f02b4a60-e0c9-11ec-99b8-dbfd54551fda","migrationVersion":{"alert":"7.16.0"},"references":[],"sort":[1707294457374,232780],"type":"alert","updated_at":"2024-02-07T08:27:37.374Z","version":"WzQ5NTQ0MSwxOV0="} {"excludedObjects":[],"excludedObjectsCount":0,"exportedCount":9,"missingRefCount":0,"missingReferences":[]} \ No newline at end of file diff --git a/infrastructure/monitoring/kibana/kibana.yml b/infrastructure/monitoring/kibana/kibana.yml index 2cfdd4c66..a97e87780 100644 --- a/infrastructure/monitoring/kibana/kibana.yml +++ b/infrastructure/monitoring/kibana/kibana.yml @@ -63,6 +63,7 @@ xpack.actions.preconfiguredAlertHistoryEsIndex: true # Kibana can also authenticate to Elasticsearch via "service account tokens". # Service account tokens are Bearer style tokens that replace the traditional username/password based configuration. # Use this token instead of a username/password. +# elasticsearch.serviceAccountToken: "my_token" # Time in milliseconds to wait for Elasticsearch to respond to pings. Defaults to the value of # the elasticsearch.requestTimeout setting. From fd134263b2b238ed5b9ed49c085addd1de352350 Mon Sep 17 00:00:00 2001 From: Markus Date: Thu, 18 Jul 2024 10:55:47 +0300 Subject: [PATCH 21/25] set kibana_system password on setup-users.sh --- infrastructure/elasticsearch/setup-users.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/infrastructure/elasticsearch/setup-users.sh b/infrastructure/elasticsearch/setup-users.sh index 366090da1..30d2f5923 100755 --- a/infrastructure/elasticsearch/setup-users.sh +++ b/infrastructure/elasticsearch/setup-users.sh @@ -24,6 +24,7 @@ users_passwords=( [$SEARCH_ELASTIC_USERNAME]="${SEARCH_ELASTIC_PASSWORD:-}" [beats_system]="${METRICBEAT_ELASTIC_PASSWORD:-}" [apm_system]="${APM_ELASTIC_PASSWORD:-}" + [kibana_system]="${KIBANA_SYSTEM_PASSWORD:-}" [$KIBANA_USERNAME]="${KIBANA_PASSWORD:-}" ) From eba09adc3e24414b3a10b86a8afa0b6f5aedb5d3 Mon Sep 17 00:00:00 2001 From: Markus Date: Thu, 25 Jul 2024 16:11:49 +0300 Subject: [PATCH 22/25] stop creating ocrvs index as a part of elastic setup --- infrastructure/elasticsearch/setup-helpers.sh | 34 ------------------- .../elasticsearch/setup-settings.sh | 3 -- 2 files changed, 37 deletions(-) diff --git a/infrastructure/elasticsearch/setup-helpers.sh b/infrastructure/elasticsearch/setup-helpers.sh index 5df149fbb..e55e0a4ae 100755 --- a/infrastructure/elasticsearch/setup-helpers.sh +++ b/infrastructure/elasticsearch/setup-helpers.sh @@ -231,37 +231,3 @@ function ensure_settings { return $result } - -function create_elastic_index { - local index_name=$1 - local elasticsearch_host="${ELASTICSEARCH_HOST:-elasticsearch}" - - local -a args=( '-s' '-D-' '-m15' '-w' '%{http_code}' - "http://${elasticsearch_host}:9200/${index_name}" - '-X' 'PUT' - '-H' 'Content-Type: application/json' - ) - - if [[ -n "${ELASTIC_PASSWORD:-}" ]]; then - args+=( '-u' "elastic:${ELASTIC_PASSWORD}" ) - fi - - local -i result=1 - local output - - output="$(curl "${args[@]}")" - - echo "${output}" - - # @TODO: Preferably check whether the index already exists before creating it. - # Error message might change in the future so we should not depend on it. - if [[ "${output: -3}" -eq 200 || $output == *"already exists as alias"* ]]; then - result=0 - fi - - if ((result)); then - echo -e "\n${output::-3}\n" - fi - - return $result -} diff --git a/infrastructure/elasticsearch/setup-settings.sh b/infrastructure/elasticsearch/setup-settings.sh index eea61f90a..260e13a31 100644 --- a/infrastructure/elasticsearch/setup-settings.sh +++ b/infrastructure/elasticsearch/setup-settings.sh @@ -19,8 +19,5 @@ echo "-------- $(date) --------" log 'Waiting for availability of Elasticsearch' wait_for_elasticsearch -log "Creating index for Elasticsearch. Index: ocrvs" -create_elastic_index "ocrvs" - log "Updating replicas for Elasticsearch" ensure_settings "{\"index\":{\"number_of_replicas\":0}}" From 4264ab1288fcc76b9dd5c0fec93f3a761ba62d41 Mon Sep 17 00:00:00 2001 From: Markus Date: Fri, 26 Jul 2024 10:12:43 +0300 Subject: [PATCH 23/25] stop creating ocrvs index before running migrations during cleanup --- infrastructure/run-migrations.sh | 8 -------- 1 file changed, 8 deletions(-) diff --git a/infrastructure/run-migrations.sh b/infrastructure/run-migrations.sh index d68ee546b..fbe00d8b0 100755 --- a/infrastructure/run-migrations.sh +++ b/infrastructure/run-migrations.sh @@ -25,13 +25,5 @@ elasticsearch_host() { fi } -create_elastic_index () { - local index_name=$1 - echo "Creating ElasticSearch Index: ${index_name}" - docker run --rm --network=opencrvs_overlay_net appropriate/curl curl -XPUT "http://$(elasticsearch_host)/$index_name" -v -} - -create_elastic_index "ocrvs" - # run migration by restarting migration service docker service update --force --update-parallelism 1 --update-delay 30s opencrvs_migration From 709c8d81df1eb5bb61fd9167fb8d7a246acfca38 Mon Sep 17 00:00:00 2001 From: Markus Date: Mon, 29 Jul 2024 13:03:57 +0300 Subject: [PATCH 24/25] Restart and remove existing elastalert indices --- .../elasticsearch/setup-elastalert-indices.sh | 29 +++++++++++++++++ infrastructure/elasticsearch/setup-helpers.sh | 31 +++++++++++++++++++ 2 files changed, 60 insertions(+) create mode 100755 infrastructure/elasticsearch/setup-elastalert-indices.sh diff --git a/infrastructure/elasticsearch/setup-elastalert-indices.sh b/infrastructure/elasticsearch/setup-elastalert-indices.sh new file mode 100755 index 000000000..e53e4ce4a --- /dev/null +++ b/infrastructure/elasticsearch/setup-elastalert-indices.sh @@ -0,0 +1,29 @@ +#!/usr/bin/env bash + +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# +# OpenCRVS is also distributed under the terms of the Civil Registration +# & Healthcare Disclaimer located at http://opencrvs.org/license. +# +# Copyright (C) The OpenCRVS Authors located at https://github.com/opencrvs/opencrvs-core/blob/master/AUTHORS. + +set -eu +set -o pipefail + +source "$(dirname "${BASH_SOURCE[0]}")/setup-helpers.sh" + +echo "-------- $(date) --------" + +log 'Waiting for availability of Elasticsearch' +wait_for_elasticsearch + + +log 'Scaling down Elastalert' +docker service scale opencrvs_elastalert=0 +log 'Deleting Elastalert indices' +delete_elastalert_indices +log 'Scaling up Elastalert' +docker service scale opencrvs_elastalert=1 + diff --git a/infrastructure/elasticsearch/setup-helpers.sh b/infrastructure/elasticsearch/setup-helpers.sh index e55e0a4ae..b058a2658 100755 --- a/infrastructure/elasticsearch/setup-helpers.sh +++ b/infrastructure/elasticsearch/setup-helpers.sh @@ -231,3 +231,34 @@ function ensure_settings { return $result } +# Upgrading from 7 to 8 requires deleting elastalert indices. https://elastalert2.readthedocs.io/en/latest/recipes/faq.html#does-elastalert-2-support-elasticsearch-8 +# Elastalert depends on kibana/beat indices, so we delete can elastalert indices during each deployment. +function delete_elastalert_indices { + # Opt for explicity over wildcard since we are deleting indices + local indices='elastalert_status,elastalert_status_error,elastalert_status_past,elastalert_status_silence,elastalert_status_status' + + local elasticsearch_host="${ELASTICSEARCH_HOST:-elasticsearch}" + + local -a args=( '-s' '-D-' '-m15' '-w' '%{http_code}' + "http://${elasticsearch_host}:9200/${indices}" + '-X' 'DELETE' + ) + + if [[ -n "${ELASTIC_PASSWORD:-}" ]]; then + args+=( '-u' "elastic:${ELASTIC_PASSWORD}" ) + fi + + local -i result=1 + local output + + output="$(curl "${args[@]}")" + if [[ "${output: -3}" -eq 200 ]]; then + result=0 + fi + + if ((result)); then + echo -e "\n${output::-3}\n" + fi + + return $result +} \ No newline at end of file From 1db45683303240c88076855e890fe992f547c376 Mon Sep 17 00:00:00 2001 From: Markus Date: Tue, 30 Jul 2024 07:47:26 +0300 Subject: [PATCH 25/25] move restart to deploy script --- infrastructure/deployment/deploy.sh | 9 +++++ .../elasticsearch/setup-elastalert-indices.sh | 35 +++++++++++++------ infrastructure/elasticsearch/setup-helpers.sh | 32 ----------------- 3 files changed, 34 insertions(+), 42 deletions(-) diff --git a/infrastructure/deployment/deploy.sh b/infrastructure/deployment/deploy.sh index 5d8725915..ba3eda77d 100755 --- a/infrastructure/deployment/deploy.sh +++ b/infrastructure/deployment/deploy.sh @@ -386,6 +386,15 @@ echo echo "Waiting 2 mins for mongo to deploy before working with data. Please note it can take up to 10 minutes for the entire stack to deploy in some scenarios." echo +echo 'Setting up elastalert indices' + +while true; do + if configured_ssh "/opt/opencrvs/infrastructure/elasticsearch/setup-elastalert-indices.sh"; then + break + fi + sleep 5 +done + echo "Setting up Kibana config & alerts" while true; do diff --git a/infrastructure/elasticsearch/setup-elastalert-indices.sh b/infrastructure/elasticsearch/setup-elastalert-indices.sh index e53e4ce4a..699ec03de 100755 --- a/infrastructure/elasticsearch/setup-elastalert-indices.sh +++ b/infrastructure/elasticsearch/setup-elastalert-indices.sh @@ -9,21 +9,36 @@ # # Copyright (C) The OpenCRVS Authors located at https://github.com/opencrvs/opencrvs-core/blob/master/AUTHORS. -set -eu -set -o pipefail +# Upgrading from 7 to 8 requires deleting elastalert indices. https://elastalert2.readthedocs.io/en/latest/recipes/faq.html#does-elastalert-2-support-elasticsearch-8 -source "$(dirname "${BASH_SOURCE[0]}")/setup-helpers.sh" +set -e -echo "-------- $(date) --------" +docker_command="docker run --rm --network=opencrvs_overlay_net curlimages/curl" -log 'Waiting for availability of Elasticsearch' -wait_for_elasticsearch +echo 'Waiting for availability of Elasticsearch' +ping_status_code=$($docker_command --connect-timeout 60 -u elastic:$ELASTICSEARCH_SUPERUSER_PASSWORD -o /dev/null -w '%{http_code}' "http://elasticsearch:9200") +if [ "$ping_status_code" -ne 200 ]; then + echo "Elasticsearch is not ready. API returned status code: $ping_status_code" + exit 1 +fi + + + +echo 'Scaling down Elastalert' -log 'Scaling down Elastalert' docker service scale opencrvs_elastalert=0 -log 'Deleting Elastalert indices' -delete_elastalert_indices -log 'Scaling up Elastalert' + +echo 'Deleting Elastalert indices' +indices='elastalert_status,elastalert_status_error,elastalert_status_past,elastalert_status_silence,elastalert_status_status' + +delete_status_code=$($docker_command --connect-timeout 60 -u elastic:$ELASTICSEARCH_SUPERUSER_PASSWORD -o /dev/null -w '%{http_code}' "http://elasticsearch:9200/${indices}" -X DELETE) + +if [ "$delete_status_code" -ne 200 ]; then + echo "Could not delete indices. API returned status code: $delete_status_code" + exit 1 +fi + +echo 'Scaling up Elastalert' docker service scale opencrvs_elastalert=1 diff --git a/infrastructure/elasticsearch/setup-helpers.sh b/infrastructure/elasticsearch/setup-helpers.sh index b058a2658..1b3380f0f 100755 --- a/infrastructure/elasticsearch/setup-helpers.sh +++ b/infrastructure/elasticsearch/setup-helpers.sh @@ -230,35 +230,3 @@ function ensure_settings { return $result } - -# Upgrading from 7 to 8 requires deleting elastalert indices. https://elastalert2.readthedocs.io/en/latest/recipes/faq.html#does-elastalert-2-support-elasticsearch-8 -# Elastalert depends on kibana/beat indices, so we delete can elastalert indices during each deployment. -function delete_elastalert_indices { - # Opt for explicity over wildcard since we are deleting indices - local indices='elastalert_status,elastalert_status_error,elastalert_status_past,elastalert_status_silence,elastalert_status_status' - - local elasticsearch_host="${ELASTICSEARCH_HOST:-elasticsearch}" - - local -a args=( '-s' '-D-' '-m15' '-w' '%{http_code}' - "http://${elasticsearch_host}:9200/${indices}" - '-X' 'DELETE' - ) - - if [[ -n "${ELASTIC_PASSWORD:-}" ]]; then - args+=( '-u' "elastic:${ELASTIC_PASSWORD}" ) - fi - - local -i result=1 - local output - - output="$(curl "${args[@]}")" - if [[ "${output: -3}" -eq 200 ]]; then - result=0 - fi - - if ((result)); then - echo -e "\n${output::-3}\n" - fi - - return $result -} \ No newline at end of file