From c32df6c7b6c2096e78e8939d27680956c1767f4c Mon Sep 17 00:00:00 2001
From: wangyelei
Date: Fri, 7 Feb 2025 09:44:34 +0800
Subject: [PATCH] chore: support pitr for mysql and apecloud-mysql (#1451)

Co-authored-by: wangyelei
---
 .../apecloud-mysql-pitr-backup.sh             | 159 ++++++++
 .../apecloud-mysql-pitr-restore.sh            |  53 +++
 .../dataprotection/common-scripts.sh          | 120 ++++++
 .../templates/actionset-pitr.yaml             |  76 ++++
 .../templates/backuppolicytemplate.yaml       |  14 +
 addons/apecloud-mysql/values.yaml             |   3 +
 addons/minio/templates/cmpd.yaml              |  10 +
 addons/mysql/dataprotection/common-scripts.sh | 120 ++++++
 .../mysql/dataprotection/mysql-pitr-backup.sh | 347 ++++++++++++++++++
 .../dataprotection/mysql-pitr-restore.sh      |  28 ++
 addons/mysql/templates/actionset-pitr.yaml    |  64 ++++
 .../mysql/templates/backuppolicytemplate.yaml |  25 +-
 addons/mysql/values.yaml                      |   3 +
 13 files changed, 1020 insertions(+), 2 deletions(-)
 create mode 100644 addons/apecloud-mysql/dataprotection/apecloud-mysql-pitr-backup.sh
 create mode 100644 addons/apecloud-mysql/dataprotection/apecloud-mysql-pitr-restore.sh
 create mode 100644 addons/apecloud-mysql/dataprotection/common-scripts.sh
 create mode 100644 addons/apecloud-mysql/templates/actionset-pitr.yaml
 create mode 100644 addons/mysql/dataprotection/common-scripts.sh
 create mode 100644 addons/mysql/dataprotection/mysql-pitr-backup.sh
 create mode 100644 addons/mysql/dataprotection/mysql-pitr-restore.sh
 create mode 100644 addons/mysql/templates/actionset-pitr.yaml

diff --git a/addons/apecloud-mysql/dataprotection/apecloud-mysql-pitr-backup.sh b/addons/apecloud-mysql/dataprotection/apecloud-mysql-pitr-backup.sh
new file mode 100644
index 000000000..08ad8722e
--- /dev/null
+++ b/addons/apecloud-mysql/dataprotection/apecloud-mysql-pitr-backup.sh
@@ -0,0 +1,159 @@
+#!/bin/bash
+# export wal-g environments
+export WALG_MYSQL_DATASOURCE_NAME="${DP_DB_USER}:${DP_DB_PASSWORD}@tcp(${DP_DB_HOST}:${DP_DB_PORT})/mysql"
+export WALG_COMPRESSION_METHOD=zstd
+# use datasafed and default config
+export WALG_DATASAFED_CONFIG=""
+export PATH="$PATH:$DP_DATASAFED_BIN_PATH"
+export WALG_MYSQL_CHECK_GTIDS=true
+export MYSQL_PWD=${DP_DB_PASSWORD}
+# work directory to save necessary files for backup
+export KB_BACKUP_WORKDIR=${VOLUME_DATA_DIR}/kb-backup
+
+# get binlog basename
+MYSQL_CMD="mysql -u ${DP_DB_USER} -h ${DP_DB_HOST} -N"
+log_bin_basename=$(${MYSQL_CMD} -e "SHOW VARIABLES LIKE 'log_bin_basename';" | awk -F'\t' '{print $2}')
+if [ -z "${log_bin_basename}" ]; then
+  echo "ERROR: pod/${DP_TARGET_POD_NAME} connect failed."
+  exit 1
+fi
+LOG_DIR=$(dirname "$log_bin_basename")
+LOG_PREFIX=$(basename "$log_bin_basename")
+
+global_latest_bin_log=""
+global_last_flush_logs_time=$(date +%s)
+global_last_purge_time=$(date +%s)
+global_old_size=0
+global_flush_bin_logs_interval=600
+
+if [[ ${DP_ARCHIVE_INTERVAL} =~ ^[0-9]+s$ ]];then
+  global_flush_bin_logs_interval=${DP_ARCHIVE_INTERVAL%s}
+fi
+
+# check that the mysql process is ok and the pod role matches the target role
+function check_mysql_process() {
+  is_ok=false
+  for ((i=1;i<4;i++));do
+    role=$(${MYSQL_CMD} -e "select role from information_schema.wesql_cluster_local;" | head -n 1)
+    if [[ $? -eq 0 && (-z ${TARGET_POD_ROLE} || "${TARGET_POD_ROLE,,}" == "${role,,}") ]]; then
+      is_ok=true
+      break
+    fi
+    DP_error_log "target backup pod/${DP_TARGET_POD_NAME} is not OK, target role: ${TARGET_POD_ROLE}, current role: ${role}, retry detection!"
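+    # The wesql_cluster_local query returns this pod's consensus role
+    # (leader / follower / learner); an empty result usually means the
+    # connection failed or the role has not settled yet, hence the retries.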
+    sleep 1
+  done
+  if [[ ${is_ok} == "false" ]];then
+    DP_error_log "target backup pod/${DP_TARGET_POD_NAME} is not OK, target role: ${TARGET_POD_ROLE}, current role: ${role}!"
+    exit 1
+  fi
+}
+
+# clean up expired logfiles, interval is 60s
+function purge_expired_files() {
+  export DATASAFED_BACKEND_BASE_PATH="$DP_BACKUP_BASE_PATH"
+  local currentUnix=$(date +%s)
+  info=$(DP_purge_expired_files ${currentUnix} ${global_last_purge_time})
+  if [ ! -z "${info}" ]; then
+    global_last_purge_time=${currentUnix}
+    DP_log "cleanup expired binlog files: ${info}"
+    local TOTAL_SIZE=$(datasafed stat / | grep TotalSize | awk '{print $2}')
+    DP_save_backup_status_info "${TOTAL_SIZE}"
+  fi
+}
+
+# flush bin logs, interval is 600s by default
+function flush_binlogs() {
+  local binlog=$(ls -Ft ${LOG_DIR}/|grep -e "^${LOG_PREFIX}.*[[:digit:]]$" |head -n 1)
+  if [ -z "${binlog}" ]; then
+    return
+  fi
+  local curr_time=$(date +%s)
+  # flush binary logs early if the current binlog exceeds FLUSH_BINLOG_AFTER_SIZE
+  if [ $(stat -c%s ${LOG_DIR}/${binlog}) -gt "${FLUSH_BINLOG_AFTER_SIZE}" ]; then
+    DP_log "flush binary logs"
+    ${MYSQL_CMD} -e "flush binary logs";
+    global_last_flush_logs_time=${curr_time}
+    return
+  fi
+  local diff_time=$((${curr_time}-${global_last_flush_logs_time}))
+  if [[ ${diff_time} -lt ${global_flush_bin_logs_interval} ]]; then
+    return
+  fi
+  local LATEST_TRANS=$(mysqlbinlog "${LOG_DIR}/${binlog}" |grep 'Xid =' |head -n 1)
+  # only flush bin logs when an Xid exists
+  if [[ -n "${LATEST_TRANS}" ]]; then
+    DP_log "flush binary logs"
+    ${MYSQL_CMD} -e "flush binary logs";
+  fi
+  global_last_flush_logs_time=${curr_time}
+}
+
+# upload bin logs by wal-g
+function upload_bin_logs() {
+  export DATASAFED_BACKEND_BASE_PATH="$DP_BACKUP_BASE_PATH/${DP_TARGET_POD_NAME}"
+  global_latest_bin_log=$(ls -Ftr ${LOG_DIR}/|grep -e "^${LOG_PREFIX}.*[[:digit:]]$"|tail -n 1)
+  if [ ! -z "${global_latest_bin_log}" ];then
+    global_latest_bin_log="${LOG_DIR}/${global_latest_bin_log}"
+  fi
+  wal-g binlog-push;
+}
+
+# get binlog start time
+function get_binlog_start_time() {
+  local binlog="${1:?missing binlog name}"
+  local time=$(mysqlbinlog "${binlog}" | grep -m 1 "end_log_pos" | awk '{print $1, $2}'|tr -d '#')
+  time=$(date -d "$time" -u '+%Y-%m-%dT%H:%M:%SZ')
+  echo $time
+}
+
+# pull binlog and decompress
+function pull_binlog() {
+  file="${1:?missing file name}"
+  fileName=$(basename "${file}")
+  datasafed pull "${file}" "${fileName}"
+  zstd -d --rm "${fileName}"
+}
+
+# get the start time for backup.status.timeRange
+function get_start_time_for_range() {
+  local oldest_bin_log=$(datasafed list -f --recursive / -o json | jq -s -r '.[] | sort_by(.mtime) | .[] | .path' | grep .zst | head -n 1)
+  if [ ! -z "${oldest_bin_log}" ]; then
+    START_TIME=$(DP_analyze_start_time_from_datasafed "${oldest_bin_log}" get_binlog_start_time pull_binlog)
+    echo ${START_TIME}
+  fi
+}
+
+# save backup status info to the sync file
+function save_backup_status() {
+  export DATASAFED_BACKEND_BASE_PATH="$DP_BACKUP_BASE_PATH"
+  local TOTAL_SIZE=$(datasafed stat / | grep TotalSize | awk '{print $2}')
+  # if no size changes, return
+  if [[ ${TOTAL_SIZE} == ${global_old_size} ]];then
+    return
+  fi
+  global_old_size=${TOTAL_SIZE}
+  local START_TIME=$(get_start_time_for_range)
+  local STOP_TIME=$(get_binlog_start_time "${global_latest_bin_log}")
+  DP_save_backup_status_info "${TOTAL_SIZE}" "${START_TIME}" "${STOP_TIME}"
+}
+
+# trap term signal
+trap "echo 'Terminating...' && sync && exit 0" TERM
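+# sync flushes filesystem buffers so the last status/info files written by the
+# loop below are durable before the archiving sidecar exits.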
+DP_log "start to archive binlog"
+while true; do
+  # check if mysql process is ok
+  check_mysql_process
+
+  # flush bin logs
+  flush_binlogs
+
+  # upload bin logs
+  upload_bin_logs
+
+  # save backup status which will be updated to the `backup` CR by the sidecar
+  save_backup_status
+
+  # purge the expired bin logs
+  purge_expired_files
+  sleep ${BINLOG_ARCHIVE_INTERVAL}
+done
\ No newline at end of file
diff --git a/addons/apecloud-mysql/dataprotection/apecloud-mysql-pitr-restore.sh b/addons/apecloud-mysql/dataprotection/apecloud-mysql-pitr-restore.sh
new file mode 100644
index 000000000..8a792d8ff
--- /dev/null
+++ b/addons/apecloud-mysql/dataprotection/apecloud-mysql-pitr-restore.sh
@@ -0,0 +1,53 @@
+#!/bin/bash
+set -e;
+
+# use datasafed and default config
+export PATH="$PATH:$DP_DATASAFED_BIN_PATH"
+export DATASAFED_BACKEND_BASE_PATH="$DP_BACKUP_BASE_PATH"
+
+baseBackupStartTimestamp=${DP_BASE_BACKUP_START_TIMESTAMP}
+if [ -f $DATA_DIR/xtrabackup_info ]; then
+  DP_BASE_BACKUP_START_TIME=$(cat $DATA_DIR/xtrabackup_info | grep start_time | awk -F ' = ' '{print $2}');
+  baseBackupStartTimestamp=$(date -d"${DP_BASE_BACKUP_START_TIME}" +%s)
+fi
+log_index_name="archive_log.index"
+
+function fetch_pitr_binlogs() {
+  echo "INFO: fetch binlogs from ${DP_BASE_BACKUP_START_TIME}"
+  for file in $(datasafed list -f --recursive --newer-than ${baseBackupStartTimestamp} / -o json | jq -s -r '.[] | sort_by(.mtime) | .[] | .path' | grep .zst);do
+    file_without_zst=${file%.*}
+    dir_path=$(dirname ${file_without_zst})
+    # make the log directory
+    mkdir -p ${PITR_DIR}/${dir_path}
+    datasafed pull ${file} - | zstd -d -o ${PITR_DIR}/${file_without_zst}
+    echo "${PITR_RELATIVE_PATH}/${file_without_zst}" >> ${PITR_DIR}/${log_index_name}
+    # stop once a binlog starts after the recovery time
+    log_start_time=$(mysqlbinlog ${PITR_DIR}/${file_without_zst} | grep -m 1 "end_log_pos" | awk '{print $1, $2}'|tr -d '#')
+    log_start_timestamp=$(date -d "${log_start_time}" +%s)
+    if [[ ${log_start_timestamp} -gt ${DP_RESTORE_TIMESTAMP} ]];then
+      DP_log "${file} out of range ${DP_RESTORE_TIME}"
+      break
+    fi
+  done
+}
+
+function save_to_restore_file() {
+  if [ -f ${DATA_DIR}/.xtrabackup_restore_new_cluster ];then
+    restore_signal_file=${DATA_DIR}/.xtrabackup_restore_new_cluster
+  else
+    restore_signal_file=${DATA_DIR}/.restore_new_cluster
+  fi
+  echo "archive_log_index=${PITR_RELATIVE_PATH}/${log_index_name}" > ${restore_signal_file}
+  kb_recover_time=$(date -d "${DP_RESTORE_TIME}" -u '+%Y-%m-%d %H:%M:%S')
+  echo "recovery_target_datetime=${kb_recover_time}" >> ${restore_signal_file}
+  sync
+}
+
+fetch_pitr_binlogs
+
+if [ -f ${PITR_DIR}/${log_index_name} ];then
+  save_to_restore_file
+  DP_log "fetch binlog finished."
+else
+  DP_log "didn't get any binlogs."
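+  # not fatal: when the restore point falls inside the base backup itself
+  # there is simply no newer binlog to replay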
+fi
\ No newline at end of file
diff --git a/addons/apecloud-mysql/dataprotection/common-scripts.sh b/addons/apecloud-mysql/dataprotection/common-scripts.sh
new file mode 100644
index 000000000..8b1057d0c
--- /dev/null
+++ b/addons/apecloud-mysql/dataprotection/common-scripts.sh
@@ -0,0 +1,120 @@
+#!/bin/bash
+# log an info message
+function DP_log() {
+  msg=$1
+  local curr_date=$(date -u '+%Y-%m-%d %H:%M:%S')
+  echo "${curr_date} INFO: $msg"
+}
+
+# log an error message
+function DP_error_log() {
+  msg=$1
+  local curr_date=$(date -u '+%Y-%m-%d %H:%M:%S')
+  echo "${curr_date} ERROR: $msg"
+}
+
+# get the file name without extension based on the incoming file path
+function DP_get_file_name_without_ext() {
+  local fileName=$1
+  local file_without_ext=${fileName%.*}
+  echo $(basename ${file_without_ext})
+}
+
+# Save backup status info file for syncing progress.
+# timeFormat: %Y-%m-%dT%H:%M:%SZ
+function DP_save_backup_status_info() {
+  local totalSize=$1
+  local startTime=$2
+  local stopTime=$3
+  local timeZone=$4
+  local extras=$5
+  local timeZoneStr=""
+  if [ ! -z ${timeZone} ]; then
+    timeZoneStr=",\"timeZone\":\"${timeZone}\""
+  fi
+  if [ -z "${stopTime}" ];then
+    echo "{\"totalSize\":\"${totalSize}\"}" > ${DP_BACKUP_INFO_FILE}
+  elif [ -z "${startTime}" ];then
+    echo "{\"totalSize\":\"${totalSize}\",\"extras\":[${extras}],\"timeRange\":{\"end\":\"${stopTime}\"${timeZoneStr}}}" > ${DP_BACKUP_INFO_FILE}
+  else
+    echo "{\"totalSize\":\"${totalSize}\",\"extras\":[${extras}],\"timeRange\":{\"start\":\"${startTime}\",\"end\":\"${stopTime}\"${timeZoneStr}}}" > ${DP_BACKUP_INFO_FILE}
+  fi
+}
+
+
+# Clean up expired logfiles.
+# Default interval is 60s
+# Default rootPath is /
+function DP_purge_expired_files() {
+  local currentUnix="${1:?missing current unix}"
+  local last_purge_time="${2:?missing last_purge_time}"
+  local root_path=${3:-"/"}
+  local interval_seconds=${4:-60}
+  local diff_time=$((${currentUnix}-${last_purge_time}))
+  if [[ -z ${DP_TTL_SECONDS} || ${diff_time} -lt ${interval_seconds} ]]; then
+    return
+  fi
+  expiredUnix=$((${currentUnix}-${DP_TTL_SECONDS}))
+  files=$(datasafed list -f --recursive --older-than ${expiredUnix} ${root_path} )
+  for file in ${files}
+  do
+    datasafed rm ${file}
+    echo ${file}
+  done
+}
+
+# Analyze the start time of the earliest file from the datasafed backend,
+# then record the file name into dp_oldest_file.info.
+# If the oldest file is unchanged, skip pulling it again.
+# This saves traffic consumption.
+function DP_analyze_start_time_from_datasafed() {
+  local oldest_file="${1:?missing oldest file}"
+  local get_start_time_from_file="${2:?missing get_start_time_from_file function}"
+  local datasafed_pull="${3:?missing datasafed_pull function}"
+  local info_file="${KB_BACKUP_WORKDIR}/dp_oldest_file.info"
+  mkdir -p ${KB_BACKUP_WORKDIR} && cd ${KB_BACKUP_WORKDIR}
+  if [ -f ${info_file} ]; then
+    last_oldest_file=$(cat ${info_file})
+    last_oldest_file_name=$(DP_get_file_name_without_ext ${last_oldest_file})
+    if [ "$last_oldest_file" == "${oldest_file}" ]; then
+      # oldest file unchanged
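+      # reuse the file pulled on a previous round instead of downloading it
+      # again; this is where the traffic saving comes from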
+      ${get_start_time_from_file} $last_oldest_file_name
+      return
+    fi
+    # remove last oldest file
+    if [ -f ${last_oldest_file_name} ];then
+      rm -rf ${last_oldest_file_name}
+    fi
+  fi
+  # pull file
+  ${datasafed_pull} ${oldest_file}
+  # record last oldest file
+  echo ${oldest_file} > ${info_file}
+  oldest_file_name=$(DP_get_file_name_without_ext ${oldest_file})
+  ${get_start_time_from_file} ${oldest_file_name}
+}
+
+# get the timeZone offset for a location, such as Asia/Shanghai
+function getTimeZoneOffset() {
+  local timeZone=${1:?missing time zone}
+  if [[ $timeZone == "+"* ]] || [[ $timeZone == "-"* ]] ; then
+    echo ${timeZone}
+    return
+  fi
+  local currTime=$(TZ=UTC date)
+  local utcHour=$(TZ=UTC date -d "${currTime}" +"%H")
+  local zoneHour=$(TZ=${timeZone} date -d "${currTime}" +"%H")
+  local offset=$((${zoneHour}-${utcHour}))
+  if [ $offset -eq 0 ]; then
+    return
+  fi
+  symbol="+"
+  if [ $offset -lt 0 ]; then
+    symbol="-" && offset=${offset:1}
+  fi
+  if [ $offset -lt 10 ];then
+    offset="0${offset}"
+  fi
+  echo "${symbol}${offset}:00"
+}
+
diff --git a/addons/apecloud-mysql/templates/actionset-pitr.yaml b/addons/apecloud-mysql/templates/actionset-pitr.yaml
new file mode 100644
index 000000000..3bdd338d7
--- /dev/null
+++ b/addons/apecloud-mysql/templates/actionset-pitr.yaml
@@ -0,0 +1,76 @@
+apiVersion: dataprotection.kubeblocks.io/v1alpha1
+kind: ActionSet
+metadata:
+  labels:
+    clusterdefinition.kubeblocks.io/name: apecloud-mysql
+  name: apecloud-mysql-pitr
+spec:
+  backupType: Continuous
+  env:
+    - name: VOLUME_DATA_DIR
+      value: {{ .Values.mysqlConfigs.dataMountPath }}
+    - name: DATA_DIR
+      value: {{ .Values.mysqlConfigs.dataDir }}
+    - name: PITR_RELATIVE_PATH
+      value: pitr-logs
+    - name: PITR_DIR
+      value: "$(DATA_DIR)/$(PITR_RELATIVE_PATH)"
+    - name: CONF_DIR
+      value: "$(VOLUME_DATA_DIR)/conf"
+    - name: TIME_FORMAT
+      value: 2006-01-02T15:04:05Z
+    - name: TARGET_POD_ROLE
+      # TODO: input by backup policy
+      value: leader
+    - name: DP_DB_PORT
+      value: "3306"
+    - name: BINLOG_ARCHIVE_INTERVAL
+      value: "10"
+    - name: FLUSH_BINLOG_INTERVAL_SECONDS
+      value: $(DP_ARCHIVE_INTERVAL)
+    - name: FLUSH_BINLOG_AFTER_SIZE
+      # flush binary logs once the current binlog exceeds 500MiB
+      value: "524288000"
+  restore:
+    prepareData:
+      image: {{ .Values.image.registry }}/{{ .Values.image.walgImage.repository }}:{{ .Values.image.walgImage.tag }}
+      command:
+        - bash
+        - -c
+        - |
+          #!/bin/bash
+          set -e;
+          {{- .Files.Get "dataprotection/common-scripts.sh" | nindent 8 }}
+          {{- .Files.Get "dataprotection/apecloud-mysql-pitr-restore.sh" | nindent 8 }}
+    postReady:
+      - exec:
+          command:
+            - bash
+            - -c
+            - |
+              #!/bin/bash
+              set -e;
+              echo "INFO: waiting for analysis of archive logs to complete."
+              while true; do
+                if [ ! -f {{ .Values.mysqlConfigs.dataDir }}/.xtrabackup_restore_new_cluster ] && [ ! -f {{ .Values.mysqlConfigs.dataDir }}/.restore_new_cluster ];then
+                  break
+                fi
+                sleep 1
+              done
+              rm -rf {{ .Values.mysqlConfigs.dataDir }}/pitr-logs;
+              echo "INFO: remove {{ .Values.mysqlConfigs.dataDir }}/pitr-logs."
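+  # The postReady hook above blocks until the engine consumes the signal files
+  # written by the restore script (i.e. binlog replay has finished), and only
+  # then deletes the downloaded pitr-logs directory.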
+  backup:
+    backupData:
+      image: {{ .Values.image.registry }}/{{ .Values.image.walgImage.repository }}:{{ .Values.image.walgImage.tag }}
+      runOnTargetPodNode: true
+      syncProgress:
+        enabled: true
+        intervalSeconds: 5
+      command:
+        - bash
+        - -c
+        - |
+          #!/bin/bash
+          set -e;
+          {{- .Files.Get "dataprotection/common-scripts.sh" | nindent 8 }}
+          {{- .Files.Get "dataprotection/apecloud-mysql-pitr-backup.sh" | nindent 8 }}
\ No newline at end of file
diff --git a/addons/apecloud-mysql/templates/backuppolicytemplate.yaml b/addons/apecloud-mysql/templates/backuppolicytemplate.yaml
index 2b2fe0898..e114a444c 100644
--- a/addons/apecloud-mysql/templates/backuppolicytemplate.yaml
+++ b/addons/apecloud-mysql/templates/backuppolicytemplate.yaml
@@ -37,6 +37,16 @@ spec:
         volumeMounts:
           - name: data
             mountPath: {{ .Values.mysqlConfigs.dataMountPath }}
+    - name: archive-binlog
+      target:
+        role: leader
+        account: root
+      snapshotVolumes: false
+      actionSetName: apecloud-mysql-pitr
+      targetVolumes:
+        volumeMounts:
+          - name: data
+            mountPath: {{ .Values.mysqlConfigs.dataMountPath }}
   schedules:
     - backupMethod: xtrabackup
       enabled: false
@@ -50,3 +60,7 @@
       enabled: false
       cronExpression: "0 18 * * *"
       retentionPeriod: 7d
+    - backupMethod: archive-binlog
+      enabled: false
+      cronExpression: "*/30 * * * *"
+      retentionPeriod: 8d
diff --git a/addons/apecloud-mysql/values.yaml b/addons/apecloud-mysql/values.yaml
index e6aae9798..719fc52bb 100644
--- a/addons/apecloud-mysql/values.yaml
+++ b/addons/apecloud-mysql/values.yaml
@@ -11,6 +11,9 @@ image:
   syncer:
     repository: apecloud/syncer
     tag: 0.4.1
+  walgImage:
+    repository: apecloud/wal-g
+    tag: mysql-8.0
 
 ## MySQL Cluster parameters
 cluster:
diff --git a/addons/minio/templates/cmpd.yaml b/addons/minio/templates/cmpd.yaml
index 64cb93ae2..e2a3ffee2 100644
--- a/addons/minio/templates/cmpd.yaml
+++ b/addons/minio/templates/cmpd.yaml
@@ -87,6 +87,16 @@ spec:
     - name: readwrite
       updatePriority: 1
       participatesInQuorum: false
+  policyRules:
+    - apiGroups:
+        - ""
+      resources:
+        - configmaps
+      verbs:
+        - get
+        - list
+        - patch
+        - update
 
   lifecycleActions:
     roleProbe:
diff --git a/addons/mysql/dataprotection/common-scripts.sh b/addons/mysql/dataprotection/common-scripts.sh
new file mode 100644
index 000000000..8b1057d0c
--- /dev/null
+++ b/addons/mysql/dataprotection/common-scripts.sh
@@ -0,0 +1,120 @@
+#!/bin/bash
+# log an info message
+function DP_log() {
+  msg=$1
+  local curr_date=$(date -u '+%Y-%m-%d %H:%M:%S')
+  echo "${curr_date} INFO: $msg"
+}
+
+# log an error message
+function DP_error_log() {
+  msg=$1
+  local curr_date=$(date -u '+%Y-%m-%d %H:%M:%S')
+  echo "${curr_date} ERROR: $msg"
+}
+
+# get the file name without extension based on the incoming file path
+function DP_get_file_name_without_ext() {
+  local fileName=$1
+  local file_without_ext=${fileName%.*}
+  echo $(basename ${file_without_ext})
+}
+
+# Save backup status info file for syncing progress.
+# timeFormat: %Y-%m-%dT%H:%M:%SZ
+function DP_save_backup_status_info() {
+  local totalSize=$1
+  local startTime=$2
+  local stopTime=$3
+  local timeZone=$4
+  local extras=$5
+  local timeZoneStr=""
+  if [ ! -z ${timeZone} ]; then
+    timeZoneStr=",\"timeZone\":\"${timeZone}\""
+  fi
+  if [ -z "${stopTime}" ];then
+    echo "{\"totalSize\":\"${totalSize}\"}" > ${DP_BACKUP_INFO_FILE}
+  elif [ -z "${startTime}" ];then
+    echo "{\"totalSize\":\"${totalSize}\",\"extras\":[${extras}],\"timeRange\":{\"end\":\"${stopTime}\"${timeZoneStr}}}" > ${DP_BACKUP_INFO_FILE}
+  else
+    echo "{\"totalSize\":\"${totalSize}\",\"extras\":[${extras}],\"timeRange\":{\"start\":\"${startTime}\",\"end\":\"${stopTime}\"${timeZoneStr}}}" > ${DP_BACKUP_INFO_FILE}
+  fi
+}
+
+
+# Clean up expired logfiles.
+# Default interval is 60s
+# Default rootPath is /
+function DP_purge_expired_files() {
+  local currentUnix="${1:?missing current unix}"
+  local last_purge_time="${2:?missing last_purge_time}"
+  local root_path=${3:-"/"}
+  local interval_seconds=${4:-60}
+  local diff_time=$((${currentUnix}-${last_purge_time}))
+  if [[ -z ${DP_TTL_SECONDS} || ${diff_time} -lt ${interval_seconds} ]]; then
+    return
+  fi
+  expiredUnix=$((${currentUnix}-${DP_TTL_SECONDS}))
+  files=$(datasafed list -f --recursive --older-than ${expiredUnix} ${root_path} )
+  for file in ${files}
+  do
+    datasafed rm ${file}
+    echo ${file}
+  done
+}
+
+# Analyze the start time of the earliest file from the datasafed backend,
+# then record the file name into dp_oldest_file.info.
+# If the oldest file is unchanged, skip pulling it again.
+# This saves traffic consumption.
+function DP_analyze_start_time_from_datasafed() {
+  local oldest_file="${1:?missing oldest file}"
+  local get_start_time_from_file="${2:?missing get_start_time_from_file function}"
+  local datasafed_pull="${3:?missing datasafed_pull function}"
+  local info_file="${KB_BACKUP_WORKDIR}/dp_oldest_file.info"
+  mkdir -p ${KB_BACKUP_WORKDIR} && cd ${KB_BACKUP_WORKDIR}
+  if [ -f ${info_file} ]; then
+    last_oldest_file=$(cat ${info_file})
+    last_oldest_file_name=$(DP_get_file_name_without_ext ${last_oldest_file})
+    if [ "$last_oldest_file" == "${oldest_file}" ]; then
+      # oldest file unchanged
+      ${get_start_time_from_file} $last_oldest_file_name
+      return
+    fi
+    # remove last oldest file
+    if [ -f ${last_oldest_file_name} ];then
+      rm -rf ${last_oldest_file_name}
+    fi
+  fi
+  # pull file
+  ${datasafed_pull} ${oldest_file}
+  # record last oldest file
+  echo ${oldest_file} > ${info_file}
+  oldest_file_name=$(DP_get_file_name_without_ext ${oldest_file})
+  ${get_start_time_from_file} ${oldest_file_name}
+}
+
+# get the timeZone offset for a location, such as Asia/Shanghai
+function getTimeZoneOffset() {
+  local timeZone=${1:?missing time zone}
+  if [[ $timeZone == "+"* ]] || [[ $timeZone == "-"* ]] ; then
+    echo ${timeZone}
+    return
+  fi
+  local currTime=$(TZ=UTC date)
+  local utcHour=$(TZ=UTC date -d "${currTime}" +"%H")
+  local zoneHour=$(TZ=${timeZone} date -d "${currTime}" +"%H")
+  local offset=$((${zoneHour}-${utcHour}))
+  if [ $offset -eq 0 ]; then
+    return
+  fi
+  symbol="+"
+  if [ $offset -lt 0 ]; then
+    symbol="-" && offset=${offset:1}
+  fi
+  if [ $offset -lt 10 ];then
+    offset="0${offset}"
+  fi
+  echo "${symbol}${offset}:00"
+}
+
diff --git a/addons/mysql/dataprotection/mysql-pitr-backup.sh b/addons/mysql/dataprotection/mysql-pitr-backup.sh
new file mode 100644
index 000000000..30c518ca7
--- /dev/null
+++ b/addons/mysql/dataprotection/mysql-pitr-backup.sh
@@ -0,0 +1,347 @@
+#!/bin/bash
+# export wal-g environments
+if [ ! -z "${MYSQL_ADMIN_PASSWORD}" ]; then
+  DP_DB_PASSWORD=${MYSQL_ADMIN_PASSWORD}
+  DP_DB_USER=${MYSQL_ADMIN_USER}
+fi
+export WALG_MYSQL_DATASOURCE_NAME="${DP_DB_USER}:${DP_DB_PASSWORD}@tcp(${DP_DB_HOST}:${DP_DB_PORT})/mysql"
+export WALG_COMPRESSION_METHOD=zstd
+# use datasafed and default config
+export WALG_DATASAFED_CONFIG=""
+export PATH="$PATH:$DP_DATASAFED_BIN_PATH"
+export WALG_MYSQL_CHECK_GTIDS=true
+export MYSQL_PWD=${DP_DB_PASSWORD}
+# work directory to save necessary files for backup
+export KB_BACKUP_WORKDIR=${VOLUME_DATA_DIR}/kb-backup
+
+# get binlog basename
+MYSQL_CMD="mysql -u ${DP_DB_USER} -h ${DP_DB_HOST} -N"
+MYSQL_CMD_WITH_COL="mysql -u ${DP_DB_USER} -h ${DP_DB_HOST}"
+log_bin_basename=$(${MYSQL_CMD} -e "SHOW VARIABLES LIKE 'log_bin_basename';" | awk -F'\t' '{print $2}')
+if [ -z "${log_bin_basename}" ]; then
+  echo "ERROR: pod/${DP_TARGET_POD_NAME} connect failed."
+  exit 1
+fi
+LOG_DIR=$(dirname "$log_bin_basename")
+LOG_PREFIX=$(basename "$log_bin_basename")
+
+global_latest_bin_log=""
+global_last_flush_logs_time=$(date +%s)
+global_last_purge_time=$(date +%s)
+global_old_size=0
+global_flush_bin_logs_interval=600
+
+if [[ ${DP_ARCHIVE_INTERVAL} =~ ^[0-9]+s$ ]];then
+  global_flush_bin_logs_interval=${DP_ARCHIVE_INTERVAL%s}
+fi
+
+global_backup_in_secondary=
+if [ "${TARGET_POD_ROLE}" == "primary" ]; then
+  global_backup_in_secondary=f
+elif [ "${TARGET_POD_ROLE}" == "secondary" ]; then
+  global_backup_in_secondary=t
+fi
+
+# check that the mysql process is ok and the pod role matches the target role
+function check_mysql_process() {
+  is_ok=false
+  sql="show slave status\G"
+  slave_note="Slave_IO_Running: Yes"
+  if [ "${USE_REPLICA_STATUS}" == "true" ]; then
+    sql="show replica status\G"
+    slave_note="Replica_IO_Running: Yes"
+  fi
+  for ((i=1;i<4;i++));do
+    is_secondary=$(${MYSQL_CMD_WITH_COL} -e "${sql}" 2>/dev/null | grep "${slave_note}" &>/dev/null && echo "t" || echo "f")
+    if [[ $? -eq 0 && (-z ${TARGET_POD_ROLE} || "${global_backup_in_secondary}" == "${is_secondary}") ]]; then
+      is_ok=true
+      break
+    fi
+    DP_error_log "target backup pod/${DP_TARGET_POD_NAME} is not OK, target role: ${TARGET_POD_ROLE}, is_secondary: ${is_secondary}, retry detection!"
+    sleep 1
+  done
+  if [[ ${is_ok} == "false" ]];then
+    DP_error_log "target backup pod/${DP_TARGET_POD_NAME} is not OK, target role: ${TARGET_POD_ROLE}, is_secondary: ${is_secondary}"
+    exit 1
+  fi
+}
+
+# clean up expired logfiles, interval is 60s
+function purge_expired_files() {
+  export DATASAFED_BACKEND_BASE_PATH="$DP_BACKUP_BASE_PATH"
+  local currentUnix=$(date +%s)
+  info=$(DP_purge_expired_files "${currentUnix}" "${global_last_purge_time}")
+  if [ ! -z "${info}" ]; then
+    global_last_purge_time=${currentUnix}
+    DP_log "cleanup expired binlog files: ${info}"
+    local TOTAL_SIZE=$(datasafed stat / | grep TotalSize | awk '{print $2}')
+    DP_save_backup_status_info "${TOTAL_SIZE}"
+  fi
+}
+
+# flush bin logs, interval is 600s by default
+function flush_binlogs() {
+  local binlog=$(ls -Ft ${LOG_DIR}/|grep -e "^${LOG_PREFIX}.*[[:digit:]]$" |head -n 1)
+  if [ -z "${binlog}" ]; then
+    return
+  fi
+  local curr_time=$(date +%s)
+  # flush binary logs early if the current binlog exceeds FLUSH_BINLOG_AFTER_SIZE
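+  # a quick check of the units, assuming the ActionSet default of
+  # FLUSH_BINLOG_AFTER_SIZE=524288000: stat -c%s prints the size in bytes,
+  # and 524288000 bytes = 500 * 1024 * 1024 = 500MiB, so bytes compare to bytes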
+  if [ $(stat -c%s ${LOG_DIR}/${binlog}) -gt "${FLUSH_BINLOG_AFTER_SIZE}" ]; then
+    DP_log "flush binary logs"
+    ${MYSQL_CMD} -e "flush binary logs";
+    global_last_flush_logs_time=${curr_time}
+    return
+  fi
+  local diff_time=$((${curr_time}-${global_last_flush_logs_time}))
+  if [[ ${diff_time} -lt ${global_flush_bin_logs_interval} ]]; then
+    return
+  fi
+  local LATEST_TRANS=$(mysqlbinlog "${LOG_DIR}/${binlog}" |grep 'Xid =' |head -n 1)
+  # only flush bin logs when an Xid exists
+  if [[ -n "${LATEST_TRANS}" ]]; then
+    DP_log "flush binary logs"
+    ${MYSQL_CMD} -e "flush binary logs";
+  fi
+  global_last_flush_logs_time=${curr_time}
+}
+
+# upload bin logs by wal-g
+function upload_bin_logs() {
+  export DATASAFED_BACKEND_BASE_PATH="$DP_BACKUP_BASE_PATH"
+  global_latest_bin_log=$(ls -Ftr "${LOG_DIR}"/|grep -e "^${LOG_PREFIX}.*[[:digit:]]$"|tail -n 1)
+  if [ ! -z "${global_latest_bin_log}" ];then
+    global_latest_bin_log="${LOG_DIR}/${global_latest_bin_log}"
+  fi
+  wal-g binlog-push;
+}
+
+# get binlog start time
+function get_binlog_start_time() {
+  local binlog="${1:?missing binlog name}"
+  local time=$(mysqlbinlog "${binlog}" | grep -m 1 "end_log_pos" | awk '{print $1, $2}'|tr -d '#')
+  time=$(date -d "$time" -u '+%Y-%m-%dT%H:%M:%SZ')
+  echo $time
+}
+
+# pull binlog and decompress
+function pull_binlog() {
+  file="${1:?missing file name}"
+  fileName=$(basename "${file}")
+  datasafed pull "${file}" "${fileName}"
+  zstd -d --rm "${fileName}"
+}
+
+# get the start time for backup.status.timeRange
+function get_start_time_for_range() {
+  local oldest_bin_log=$(datasafed list -f --recursive / -o json | jq -s -r '.[] | sort_by(.mtime) | .[] | .path' | grep .zst | head -n 1)
+  if [ ! -z "${oldest_bin_log}" ]; then
+    START_TIME=$(DP_analyze_start_time_from_datasafed "${oldest_bin_log}" get_binlog_start_time pull_binlog)
+    echo ${START_TIME}
+  fi
+}
+
+# save backup status info to the sync file
+function save_backup_status() {
+  export DATASAFED_BACKEND_BASE_PATH="$DP_BACKUP_BASE_PATH"
+  local TOTAL_SIZE=$(datasafed stat / | grep TotalSize | awk '{print $2}')
+  # if no size changes, return
+  if [[ ${TOTAL_SIZE} == ${global_old_size} ]];then
+    return
+  fi
+  global_old_size=${TOTAL_SIZE}
+  local START_TIME=$(get_start_time_for_range)
+  local STOP_TIME=$(get_binlog_start_time "${global_latest_bin_log}")
+  DP_save_backup_status_info "${TOTAL_SIZE}" "${START_TIME}" "${STOP_TIME}"
+}
+
+cleanup_mysql_binlogs() {
+
+  # Get binlog files that every replica has already applied
+  function get_synced_binlogs() {
+
+    readarray -t all_binlogs < <(ls -1 "$LOG_DIR"/*-bin.[0-9]* | sort -V)
+
+    # TODO: KB_ITS_.*_HOSTNAME will be removed in kb1.0 and needs to be modified accordingly
+    local REPLICA_HOSTS=($(env | grep "KB_ITS_.*_HOSTNAME" | cut -d= -f2 | grep -v "^${DP_DB_HOST}$"))
+
+    # Check synchronization status of each replica
+    for host in "${REPLICA_HOSTS[@]}"; do
+      local status_output=$(
+        mysql -u"${DP_DB_USER}" -h"$host" -p"${DP_DB_PASSWORD}" -N -e "SHOW REPLICA STATUS\G" 2>/dev/null ||
+        mysql -u"${DP_DB_USER}" -h"$host" -p"${DP_DB_PASSWORD}" -N -e "SHOW SLAVE STATUS\G"
+      )
+      local current_file=$(echo "$status_output" | grep -o "${DP_TARGET_POD_NAME}-bin\.[0-9]*" | tail -n1)
+
+      if [[ -z "$current_file" ]]; then
+        return 1
+      fi
+
+      if [[ -z "$min_synced_file" ]] || [[ "$current_file" < "$min_synced_file" ]]; then
+        min_synced_file="$current_file"
+      fi
+    done
+
+    if [[ -z "$min_synced_file" ]]; then
+      return 1
+    fi
+
+    local result_files=""
+    for binlog in "${all_binlogs[@]}"; do
+      local basename_binlog=$(basename "$binlog")
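+      # binlog names share one prefix plus a zero-padded sequence number, so
+      # the plain string comparison below sorts them in creation order; stop
+      # at the first file the slowest replica has not finished applying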
+      if [[ "$basename_binlog" > "$min_synced_file" || "$basename_binlog" == "$min_synced_file" ]]; then
+        break
+      fi
+      result_files="$result_files $basename_binlog"
+    done
+
+    echo "${result_files# }"
+  }
+
+  # Get the list of binlog files that have been uploaded to backup storage
+  function get_uploaded_binlogs() {
+    datasafed list -f --recursive / -o json \
+      | jq -s -r ".[] | sort_by(.mtime) | .[] | .path" \
+      | grep "\.zst$" \
+      | grep "${DP_TARGET_POD_NAME}" \
+      | xargs -I {} basename {} .zst \
+      | paste -sd ' ' -
+  }
+
+  # Clean up old binlog files on the master node that have been both synced and uploaded
+  function purge_master_binlogs() {
+    local synced_files="$1"
+    local uploaded_files="$2"
+
+    # Get all binlog files sorted by sequence number
+    local all_binlogs=($(ls -1 "$LOG_DIR"/*[!.index] | sort -V))
+    local total_files=${#all_binlogs[@]}
+
+    # If total files <= 5, no need to purge
+    if [[ $total_files -le 5 ]]; then
+      echo "Only $total_files binlog files, no need to purge"
+      return
+    fi
+
+    # Get the latest 5 binlog files
+    local latest_binlogs=$(printf "%s\n" "${all_binlogs[@]: -5}" | xargs -n1 basename)
+
+    for binlog_file in "${all_binlogs[@]}"; do
+      if [ ! -f "$binlog_file" ]; then
+        continue
+      fi
+
+      local base_name=$(basename "$binlog_file")
+
+      # Skip if it's one of the latest 5 files
+      if echo "$latest_binlogs" | grep -q "$base_name"; then
+        echo "Keeping $base_name (one of latest 5 binlog files)"
+        continue
+      fi
+
+      # check if synced and uploaded
+      if echo "$synced_files" | grep -q "$base_name" && echo "$uploaded_files" | grep -q "$base_name"; then
+        echo "Purging binary log: $base_name from master host"
+
+        if mysql -u"${DP_DB_USER}" -h"${DP_DB_HOST}" -p"${DP_DB_PASSWORD}" -N -e \
+          "PURGE BINARY LOGS TO '$base_name'" &>/dev/null; then
+          echo "Successfully purged binary log: $base_name on master host ${DP_DB_HOST}"
+        else
+          echo "Failed to connect or purge binary log: $base_name on master host ${DP_DB_HOST}"
+        fi
+      else
+        echo "Keeping $base_name (not yet synced or uploaded)"
+      fi
+    done
+  }
+
+  # Purge all binlog files on each replica except for the latest 5 files
+  function purge_replica_binlogs() {
+    local REPLICA_HOSTS=($(env | grep "KB_ITS_.*_HOSTNAME" | cut -d= -f2 | grep -v "^${DP_DB_HOST}$"))
+
+    for host in "${REPLICA_HOSTS[@]}"; do
+      echo "Processing replica host: $host"
+
+      # Get all binlog files on this replica, sorted by sequence number
+      local binlog_files=$(mysql -u"${DP_DB_USER}" -h"$host" -p"${DP_DB_PASSWORD}" -N -e \
+        "SHOW BINARY LOGS" 2>/dev/null | awk '{print $1}' | sort -V)
+
+      if [[ -z "$binlog_files" ]]; then
+        echo "Failed to get binary logs from replica host $host, skipping..."
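+        # typically an unreachable host or missing privileges (SHOW BINARY
+        # LOGS normally needs REPLICATION CLIENT); skip this replica rather
+        # than abort the whole cleanup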
+        continue
+      fi
+
+      # Count total number of binlog files
+      local total_files=$(echo "$binlog_files" | wc -l)
+
+      # If total files <= 5, no need to purge
+      if [[ $total_files -le 5 ]]; then
+        echo "Only $total_files binlog files on $host, no need to purge"
+        continue
+      fi
+
+      # Get the target binlog (files from this one onward are kept)
+      local files_to_delete=$((total_files - 4))
+      local target_binlog=$(echo "$binlog_files" | head -n $files_to_delete | tail -n 1)
+
+      # Execute PURGE BINARY LOGS command
+      if mysql -u"${DP_DB_USER}" -h"$host" -p"${DP_DB_PASSWORD}" -N -e \
+        "PURGE BINARY LOGS TO '$target_binlog'" &>/dev/null; then
+        echo "Successfully purged binary logs up to $target_binlog on replica host $host"
+      else
+        echo "Failed to connect or purge binary logs on replica host $host"
+      fi
+    done
+  }
+
+  # Get list of synced binlogs
+  local synced_binlogs=$(get_synced_binlogs)
+  if [ $? -ne 0 ] || [ -z "$synced_binlogs" ]; then
+    echo "No synced binlog files found"
+    return 0
+  fi
+
+  # Get list of uploaded binlogs
+  local uploaded_binlogs=$(get_uploaded_binlogs)
+  if [ -z "$uploaded_binlogs" ]; then
+    echo "No uploaded binlog files found"
+    return 0
+  fi
+
+  # Execute cleanup process
+  purge_master_binlogs "$synced_binlogs" "$uploaded_binlogs"
+  purge_replica_binlogs
+}
+
+# trap term signal
+trap "echo 'Terminating...' && sync && exit 0" TERM
+DP_log "start to archive binlog"
+if [ -f "${VOLUME_DATA_DIR}/binlog.000004" ]; then
+  # xtrabackup may create a binlog.000004 after horizontal scaling; copy it so it gets archived
+  cp ${VOLUME_DATA_DIR}/binlog.000004 ${VOLUME_DATA_DIR}/binlog/binlog.000004
+fi
+while true; do
+  # check if mysql process is ok
+  check_mysql_process
+
+  # flush bin logs
+  flush_binlogs
+
+  # upload bin logs
+  upload_bin_logs
+
+  # save backup status which will be updated to the `backup` CR by the sidecar
+  save_backup_status
+
+  # purge the expired bin logs
+  purge_expired_files
+
+  # clean up synced and uploaded binary log files when disk usage >= 80%
+  disk_usage=$(df -h ${LOG_DIR} | awk 'NR==2 {print $5}' | cut -d'%' -f1)
+  if [ -n "${disk_usage}" ] && [ "${disk_usage}" -ge 80 ] && [ "${PURGE_BINLOG}" = "on" ]; then
+    echo "Executing cleanup_mysql_binlogs due to: Disk usage is ${disk_usage}% (>= 80%)"
+    cleanup_mysql_binlogs
+  fi
+
+  sleep "${BINLOG_ARCHIVE_INTERVAL}"
+done
\ No newline at end of file
diff --git a/addons/mysql/dataprotection/mysql-pitr-restore.sh b/addons/mysql/dataprotection/mysql-pitr-restore.sh
new file mode 100644
index 000000000..1771a6612
--- /dev/null
+++ b/addons/mysql/dataprotection/mysql-pitr-restore.sh
@@ -0,0 +1,28 @@
+#!/bin/bash
+#
+# Refer: https://github.com/wal-g/wal-g/blob/master/docs/MySQL.md#mysql---using-with-xtrabackup
+#
+# export wal-g environments
+export WALG_MYSQL_DATASOURCE_NAME="${MYSQL_ADMIN_USER}:${MYSQL_ADMIN_PASSWORD}@tcp(${DP_DB_HOST}:${DP_DB_PORT})/mysql"
+export WALG_COMPRESSION_METHOD=zstd
+# use datasafed and default config
+export WALG_DATASAFED_CONFIG=""
+export PATH="$PATH:$DP_DATASAFED_BIN_PATH"
+export WALG_MYSQL_CHECK_GTIDS=true
+export MYSQL_PWD=${MYSQL_ADMIN_PASSWORD}
+export DATASAFED_BACKEND_BASE_PATH="$DP_BACKUP_BASE_PATH"
+export WALG_MYSQL_BINLOG_DST=${PITR_DIR}
+export WALG_MYSQL_BINLOG_REPLAY_COMMAND="mysqlbinlog --stop-datetime=\"\$WALG_MYSQL_BINLOG_END_TS\" \"\$WALG_MYSQL_CURRENT_BINLOG\" | grep -v 'INSERT INTO kubeblocks.kb_health_check' | mysql -u ${MYSQL_ADMIN_USER} -h ${DP_DB_HOST}"
+
+# If the pitr logs dir already exists, it may have been created by a previous failed restore.
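+# Replaying a stale mix of old and new binlogs could apply transactions twice,
+# so bail out and let the operator clean the directory first.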
+if [ -d "$WALG_MYSQL_BINLOG_DST" ]; then
+  DP_log "pitr logs dir $WALG_MYSQL_BINLOG_DST exists, may be created by previous failed restore, exit"
+  exit 1
+fi
+
+DP_log "mkdir -p $WALG_MYSQL_BINLOG_DST"
+mkdir -p "$WALG_MYSQL_BINLOG_DST"
+
+DP_log "wal-g binlog-replay --since-time=${DP_BASE_BACKUP_START_TIME} --until=${DP_RESTORE_TIME}"
+wal-g binlog-replay --since-time="${DP_BASE_BACKUP_START_TIME}" --until="${DP_RESTORE_TIME}"
+echo "mysql binlog replay done."
\ No newline at end of file
diff --git a/addons/mysql/templates/actionset-pitr.yaml b/addons/mysql/templates/actionset-pitr.yaml
new file mode 100644
index 000000000..d24a4f00c
--- /dev/null
+++ b/addons/mysql/templates/actionset-pitr.yaml
@@ -0,0 +1,64 @@
+apiVersion: dataprotection.kubeblocks.io/v1alpha1
+kind: ActionSet
+metadata:
+  labels:
+    {{- include "mysql.labels" . | nindent 4 }}
+  name: mysql-pitr
+spec:
+  backupType: Continuous
+  env:
+    - name: VOLUME_DATA_DIR
+      value: "{{ .Values.dataMountPath }}"
+    - name: DATA_DIR
+      value: "{{ .Values.dataMountPath }}/data"
+    - name: PITR_RELATIVE_PATH
+      value: pitr-logs
+    - name: PITR_DIR
+      value: "$(VOLUME_DATA_DIR)/$(PITR_RELATIVE_PATH)"
+    - name: CONF_DIR
+      value: "$(VOLUME_DATA_DIR)/conf"
+    - name: TIME_FORMAT
+      value: 2006-01-02T15:04:05Z
+    - name: TARGET_POD_ROLE
+      value: primary
+    - name: DP_DB_PORT
+      value: "3306"
+    - name: BINLOG_ARCHIVE_INTERVAL
+      value: "10"
+    - name: FLUSH_BINLOG_INTERVAL_SECONDS
+      value: $(DP_ARCHIVE_INTERVAL)
+    - name: FLUSH_BINLOG_AFTER_SIZE
+      # flush binary logs once the current binlog exceeds 500MiB
+      value: "524288000"
+    - name: USE_REPLICA_STATUS
+      value: "false"
+    - name: PURGE_BINLOG
+      value: "on"
+  backup:
+    backupData:
+      image: {{ .Values.image.registry }}/{{ .Values.image.walgImage.repository }}:{{ .Values.image.walgImage.tag }}
+      runOnTargetPodNode: true
+      syncProgress:
+        enabled: true
+        intervalSeconds: 5
+      command:
+        - bash
+        - -c
+        - |
+          #!/bin/bash
+          set -e;
+          {{- .Files.Get "dataprotection/common-scripts.sh" | nindent 8 }}
+          {{- .Files.Get "dataprotection/mysql-pitr-backup.sh" | nindent 8 }}
+  restore:
+    postReady:
+      - job:
+          image: {{ .Values.image.registry }}/{{ .Values.image.walgImage.repository }}:{{ .Values.image.walgImage.tag }}
+          runOnTargetPodNode: true
+          command:
+            - bash
+            - -c
+            - |
+              #!/bin/bash
+              set -e;
+              {{- .Files.Get "dataprotection/common-scripts.sh" | nindent 10 }}
+              {{- .Files.Get "dataprotection/mysql-pitr-restore.sh" | nindent 10 }}
\ No newline at end of file
diff --git a/addons/mysql/templates/backuppolicytemplate.yaml b/addons/mysql/templates/backuppolicytemplate.yaml
index 101bdfa26..9a18ab028 100644
--- a/addons/mysql/templates/backuppolicytemplate.yaml
+++ b/addons/mysql/templates/backuppolicytemplate.yaml
@@ -10,7 +10,7 @@ spec:
   target:
     role: secondary
     fallbackRole: primary
-    account: root
+    account: kbadmin
   backupMethods:
     - name: xtrabackup
       snapshotVolumes: false
@@ -64,6 +64,23 @@
         volumeMounts:
          - name: data
            mountPath: {{ .Values.dataMountPath }}
+    - name: archive-binlog
+      target:
+        role: primary
+        account: kbadmin
+      snapshotVolumes: false
+      actionSetName: mysql-pitr
+      env:
+        - name: USE_REPLICA_STATUS
+          valueFrom:
+            versionMapping:
+              - serviceVersions:
+                  - "8.4"
+                mappedValue: "true"
+      targetVolumes:
+        volumeMounts:
+          - name: data
+            mountPath: {{ .Values.dataMountPath }}
   schedules:
     - backupMethod: xtrabackup
       enabled: false
@@ -76,4 +93,8 @@
     - backupMethod: volume-snapshot
       enabled: false
       cronExpression: "0 18 * * 0"
-      retentionPeriod: 7d
\ No newline at end of file
+      retentionPeriod: 7d
+    - backupMethod: archive-binlog
+      enabled: false
+      cronExpression: "*/30 * * * *"
+      retentionPeriod: 8d
\ No newline at end of file
diff --git a/addons/mysql/values.yaml b/addons/mysql/values.yaml
index 04d2dc6b4..936728e8c 100644
--- a/addons/mysql/values.yaml
+++ b/addons/mysql/values.yaml
@@ -21,6 +21,9 @@ image:
   orcTools:
     repository: apecloud/orc-tools
    tag: 1.0.3
+  walgImage:
+    repository: apecloud/wal-g-mysql
+    tag: 2.0.1-1-ubuntu
 
 ## MySQL Cluster parameters
 cluster:
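---
A minimal usage sketch (not part of the diff): the new Continuous method is
driven by the disabled-by-default `archive-binlog` schedule added above.
Enabling it on the BackupSchedule that KubeBlocks generates could look like the
following; the metadata and backupPolicyName values are placeholders for
whatever was generated for your cluster.

apiVersion: dataprotection.kubeblocks.io/v1alpha1
kind: BackupSchedule
metadata:
  name: mycluster-mysql-backup-schedule    # placeholder
spec:
  backupPolicyName: mycluster-mysql-backup-policy    # placeholder
  schedules:
    - backupMethod: archive-binlog
      enabled: true                        # flip from the template default "false"
      cronExpression: "*/30 * * * *"
      retentionPeriod: 8d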