diff --git a/.github/workflows/.deploy.yml b/.github/workflows/.deploy.yml
index b3ee203e6..49c0dc175 100644
--- a/.github/workflows/.deploy.yml
+++ b/.github/workflows/.deploy.yml
@@ -114,24 +114,27 @@ jobs:
     timeout-minutes: 10
     strategy:
       matrix:
-        name: [backend, frontend, oracle-api]
+        name: [backend, backup, frontend, oracle-api]
         include:
           - name: backend
             file: backend/openshift.deploy.yml
-            overwrite: true
             parameters:
               -p AWS_COGNITO_ISSUER_URI=https://cognito-idp.ca-central-1.amazonaws.com/${{ vars.VITE_USER_POOLS_ID }}
             verification_path: "health"
+          # post_rollout creates a one-off, timestamp-named Job from the backup CronJob.
+          - name: backup
+            file: common/openshift.backup.yml
+            parameters:
+              -p PG_DB_IMAGE=postgis/postgis:15-master
+            post_rollout: oc create job --from=cronjob/${{ github.event.repository.name }}-${{ inputs.target }}-database-backup ${{ github.event.repository.name }}-${{ inputs.target }}-database-backup-$(date +%Y%m%d%H%M%S)
           - name: frontend
             file: frontend/openshift.deploy.yml
-            overwrite: true
             parameters:
               -p FAM_MODDED_ZONE=${{ needs.init.outputs.fam-modded-zone }}
               -p VITE_SPAR_BUILD_VERSION=snapshot-${{ inputs.target || github.event.number }}
               -p VITE_USER_POOLS_ID=${{ vars.VITE_USER_POOLS_ID }}
           - name: oracle-api
             file: oracle-api/openshift.deploy.yml
-            overwrite: true
             parameters:
               -p AWS_COGNITO_ISSUER_URI=https://cognito-idp.ca-central-1.amazonaws.com/${{ vars.VITE_USER_POOLS_ID }}
               ${{ github.event_name == 'pull_request' && '-p CPU_LIMIT=100m' || '' }}
@@ -147,13 +150,14 @@ jobs:
           oc_namespace: ${{ vars.OC_NAMESPACE }}
           oc_server: ${{ vars.OC_SERVER }}
           oc_token: ${{ secrets.OC_TOKEN }}
-          overwrite: ${{ matrix.overwrite }}
+          overwrite: true
           parameters:
             -p TAG=${{ inputs.tag }}
             -p ZONE=${{ inputs.target }}
             ${{ github.event_name == 'pull_request' && '-p MIN_REPLICAS=1' || '' }}
             ${{ github.event_name == 'pull_request' && '-p MAX_REPLICAS=1' || '' }}
             ${{ matrix.parameters }}
+          post_rollout: ${{ matrix.post_rollout || '' }}
           verification_path: ${{ matrix.verification_path }}
           verification_retry_attempts: 5
           verification_retry_seconds: 20
diff --git a/common/openshift.backup.yml b/common/openshift.backup.yml
new file mode 100644
index 000000000..c5ae2b3d5
--- /dev/null
+++ b/common/openshift.backup.yml
@@ -0,0 +1,252 @@
+# OpenShift template for database backups: a PVC plus a daily backup CronJob
+# and a restore CronJob that both mount it.
+apiVersion: template.openshift.io/v1
+kind: Template
+labels:
+  app: ${NAME}-${ZONE}
+  app.kubernetes.io/part-of: ${NAME}-${ZONE}
+parameters:
+  - name: NAME
+    description: Product name
+    value: nr-spar
+  - name: COMPONENT
+    description: Component name
+    value: database-backup
+  - name: ZONE
+    description: Deployment zone, e.g. pr-### or prod
+    required: true
+  - name: RESTORE_DIR
+    description: Directory to be used for restoring the backup
+    value: /tmp/restore
+  - name: REGISTRY
+    description: Container registry to import from (internal is image-registry.openshift-image-registry.svc:5000)
+    value: ghcr.io
+  - name: BACKUP_DIR
+    description: "The name of the root backup directory"
+    required: true
+    value: /tmp/backup
+  - name: NUM_BACKUPS
+    description: The number of backup files to be retained
+    required: false
+    value: "5"
+  - name: "JOB_SERVICE_ACCOUNT"
+    description: "Name of the Service Account To Execute the Job As."
+    value: "default"
+    required: true
+  - name: "SUCCESS_JOBS_HISTORY_LIMIT"
+    description: "The number of successful jobs that will be retained"
+    value: "5"
+    required: true
+  - name: "FAILED_JOBS_HISTORY_LIMIT"
+    description: "The number of failed jobs that will be retained"
+    value: "2"
+    required: true
+  - name: "JOB_BACKOFF_LIMIT"
+    description: "The number of attempts to try for a successful job outcome"
+    value: "0"
+  - name: PVC_SIZE
+    description: Volume space available for data, e.g. 512Mi, 2Gi.
+    value: 256Mi
+  - name: PG_DB_IMAGE
+    description: PostgreSQL Image (namespace/name:tag) to be used for backup
+    required: true
+  - name: TAG
+    description: Dummy param, for convenience
+  - name: MIN_REPLICAS
+    description: Dummy param, for convenience
+  - name: MAX_REPLICAS
+    description: Dummy param, for convenience
+objects:
+  # Storage mounted by both CronJobs below.
+  - kind: PersistentVolumeClaim
+    apiVersion: v1
+    metadata:
+      name: ${NAME}-${ZONE}-${COMPONENT}
+      labels:
+        app: ${NAME}-${ZONE}
+    spec:
+      accessModes:
+        - ReadWriteOnce
+      resources:
+        requests:
+          storage: "${PVC_SIZE}"
+      storageClassName: netapp-file-standard
+  # Backup: runs daily at 00:00 and writes a data-only dump to the PVC.
+  - kind: CronJob
+    apiVersion: "batch/v1"
+    metadata:
+      name: ${NAME}-${ZONE}-${COMPONENT}
+      labels:
+        app: ${NAME}-${ZONE}
+        cronjob: ${NAME}-${ZONE}
+    spec:
+      schedule: "0 0 * * *"
+      concurrencyPolicy: "Replace"
+      successfulJobsHistoryLimit: ${{SUCCESS_JOBS_HISTORY_LIMIT}}
+      failedJobsHistoryLimit: ${{FAILED_JOBS_HISTORY_LIMIT}}
+      jobTemplate:
+        metadata:
+          labels:
+            app: ${NAME}-${ZONE}
+            cronjob: ${NAME}-${ZONE}
+        spec:
+          backoffLimit: ${{JOB_BACKOFF_LIMIT}}
+          template:
+            metadata:
+              labels:
+                app: ${NAME}-${ZONE}
+                cronjob: ${NAME}-${ZONE}
+            spec:
+              containers:
+                - name: ${NAME}-${ZONE}-${COMPONENT}
+                  image: ${REGISTRY}/${PG_DB_IMAGE}
+                  command: ["/bin/sh", "-c"]
+                  # 1) dump data to BACKUP_DIR/backup_<date>.sql
+                  # 2) delete files in BACKUP_DIR last modified more than NUM_BACKUPS days ago
+                  # 3) copy today's dump to RESTORE_DIR/W0__restore.sql
+                  args:
+                    - |
+                      pg_dump \
+                        -U ${POSTGRESQL_USER} \
+                        -h ${NAME}-${ZONE}-database \
+                        -d ${POSTGRESQL_DATABASE} \
+                        --data-only \
+                        --schema=nr-spar \
+                        --inserts \
+                        --no-comments \
+                        --on-conflict-do-nothing \
+                        --no-sync \
+                        --exclude-table=nr-spar.cone_collection_method_list \
+                        --exclude-table=nr-spar.gametic_methodology_list \
+                        --exclude-table=nr-spar.genetic_class_list \
+                        --exclude-table=nr-spar.genetic_worth_list \
+                        --exclude-table=nr-spar.method_of_payment_list \
+                        --exclude-table=nr-spar.seedlot_source_list \
+                        --exclude-table=nr-spar.seedlot_status_list \
+                        --exclude-table=nr-spar.etl_execution_log_hist \
+                        --exclude-table=nr-spar.etl_execution_map \
+                        --exclude-table=nr-spar.etl_execution_schedule \
+                        --file=${BACKUP_DIR}/backup_$(date +%Y-%m-%d).sql \
+                      &&
+                      find "${BACKUP_DIR}" -type f -mtime +$NUM_BACKUPS -exec rm -f {} \; &&
+                      cp -r ${BACKUP_DIR}/backup_$(date +%Y-%m-%d).sql ${RESTORE_DIR}/W0__restore.sql
+                  # NOTE(review): both paths mount the same PVC (no subPath), so
+                  # BACKUP_DIR and RESTORE_DIR expose the same files - confirm intended.
+                  volumeMounts:
+                    - mountPath: "${BACKUP_DIR}"
+                      name: ${NAME}-${ZONE}-${COMPONENT}
+                    - mountPath: "${RESTORE_DIR}"
+                      name: ${NAME}-${ZONE}-${COMPONENT}
+                  env:
+                    - name: RESTORE_DIR
+                      value: "${RESTORE_DIR}"
+                    - name: BACKUP_DIR
+                      value: "${BACKUP_DIR}"
+                    - name: NUM_BACKUPS
+                      value: "${NUM_BACKUPS}"
+                    - name: POSTGRESQL_DATABASE
+                      valueFrom:
+                        secretKeyRef:
+                          name: ${NAME}-${ZONE}-database
+                          key: database-name
+                    - name: POSTGRESQL_USER
+                      valueFrom:
+                        secretKeyRef:
+                          name: ${NAME}-${ZONE}-database
+                          key: database-user
+                    - name: POSTGRESQL_PASSWORD
+                      valueFrom:
+                        secretKeyRef:
+                          name: ${NAME}-${ZONE}-database
+                          key: database-password
+                    - name: PGPASSWORD
+                      valueFrom:
+                        secretKeyRef:
+                          name: ${NAME}-${ZONE}-database
+                          key: database-password
+              volumes:
+                - name: ${NAME}-${ZONE}-${COMPONENT}
+                  persistentVolumeClaim:
+                    claimName: ${NAME}-${ZONE}-${COMPONENT}
+              restartPolicy: "Never"
+              terminationGracePeriodSeconds: 30
+              activeDeadlineSeconds: 1600
+              dnsPolicy: "ClusterFirst"
+              serviceAccountName: "${JOB_SERVICE_ACCOUNT}"
+              serviceAccount: "${JOB_SERVICE_ACCOUNT}"
+  # Restore: "0 0 31 2 *" is Feb 31, which never occurs, so this never runs on
+  # schedule; presumably it is started by creating a job from it - confirm.
+  - kind: CronJob
+    apiVersion: "batch/v1"
+    metadata:
+      name: ${NAME}-${ZONE}-${COMPONENT}-restore
+      labels:
+        app: ${NAME}-${ZONE}
+        cronjob: ${NAME}-${ZONE}
+    spec:
+      schedule: "0 0 31 2 *"
+      concurrencyPolicy: "Replace"
+      successfulJobsHistoryLimit: ${{SUCCESS_JOBS_HISTORY_LIMIT}}
+      failedJobsHistoryLimit: ${{FAILED_JOBS_HISTORY_LIMIT}}
+      jobTemplate:
+        metadata:
+          labels:
+            app: ${NAME}-${ZONE}
+            cronjob: ${NAME}-${ZONE}
+        spec:
+          backoffLimit: ${{JOB_BACKOFF_LIMIT}}
+          template:
+            metadata:
+              labels:
+                app: ${NAME}-${ZONE}
+                cronjob: ${NAME}-${ZONE}
+            spec:
+              containers:
+                - name: ${NAME}-${ZONE}-${COMPONENT}-restore
+                  image: ${REGISTRY}/${PG_DB_IMAGE}
+                  command: ["/bin/sh", "-c"]
+                  # Run every *.sql file under RESTORE_DIR, in version-sort order,
+                  # against the same database host the backup job dumps from.
+                  args:
+                    - |
+                      find ${RESTORE_DIR} -type f -name "*.sql" | sort -V |
+                      while IFS= read -r sql_file; do
+                        echo "Running SQL file: $sql_file"
+                        psql -h ${NAME}-${ZONE}-database -U ${POSTGRESQL_USER} -d ${POSTGRESQL_DATABASE} -f "$sql_file"
+                      done
+                  volumeMounts:
+                    - mountPath: "${RESTORE_DIR}"
+                      name: ${NAME}-${ZONE}-${COMPONENT}
+                  env:
+                    - name: RESTORE_DIR
+                      value: "${RESTORE_DIR}"
+                    - name: POSTGRESQL_DATABASE
+                      valueFrom:
+                        secretKeyRef:
+                          name: ${NAME}-${ZONE}-database
+                          key: database-name
+                    - name: POSTGRESQL_USER
+                      valueFrom:
+                        secretKeyRef:
+                          name: ${NAME}-${ZONE}-database
+                          key: database-user
+                    - name: POSTGRESQL_PASSWORD
+                      valueFrom:
+                        secretKeyRef:
+                          name: ${NAME}-${ZONE}-database
+                          key: database-password
+                    - name: PGPASSWORD
+                      valueFrom:
+                        secretKeyRef:
+                          name: ${NAME}-${ZONE}-database
+                          key: database-password
+              volumes:
+                - name: ${NAME}-${ZONE}-${COMPONENT}
+                  persistentVolumeClaim:
+                    claimName: ${NAME}-${ZONE}-${COMPONENT}
+              restartPolicy: "Never"
+              terminationGracePeriodSeconds: 30
+              activeDeadlineSeconds: 1600
+              dnsPolicy: "ClusterFirst"
+              serviceAccountName: "${JOB_SERVICE_ACCOUNT}"
+              serviceAccount: "${JOB_SERVICE_ACCOUNT}"