From 14ba7beb8dd8ee3ce2415aae8d3a84bb6cb52a57 Mon Sep 17 00:00:00 2001 From: IAGO HENRIQUE Date: Tue, 9 May 2023 11:37:07 -0300 Subject: [PATCH 1/4] Added upgrade offering notification and added upgrade offering to chg registry --- dbaas/api/task.py | 1 + .../task_auto_upgrade_vm_offering.py | 3 ++- .../auto_upgrade_offering_notification.html | 20 ++++++++++++++ .../auto_upgrade_offering_notification.txt | 19 ++++++++++++++ dbaas/util/email_notifications.py | 26 +++++++++++++++++-- 5 files changed, 66 insertions(+), 3 deletions(-) create mode 100644 dbaas/notification/templates/email_extras/auto_upgrade_offering_notification.html create mode 100644 dbaas/notification/templates/email_extras/auto_upgrade_offering_notification.txt diff --git a/dbaas/api/task.py b/dbaas/api/task.py index 98084fae6..f41620f25 100644 --- a/dbaas/api/task.py +++ b/dbaas/api/task.py @@ -160,6 +160,7 @@ class TaskAPI(viewsets.ReadOnlyModelViewSet): 'maintenance.tasks.restart_database', 'notification.tasks.change_database_persistence', 'maintenance.tasks.task_upgrade_disk_type', + 'maintenance.tasks.auto_upgrade_database_vm_offering', ] model = TaskHistory diff --git a/dbaas/maintenance/task_auto_upgrade_vm_offering.py b/dbaas/maintenance/task_auto_upgrade_vm_offering.py index 33da4fc7c..555bf879d 100644 --- a/dbaas/maintenance/task_auto_upgrade_vm_offering.py +++ b/dbaas/maintenance/task_auto_upgrade_vm_offering.py @@ -6,7 +6,7 @@ from physical.models import (Plan, DatabaseInfra, Instance, Pool) from util.providers import get_auto_upgrade_vm_settings from workflow.workflow import steps_for_instances -from util import get_vm_name +from util import get_vm_name, email_notifications LOG = logging.getLogger(__name__) @@ -75,6 +75,7 @@ def task_auto_upgrade_vm_offering(database, task, retry_from=None, resize_target last_vm_created = number_of_instances_before_task if not retry_from: + email_notifications.upgrade_offering_notification(database, resize_target) for i in range(number_of_instances): 
instance = None last_vm_created += 1 diff --git a/dbaas/notification/templates/email_extras/auto_upgrade_offering_notification.html b/dbaas/notification/templates/email_extras/auto_upgrade_offering_notification.html new file mode 100644 index 000000000..114aa6365 --- /dev/null +++ b/dbaas/notification/templates/email_extras/auto_upgrade_offering_notification.html @@ -0,0 +1,20 @@ +

Automatic GCP offering upgrade for Database {{ database.name }}

+ +We would like to inform you that an automatic offering upgrade is being carried out on DBaaS to meet the needs of your database {{database.name}}. +

+The update will be done automatically from {{ current_offering.name }} to {{ future_offering.name }}. +

+Please be aware that this upgrade may increase your costs. +If you have any questions, please contact the DBDevops responsible for your team. +

+ +{% if database.team.email %} +You are receiving this email because in our records you are in team {{ database.team.name }}.
+If this is not right, contact the DBaaS system administrators. +{% else %} +

Team {{ database.team.name }} has no email set!

+{% endif %} +


+Regards,
+DBaaS notification robot
+{{domain}}
diff --git a/dbaas/notification/templates/email_extras/auto_upgrade_offering_notification.txt b/dbaas/notification/templates/email_extras/auto_upgrade_offering_notification.txt new file mode 100644 index 000000000..47bc1238b --- /dev/null +++ b/dbaas/notification/templates/email_extras/auto_upgrade_offering_notification.txt @@ -0,0 +1,19 @@ +Automatic GCP offering upgrade for Database {{ database.name }} + +We would like to inform you that an automatic offering upgrade is being carried out on DBaaS to meet the needs of your database {{database.name}}. + +The update will be done automatically from {{ current_offering.name }} to {{ future_offering.name }}. + +Please be aware that this upgrade may increase your costs. +If you have any questions, please contact the DBDevops responsible for your team. + +{% if database.team.email %} +You are receiving this email because in our records you are in team {{ database.team.name }}.
+If this is not right, contact the DBaaS system administrators. +{% else %} +Team {{ database.team.name }} has no email set! +{% endif %} + +Regards, +DBaaS notification robot +{{domain}} diff --git a/dbaas/util/email_notifications.py b/dbaas/util/email_notifications.py index 083c8b7e0..afe314e7b 100644 --- a/dbaas/util/email_notifications.py +++ b/dbaas/util/email_notifications.py @@ -157,8 +157,30 @@ def disk_resize_notification(database, new_disk, usage_percentage): ) -def schedule_task_notification(database, scheduled_task, is_new, - is_task_warning=False): +def upgrade_offering_notification(database, resize_target): + LOG.info('Notifying auto upgrade offering to database: {}'.format(database)) + + current_offering = database.databaseinfra.offering + future_offering = database.get_future_offering(resize_target) + + subject = _('[DBaaS] Database {} auto upgrade offering to {}').format(database, future_offering.name) + template = "auto_upgrade_offering_notification" + + context = { + 'domain': get_domain(), + 'database': database, + 'current_offering': current_offering, + 'future_offering': future_offering, + 'database_url': get_database_url(database.id) + } + + send_mail_template( + subject, template, email_from(), email_to(database.team), + fail_silently=False, attachments=None, context=context + ) + + +def schedule_task_notification(database, scheduled_task, is_new, is_task_warning=False): subject_tmpl = '[DBaaS] Automatic Task {} for Database {}' if is_task_warning: From 307e018b9a300b3b8d9d6a26d821ff4afc4ae0ca Mon Sep 17 00:00:00 2001 From: IAGO HENRIQUE Date: Tue, 16 May 2023 16:18:05 -0300 Subject: [PATCH 2/4] Fixed zabbix error: Session terminated with try and exception. 
--- dbaas/workflow/steps/util/zabbix.py | 29 +++++++++++++++++++++++------ 1 file changed, 23 insertions(+), 6 deletions(-) diff --git a/dbaas/workflow/steps/util/zabbix.py b/dbaas/workflow/steps/util/zabbix.py index 90d73494c..a108f151f 100644 --- a/dbaas/workflow/steps/util/zabbix.py +++ b/dbaas/workflow/steps/util/zabbix.py @@ -334,12 +334,20 @@ def __unicode__(self): def do(self): if not self.is_valid: return - self.zabbix_provider.disable_alarms() + try: + self.zabbix_provider.disable_alarms() + except: + self.provider_write = None + self.zabbix_provider.disable_alarms() def undo(self): if not self.is_valid: return - self.zabbix_provider.enable_alarms() + try: + self.zabbix_provider.enable_alarms() + except: + self.provider_write = None + self.zabbix_provider.enable_alarms() class EnableAlarms(ZabbixStep): @@ -354,11 +362,20 @@ def is_valid(self): def do(self): if not self.is_valid: return - - self.zabbix_provider.enable_alarms() + try: + self.zabbix_provider.enable_alarms() + except: + self.provider_write = None + self.zabbix_provider.enable_alarms() def undo(self): - self.zabbix_provider.disable_alarms() + if not self.is_valid: + return + try: + self.zabbix_provider.disable_alarms() + except: + self.provider_write = None + self.zabbix_provider.disable_alarms() class UpdateMonitoring(ZabbixStep): @@ -422,4 +439,4 @@ def do(self): host_name=self.instance.dns, macro='{$MONGO_SSL}', value=value - ) \ No newline at end of file + ) From ee1b930901b25182f6afc9774fa0daea3e9d1087 Mon Sep 17 00:00:00 2001 From: IAGO HENRIQUE Date: Tue, 16 May 2023 16:20:03 -0300 Subject: [PATCH 3/4] Added comments --- dbaas/workflow/steps/util/zabbix.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/dbaas/workflow/steps/util/zabbix.py b/dbaas/workflow/steps/util/zabbix.py index a108f151f..5bd94f668 100644 --- a/dbaas/workflow/steps/util/zabbix.py +++ b/dbaas/workflow/steps/util/zabbix.py @@ -334,6 +334,7 @@ def __unicode__(self): def do(self): if not self.is_valid: return + # 
Try and Exception to fix error Session terminated try: self.zabbix_provider.disable_alarms() except: @@ -343,6 +344,7 @@ def do(self): def undo(self): if not self.is_valid: return + # Try and Exception to fix error Session terminated try: self.zabbix_provider.enable_alarms() except: @@ -362,6 +364,7 @@ def is_valid(self): def do(self): if not self.is_valid: return + # Try and Exception to fix error Session terminated try: self.zabbix_provider.enable_alarms() except: @@ -371,6 +374,7 @@ def do(self): def undo(self): if not self.is_valid: return + # Try and Exception to fix error Session terminated try: self.zabbix_provider.disable_alarms() except: From d3767a75d0e49382ac92a572355c39a048e8c2ec Mon Sep 17 00:00:00 2001 From: Erkmann Date: Wed, 7 Jun 2023 09:25:56 -0300 Subject: [PATCH 4/4] AutoUpgrade - Change master modifications --- dbaas/drivers/base.py | 32 +++++++++++++++++++ dbaas/drivers/mongodb.py | 10 ++++++ dbaas/workflow/steps/util/vm.py | 54 +++++++++++++++++++++------------ 3 files changed, 77 insertions(+), 19 deletions(-) diff --git a/dbaas/drivers/base.py b/dbaas/drivers/base.py index 75f22a5e0..e4a7ffe0a 100644 --- a/dbaas/drivers/base.py +++ b/dbaas/drivers/base.py @@ -253,6 +253,9 @@ def is_replication_ok(self, instance): def switch_master(self, instance=None, preferred_slave_instance=None): raise NotImplementedError() + def switch_master_with_stepdowntime(self, instance=None, preferred_slave_instance=None, stepdown_time=60): + raise NotImplementedError() + def get_database_instances(self, ): driver_name = self.name.upper() instances = [instance @@ -369,6 +372,35 @@ def check_replication_and_switch(self, instance, attempts=100, "Could not switch master because of replication's delay" ) + def check_replication_and_switch_with_stepdown_time(self, instance, attempts=100, + check_is_master_attempts=5, + preferred_slave_instance=None, + stepdown_time=60): + LOG.info("Check Replication with StepDown time of %s seconds", stepdown_time) + from time import 
sleep + for attempt in range(0, attempts): + if self.is_replication_ok(instance): + self.switch_master_with_stepdowntime(instance, preferred_slave_instance, stepdown_time) + LOG.info("Switch master returned ok...") + + check_is_master_attempts_count = check_is_master_attempts + while self.check_instance_is_master(instance, + default_timeout=False): + if check_is_master_attempts_count == 0: + break + check_is_master_attempts_count -= 1 + sleep(10) + else: + return + + raise Exception("Could not change master") + + LOG.info("Waiting 10s to check replication...") + sleep(10) + raise Exception( + "Could not switch master because of replication's delay" + ) + def get_database_agents(self): """ Returns database agents list""" raise NotImplementedError() diff --git a/dbaas/drivers/mongodb.py b/dbaas/drivers/mongodb.py index e472bf426..ee6e10aee 100644 --- a/dbaas/drivers/mongodb.py +++ b/dbaas/drivers/mongodb.py @@ -546,6 +546,16 @@ def switch_master(self, instance=None, preferred_slave_instance=None): except pymongo.errors.AutoReconnect: pass + def switch_master_with_stepdowntime(self, instance=None, preferred_slave_instance=None, stepdown_time=60): + client = self.get_client(None) + try: + client.admin.command( + 'replSetStepDown', stepdown_time, + secondaryCatchUpPeriodSecs=60 + ) + except pymongo.errors.AutoReconnect: + pass + def get_database_agents(self): return [] diff --git a/dbaas/workflow/steps/util/vm.py b/dbaas/workflow/steps/util/vm.py index 4d25c8525..938781ea4 100644 --- a/dbaas/workflow/steps/util/vm.py +++ b/dbaas/workflow/steps/util/vm.py @@ -1,4 +1,5 @@ # -*- coding: utf-8 -*- +import logging from time import sleep from dbaas_credentials.models import CredentialType from util import get_credentials_for @@ -7,6 +8,8 @@ CHANGE_MASTER_ATTEMPS = 30 CHANGE_MASTER_SECONDS = 15 +LOG = logging.getLogger(__name__) + class HostStatus(object): @staticmethod @@ -159,33 +162,47 @@ class ChangeMasterTemporaryInstance(ChangeMaster): @property def is_valid(self): - if 
self.instance.temporary or self.check_master_is_temporary(): + master_temporary = self.check_master_is_temporary() + # so executa para a VM tepmoraria, e se a Master nao eh temporaria + if not self.instance.temporary or master_temporary: return False - return super(ChangeMasterTemporaryInstance, self).is_valid - def check_master_is_temporary(self): + return True + + def check_master_is_temporary(self, wait_seconds=0): + LOG.info("Checking master is temporary instance") + LOG.debug("Willl sleep for %s seconds before checking", wait_seconds) + sleep(wait_seconds) + master = self.driver.get_master_instance() - if master.temporary: - return True - return False + LOG.info("Master instance is %s", master) + LOG.info("Master is temporary? %s", master.temporary) + + if master is None or not master.temporary: + return False + + return True def change_master(self): error = None for _ in range(CHANGE_MASTER_ATTEMPS): - if self.is_slave: - return + error = None try: - self.driver.check_replication_and_switch(self.target_instance) - if not self.check_master_is_temporary(): + LOG.info("Trying to change master. 
Attempt %s", _) + self.driver.check_replication_and_switch_with_stepdown_time(self.target_instance, stepdown_time=300) + master_is_temporary = self.check_master_is_temporary(wait_seconds=60) + + if not master_is_temporary: raise Exception('Master is not the temporary instance') + + return except Exception as e: error = e sleep(CHANGE_MASTER_SECONDS) - else: - return - raise error + if error is not None: + raise error def do(self): if not self.is_valid: @@ -206,19 +223,18 @@ def change_master(self): error = None for _ in range(CHANGE_MASTER_ATTEMPS): - if self.is_slave: - return try: self.driver.check_replication_and_switch(self.target_instance) - if self.check_master_is_temporary(): + if self.check_master_is_temporary(wait_seconds=60): raise Exception('Master is the temporary instance') + + return except Exception as e: error = e sleep(CHANGE_MASTER_SECONDS) - else: - return - raise error + if error is not None: + raise error class ChangeMasterDatabaseMigrate(ChangeMaster):