Skip to content

Commit

Permalink
Merge pull request #796 from globocom/save-snapshot-on-destroy
Browse files Browse the repository at this point in the history
Save a snapshot before destroying the database, and persist the backup to the bucket
  • Loading branch information
nimbfire authored Jan 3, 2023
2 parents 49e1e6d + 4e3a1be commit e4ce7ef
Show file tree
Hide file tree
Showing 8 changed files with 157 additions and 88 deletions.
152 changes: 85 additions & 67 deletions dbaas/backup/tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,8 @@ def make_instance_snapshot_backup(instance, error, group,
provider_class=VolumeProviderSnapshot,
target_volume=None,
current_hour=None,
task=None):
task=None,
persist=0):
LOG.info("Make instance backup for {}".format(instance))
provider = provider_class(instance)
infra = instance.databaseinfra
Expand Down Expand Up @@ -130,16 +131,14 @@ def make_instance_snapshot_backup(instance, error, group,
backup_hour_list = Configuration.get_by_name_as_list(
'make_database_backup_hour'
)
if (snapshot_final_status == Snapshot.WARNING and has_snapshot):
if snapshot_final_status == Snapshot.WARNING and has_snapshot:
if str(current_hour) in backup_hour_list:
raise Exception(
"Backup with WARNING already created today."
)
raise Exception("Backup with WARNING already created today.")
else:
for _ in range(backup_retry_attempts):
try:
response = None
response = provider.take_snapshot()
response = provider.take_snapshot(persist=persist)
break
except IndexError as e:
content, response = e
Expand Down Expand Up @@ -650,92 +649,57 @@ def _check_snapshot_limit(instances, task):
)


def _create_database_backup(instance, task, group, current_hour):
task.add_detail('\n{} - Starting backup for {}...'.format(
strftime("%d/%m/%Y %H:%M:%S"), instance))

error = {}
try:
snapshot = make_instance_snapshot_backup(
instance=instance,
error=error,
group=group,
current_hour=current_hour,
task=task
)
except Exception as e:
task.add_detail('\n{} - Error: {}'.format(strftime("%d/%m/%Y %H:%M:%S"), e))
return False

if 'errormsg' in error:
task.add_detail('\n{} - Error: {}'.format(strftime("%d/%m/%Y %H:%M:%S"), error['errormsg']))
return False

task.add_detail('{} - Backup for {} was successful'.format(
strftime("%d/%m/%Y %H:%M:%S"), instance))

return snapshot


def _get_infras_with_backup_today():
    """Return the distinct DatabaseInfras that already have a successful,
    non-purged backup which finished today, for infras whose scheduled
    backup hour has already passed.
    """
    now = datetime.now()
    return DatabaseInfra.objects.filter(
        instances__backup_instance__status=Snapshot.SUCCESS,
        backup_hour__lt=now.hour,
        plan__has_persistence=True,
        instances__backup_instance__purge_at=None,
        instances__backup_instance__end_at__year=now.year,
        instances__backup_instance__end_at__month=now.month,
        instances__backup_instance__end_at__day=now.day,
    ).distinct()


@app.task(bind=True)
def make_database_backup(self, database, task, automatic, current_hour):
worker_name = get_worker_name()
task_history = TaskHistory.register(
request=self.request, worker_name=worker_name, task_history=task
)

def validate_create_backup(database, task, automatic, current_hour, force=False, persist=0):
LOG.info('Searching for RUNNING backup tasks for database %s', database)
running_tasks = TaskHistory.objects.filter(
task_status=TaskHistory.STATUS_RUNNING,
database_name=database.name,
task_name__contains='make_database_backup'
).exclude(id=task.id)
if running_tasks:
error = 'There is a running make_database_backup task for the same database'
LOG.warning(error)
task.set_status_warning(error, database)
return False
return True

LOG.info('Searching for WAITING backup tasks for database %s', database)
waiting_tasks = TaskHistory.objects.filter(
task_status=TaskHistory.STATUS_WAITING,
database_name=database.name,
task_name__contains='make_database_backup'
).exclude(id=task.id)
if waiting_tasks:
error = 'There is a waiting make_database_backup task for the same database'
LOG.warning(error)
task.set_status_warning(error, database)
return False
return True

if not database.pin_task(task):
LOG.info("Trying to lock database %s", database)
if not database.pin_task(task) and not database.lock.first():
LOG.error('Not able to lock database %s', database)
task.error_in_lock(database)
return False
return True

LOG.info('Searching for SUCCESSFUL today backups for database %s', database)
infras_with_backup_today = _get_infras_with_backup_today()
if database.infra in infras_with_backup_today and automatic:
if database.infra in infras_with_backup_today and automatic and not force:
error = 'There is already a successful backup for this database done today'
LOG.warning(error)
task.set_status_warning(error, database)
return False
return True

task_history.add_detail('{} - Starting database {} backup'.format(
task.add_detail('{} - Starting database {} backup'.format(
strftime("%d/%m/%Y %H:%M:%S"),
database))
LOG.info('Starting database %s backup', database)

LOG.info('Getting instances for database %s', database)
instances = _get_backup_instance(database, task)
if not instances:
task.set_status_error('Could not find eligible instances', database)
return False
error = 'Could not find eligible instances'
LOG.error(error)
task.set_status_error(error, database)
return True

_check_snapshot_limit(instances, task)

Expand All @@ -744,7 +708,7 @@ def make_database_backup(self, database, task, automatic, current_hour):

has_warning = False
for instance in instances:
snapshot = _create_database_backup(instance, task, group, current_hour)
snapshot = _create_database_backup(instance, task, group, current_hour, persist)

if not snapshot:
task.set_status_error(
Expand All @@ -761,14 +725,68 @@ def make_database_backup(self, database, task, automatic, current_hour):
if not has_warning:
has_warning = snapshot.has_warning

return has_warning


def _create_database_backup(instance, task, group, current_hour, persist):
task.add_detail('\n{} - Starting backup for {}...'.format(
strftime("%d/%m/%Y %H:%M:%S"), instance))

error = {}
try:
snapshot = make_instance_snapshot_backup(
instance=instance,
error=error,
group=group,
current_hour=current_hour,
task=task,
persist=persist
)
except Exception as e:
task.add_detail('\n{} - Error: {}'.format(strftime("%d/%m/%Y %H:%M:%S"), e))
return False

if 'errormsg' in error:
task.add_detail('\n{} - Error: {}'.format(strftime("%d/%m/%Y %H:%M:%S"), error['errormsg']))
return False

task.add_detail('{} - Backup for {} was successful'.format(
strftime("%d/%m/%Y %H:%M:%S"), instance))

return snapshot


@app.task(bind=True)
def make_database_backup(self, database, task, automatic, current_hour):
worker_name = get_worker_name()
task_history = TaskHistory.register(
request=self.request, worker_name=worker_name, task_history=task
)

has_warning = validate_create_backup(database, task_history, automatic, current_hour)

if has_warning:
task.set_status_warning('{} - Backup was warning'.format(strftime("%d/%m/%Y %H:%M:%S")), database)
else:
task.set_status_success('{} - Backup was successful'.format(strftime("%d/%m/%Y %H:%M:%S")), database)
task_history.set_status_warning('{} - Backup was warning'.format(strftime("%d/%m/%Y %H:%M:%S")), database)
elif not has_warning and task_history.task_status != TaskHistory.STATUS_ERROR:
task_history.set_status_success('{} - Backup was successful'.format(strftime("%d/%m/%Y %H:%M:%S")), database)

return True


def _get_infras_with_backup_today():
    """Return the distinct DatabaseInfras that already have a successful,
    non-purged backup which finished today, for infras whose scheduled
    backup hour has already passed.
    """
    now = datetime.now()
    return DatabaseInfra.objects.filter(
        instances__backup_instance__status=Snapshot.SUCCESS,
        backup_hour__lt=now.hour,
        plan__has_persistence=True,
        instances__backup_instance__purge_at=None,
        instances__backup_instance__end_at__year=now.year,
        instances__backup_instance__end_at__month=now.month,
        instances__backup_instance__end_at__day=now.day,
    ).distinct()


@app.task(bind=True)
def remove_database_backup(self, task, snapshot):
worker_name = get_worker_name()
Expand Down
13 changes: 11 additions & 2 deletions dbaas/drivers/replication_topologies/mongodb.py
Original file line number Diff line number Diff line change
Expand Up @@ -281,8 +281,11 @@ def get_deploy_steps(self):
)}, {
'Update Host Disk Size': (
'workflow.steps.util.host_provider.UpdateHostRootVolumeSize',
)
}]
)}, {
'Save Snapshot': (
'workflow.steps.util.database.MakeSnapshot',
)}
]

def get_host_migrate_steps(self):
return [{
Expand Down Expand Up @@ -740,6 +743,9 @@ def get_deploy_steps(self):
)}, {
'Update Host Disk Size': (
'workflow.steps.util.host_provider.UpdateHostRootVolumeSize',
),
'Save Snapshot': (
'workflow.steps.util.database.MakeSnapshot',
)
}]

Expand Down Expand Up @@ -1352,6 +1358,9 @@ def get_deploy_steps(self):
# 'Update Host Disk Size': (
# 'workflow.steps.util.host_provider.UpdateHostRootVolumeSize',
# )
),
'Save Snapshot': (
'workflow.steps.util.database.MakeSnapshot',
)
}]

Expand Down
9 changes: 9 additions & 0 deletions dbaas/drivers/replication_topologies/mysql.py
Original file line number Diff line number Diff line change
Expand Up @@ -204,6 +204,9 @@ def get_deploy_steps(self):
)}, {
'Update Host Disk Size': (
'workflow.steps.util.host_provider.UpdateHostRootVolumeSize',
),
'Save Snapshot': (
'workflow.steps.util.database.MakeSnapshot',
)
}]

Expand Down Expand Up @@ -592,6 +595,9 @@ def get_deploy_steps(self):
)}, {
'Update Host Disk Size': (
'workflow.steps.util.host_provider.UpdateHostRootVolumeSize',
),
'Save Snapshot': (
'workflow.steps.util.database.MakeSnapshot',
)
}]

Expand Down Expand Up @@ -1666,6 +1672,9 @@ def get_deploy_steps(self):
)}, {
'Update Host Disk Size': (
'workflow.steps.util.host_provider.UpdateHostRootVolumeSize',
),
'Save Snapshot': (
'workflow.steps.util.database.MakeSnapshot',
)
}]

Expand Down
9 changes: 9 additions & 0 deletions dbaas/drivers/replication_topologies/redis.py
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,9 @@ def get_deploy_steps(self):
)}, {
'Update Host Disk Size': (
'workflow.steps.util.host_provider.UpdateHostRootVolumeSize',
),
'Save Snapshot': (
'workflow.steps.util.database.MakeSnapshot',
)
}]

Expand Down Expand Up @@ -486,6 +489,9 @@ def get_deploy_steps(self):
)}, {
'Update Host Disk Size': (
'workflow.steps.util.host_provider.UpdateHostRootVolumeSize',
),
'Save Snapshot': (
'workflow.steps.util.database.MakeSnapshot',
)
}]

Expand Down Expand Up @@ -809,6 +815,9 @@ def get_deploy_steps(self):
)}, {
'Update Host Disk Size': (
'workflow.steps.util.host_provider.UpdateHostRootVolumeSize',
),
'Save Snapshot': (
'workflow.steps.util.database.MakeSnapshot',
)
}]

Expand Down
3 changes: 2 additions & 1 deletion dbaas/logical/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -287,7 +287,8 @@ def pin_task(self, task):
try:
with transaction.atomic():
DatabaseLock(database=self, task=task).save()
except Error:
except Error as e:
LOG.error(e)
return False
else:
return True
Expand Down
14 changes: 1 addition & 13 deletions dbaas/physical/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -1992,21 +1992,9 @@ def instance_pre_delete(sender, **kwargs):
instance pre delete
"""

from backup.models import Snapshot
import datetime

# it used to mark purge_at as 'now' to all snapshots of the instance
instance = kwargs.get('instance')

LOG.debug("instance %s pre-delete" % (instance))

snapshots = Snapshot.objects.filter(
instance=instance, purge_at__isnull=True)

for snapshot in snapshots:
LOG.debug("Setting snapshopt %s purge_at time" % (snapshot))
snapshot.purge_at = datetime.datetime.now()
snapshot.save()

LOG.debug("instance pre-delete triggered")


Expand Down
Loading

0 comments on commit e4ce7ef

Please sign in to comment.