Skip to content
This repository has been archived by the owner on Jun 23, 2022. It is now read-only.

BREAKING CHANGE: fix behavior when backup period less than 1day #724

Draft
wants to merge 7 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
116 changes: 52 additions & 64 deletions src/meta/meta_backup_service.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -647,55 +647,16 @@ void policy_context::continue_current_backup_unlocked()
bool policy_context::should_start_backup_unlocked()
{
uint64_t now = dsn_now_ms();
uint64_t recent_backup_start_time_ms = 0;
if (!_backup_history.empty()) {
recent_backup_start_time_ms = _backup_history.rbegin()->second.start_time_ms;
}

// the true start time of recent backup have drifted away with the origin start time of
// policy,
// so we should take the drift into consideration; if user change the start time of the
// policy,
// we just think the change of start time as drift
int32_t hour = 0, min = 0, sec = 0;
if (recent_backup_start_time_ms == 0) {
// the first time to backup, just consider the start time
::dsn::utils::time_ms_to_date_time(now, hour, min, sec);
if (_backup_history.empty()) {
// The first time to backup, just consider the start time
int32_t hour = 0, min = 0, sec = 0;
dsn::utils::time_ms_to_date_time(now, hour, min, sec);
return _policy.start_time.should_start_backup(hour, min);
} else {
uint64_t next_backup_time_ms =
recent_backup_start_time_ms + _policy.backup_interval_seconds * 1000;
if (_policy.start_time.hour != 24) {
// user have specify the time point to start backup, so we should take the the
// time-drift into consideration

// compute the time-drift
::dsn::utils::time_ms_to_date_time(recent_backup_start_time_ms, hour, min, sec);
int64_t time_dirft_ms = _policy.start_time.compute_time_drift_ms(hour, min);

if (time_dirft_ms >= 0) {
// hour:min(the true start time) >= policy.start_time :
// 1, user move up the start time of policy, such as 20:00 to 2:00, we just
// think this case as time drift
// 2, the true start time of backup is delayed, compared the origin start time
// of policy, we should process this case
// 3, the true start time of backup is the same with the origin start time of
// policy
next_backup_time_ms -= time_dirft_ms;
} else {
// hour:min(the true start time) < policy.start_time:
// 1, user delay the start time of policy, such as 2:00 to 23:00
//
// these case has already been handled, we do nothing
}
}
if (next_backup_time_ms <= now) {
::dsn::utils::time_ms_to_date_time(now, hour, min, sec);
return _policy.start_time.should_start_backup(hour, min);
} else {
return false;
}
}
uint64_t recent_backup_start_time_ms = _backup_history.rbegin()->second.start_time_ms;
uint64_t next_backup_time_ms =
recent_backup_start_time_ms + _policy.backup_interval_seconds * 1000;
return now >= next_backup_time_ms;
}

void policy_context::issue_new_backup_unlocked()
Expand All @@ -716,17 +677,17 @@ void policy_context::issue_new_backup_unlocked()
}

if (!should_start_backup_unlocked()) {
ddebug_f("{}: it's not the right time to start backup now, retry to issue new backup 1 "
zhangyifan27 marked this conversation as resolved.
Show resolved Hide resolved
"second later.",
_policy.policy_name);
tasking::enqueue(LPC_DEFAULT_CALLBACK,
&_tracker,
[this]() {
zauto_lock l(_lock);
issue_new_backup_unlocked();
},
0,
_backup_service->backup_option().issue_backup_interval_ms);
ddebug("%s: start issue new backup %" PRId64 "ms later",
_policy.policy_name.c_str(),
_backup_service->backup_option().issue_backup_interval_ms.count());
std::chrono::seconds(1));
return;
}

Expand Down Expand Up @@ -835,6 +796,12 @@ std::vector<backup_info> policy_context::get_backup_infos(int cnt)
return ret;
}

bool policy_context::has_backup_history()
{
zauto_lock l(_lock);
return _backup_history.size() > 0;
zhangyifan27 marked this conversation as resolved.
Show resolved Hide resolved
}

bool policy_context::is_under_backuping()
{
zauto_lock l(_lock);
Expand Down Expand Up @@ -1254,11 +1221,20 @@ void backup_service::add_backup_policy(dsn::message_ex *msg)
std::shared_ptr<policy_context> policy_context_ptr = _factory(this);
dassert(policy_context_ptr != nullptr, "invalid policy_context");
policy p;
if (!p.start_time.parse_from(request.start_time)) {
derror_f("invalid start time: {}, policy {} shouldn't be added.",
request.start_time,
request.policy_name);
response.err = ERR_INVALID_PARAMETERS;
response.hint_message = "invalid start time: " + request.start_time;
_meta_svc->reply_data(msg, response);
msg->release_ref();
neverchanje marked this conversation as resolved.
Show resolved Hide resolved
return;
}
p.policy_name = request.policy_name;
p.backup_provider_type = request.backup_provider_type;
p.backup_interval_seconds = request.backup_interval_seconds;
p.backup_history_count_to_keep = request.backup_history_count_to_keep;
p.start_time.parse_from(request.start_time);
p.app_ids = app_ids;
p.app_names = app_names;
policy_context_ptr->set_policy(std::move(p));
Expand Down Expand Up @@ -1433,14 +1409,13 @@ void backup_service::modify_backup_policy(configuration_modify_backup_policy_rpc
auto iter = _policy_states.find(request.policy_name);
if (iter == _policy_states.end()) {
response.err = ERR_INVALID_PARAMETERS;
response.hint_message = fmt::format("couldn't find policy {}.", request.policy_name);
context_ptr = nullptr;
} else {
context_ptr = iter->second;
return;
}
context_ptr = iter->second;
}
if (context_ptr == nullptr) {
return;
}

policy cur_policy = context_ptr->get_policy();

bool is_under_backup = context_ptr->is_under_backuping();
Expand Down Expand Up @@ -1540,15 +1515,28 @@ void backup_service::modify_backup_policy(configuration_modify_backup_policy_rpc
}

if (request.__isset.start_time) {
if (context_ptr->has_backup_history() || context_ptr->is_under_backuping()) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Adding context_ptr->has_backup_history() condition, it means if the policy has already generated the backup, its start time can not be modified. Could you please explain why it should not be modified to me?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Because start_time only indicates the first time to start backup, if we have already started a backup, we shouldn't modify it.

Copy link
Contributor

@hycdong hycdong Jan 12, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For once backup, start_time indicates the first time to start backup, for periodical backup, I think it should be allowed to be updated, it indicates this round time to start backup, it is controlled by last start time and backup interval. Making start_time allowed to be updated, it will make function flexible, I recommend remain it if it won't make code too hard to understand.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The start time of subsequent backups is only related to last_backup_start_time and backup_interval_secends, if users want to modify next backup time, they could modify backup_interval_secends.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, you almost persuade me. Let me take an example to explain, there were a policy whose start time is 1:00, interval time is 1 day, if we would like update it to backup everyday 2:00. In this case, we wouldn't update interval time, if we don't provide updating start_time, we can only create another policy or update interval twice.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Let‘s imagine how would user use this feature, maybe it's almost like how would you set you clock, there is no such a configurable "interval" concept, the interval should always be fixed value like day(or week, month, etc).
If user want to backup multiple times in one day, she should set multiple policies. On the other hand, I believe backup multiple times a day isn't a regular requirement, we don't need to make our design and code too complex to satisfy it.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think the design and code is complex. start_time is the first time to start a backup of a policy, backup_interval_seconds is interval between two backups, which is quite easy to understand. If users want to modify start_time, they should create another policy.

If we all agree that we should keep backup_interval_seconds, then any interval could be set, we shouldn't only support the fixed interval(day/week/month).

response.err = ERR_INVALID_PARAMETERS;
response.hint_message =
fmt::format("backup of policy {} has already started, shouldn't modify start_time.",
cur_policy.policy_name);
return;
}
backup_start_time t_start_time;
if (t_start_time.parse_from(request.start_time)) {
ddebug("%s: policy change start_time from (%s) to (%s)",
cur_policy.policy_name.c_str(),
cur_policy.start_time.to_string().c_str(),
t_start_time.to_string().c_str());
cur_policy.start_time = t_start_time;
have_modify_policy = true;
if (!t_start_time.parse_from(request.start_time)) {
response.err = ERR_INVALID_PARAMETERS;
response.hint_message =
fmt::format("invalid start_time: {}, policy {} shouldn't be modified.",
request.start_time,
cur_policy.policy_name);
return;
}
ddebug_f("{}: policy change start_time from {} to {}",
cur_policy.policy_name,
cur_policy.start_time.to_string(),
t_start_time.to_string());
cur_policy.start_time = t_start_time;
have_modify_policy = true;
}

if (have_modify_policy) {
Expand Down
65 changes: 13 additions & 52 deletions src/meta/meta_backup_service.h
Original file line number Diff line number Diff line change
Expand Up @@ -79,15 +79,10 @@ struct backup_info
backup_id, start_time_ms, end_time_ms, app_ids, app_names, info_status)
};

// Attention: backup_start_time == 24:00 is represent no limit for start_time, 24:00 is mainly saved
// for testing
//
// current, we don't support accurating to minute, only support accurating to hour, so
// we just set minute to 0
struct backup_start_time
{
int32_t hour; // [0 ~24)
int32_t minute; // [0 ~ 60)
int32_t hour;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's better to change the type of hour and minute to uint8_t

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

dsn::utils::time_ms_to_date_time use int32_t for hour, min, sec, so we also use int32_t here.
Maybe we should consider use uint8_t to replace int32_t in dsn::utils::time_ms_to_date_time, if not , even if we changed interger type here, type conversions would still be needed.

int32_t minute;
backup_start_time() : hour(0), minute(0) {}
backup_start_time(int32_t h, int32_t m) : hour(h), minute(m) {}
std::string to_string() const
Expand All @@ -97,58 +92,23 @@ struct backup_start_time
<< std::setfill('0') << std::to_string(minute);
return ss.str();
}
// NOTICE: this function will modify hour and minute, if time is invalid, this func will set
// hour = 24, minute = 0

// If start_time is valid, return true.
bool parse_from(const std::string &time)
{
if (::sscanf(time.c_str(), "%d:%d", &hour, &minute) != 2) {
if (sscanf(time.c_str(), "%d:%d", &hour, &minute) != 2) {
return false;
} else {
if (hour > 24) {
hour = 24;
minute = 0;
return false;
}

if (hour == 24 && minute != 0) {
minute = 0;
return false;
}

if (minute >= 60) {
hour = 24;
minute = 0;
return false;
}
}
return true;
}

// return the interval between new_hour:new_min and start_time,
// namely new_hour:new_min - start_time;
// unit is ms
int64_t compute_time_drift_ms(int32_t new_hour, int32_t new_min)
{
int64_t res = 0;
// unit is hour
res += (new_hour - hour);
// unit is minute
res *= 60;
res += (new_min - minute);
// unit is ms
return (res * 60 * 1000);
return (hour >= 0 && hour < 24 && minute >= 0 && minute < 60);
}

// judge whether we should start backup base current time
bool should_start_backup(int32_t cur_hour, int32_t cur_min)
{
if (hour == 24) {
// erase the restrict of backup_start_time, just for testing
if (hour == 24 && minute == 0) {
// only for tests
Comment on lines +107 to +108
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this is too tricky. I don't want any test code appears in such way.

Try this: https://github.com/XiaoMi/rdsn/blob/master/include/dsn/utility/fail_point.h

FAIL_POINT_INJECT_F(should_start_backup, []() -> bool {
  return true;
});

When you in test calling fail::cfg("should_start_backup", return()), and this func will return true.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Actually we didn't call should_start_backup directly in tests, we called issue_new_backup_unlocked and expected starting a new backup in policy_context_test.test_app_dropped_during_backup.

return true;
}
// NOTICE : if you want more precisely, you can use cur_min to implement
// now, we just ignore
return (cur_hour == hour);
return (cur_hour == hour) && (cur_min == minute);
hycdong marked this conversation as resolved.
Show resolved Hide resolved
}
DEFINE_JSON_SERIALIZATION(hour, minute)
};
Expand All @@ -175,10 +135,10 @@ class policy : public policy_info
backup_start_time start_time;
policy()
: app_ids(),
backup_interval_seconds(0),
backup_history_count_to_keep(6),
backup_interval_seconds(-1),
backup_history_count_to_keep(3),
is_disable(false),
start_time(24, 0) // default is 24:00, namely no limit
start_time(0, 0)
{
}

Expand Down Expand Up @@ -234,6 +194,7 @@ class policy_context
void add_backup_history(const backup_info &info);
std::vector<backup_info> get_backup_infos(int cnt);
bool is_under_backuping();
bool has_backup_history();
mock_virtual void start();
// function above will called be others, before call these function, should lock the _lock of
// policy_context, otherwise maybe lead deadlock
Expand Down
Loading