Skip to content

Commit

Permalink
Disable monitoring of servers not responding to ping
Browse files Browse the repository at this point in the history
This commit also prevents shunned nodes from coming back online if they are missing pings. Related to #1416
Because it reduces the number of checks, it may also be relevant to #1417
  • Loading branch information
renecannao committed Mar 17, 2018
1 parent 64e0eef commit 6141f96
Show file tree
Hide file tree
Showing 5 changed files with 84 additions and 24 deletions.
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,9 @@ deps/pcre/pcre/
deps/curl/curl/
deps/curl/curl-7.57.0/

# google-coredumper
deps/google-coredumper/google-coredumper/

#protobuf
deps/protobuf/protobuf-2.6.1/

Expand Down
2 changes: 1 addition & 1 deletion include/MySQL_HostGroups_Manager.h
Original file line number Diff line number Diff line change
Expand Up @@ -246,7 +246,7 @@ class MySQL_HostGroups_Manager {
void read_only_action(char *hostname, int port, int read_only);
unsigned int get_servers_table_version();
void wait_servers_table_version(unsigned, unsigned);
void shun_and_killall(char *hostname, int port);
bool shun_and_killall(char *hostname, int port);
void set_server_current_latency_us(char *hostname, int port, unsigned int _current_latency_us);
unsigned long long Get_Memory_Stats();

Expand Down
1 change: 1 addition & 0 deletions include/MySQL_Monitor.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -135,6 +135,7 @@ class MySQL_Monitor {
void * monitor_replication_lag();
void * run();
void populate_monitor_mysql_server_group_replication_log();
bool server_responds_to_ping(char *address, int port);
};

#endif /* __CLASS_MYSQL_MONITOR_H */
14 changes: 11 additions & 3 deletions lib/MySQL_HostGroups_Manager.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2109,11 +2109,14 @@ void MySQL_HostGroups_Manager::read_only_action(char *hostname, int port, int re
// shun_and_killall
// this function is called only from MySQL_Monitor::monitor_ping()
// it temporarily disables a host that is not responding to pings, and marks the host so that, when it is used, the connection will be dropped
void MySQL_HostGroups_Manager::shun_and_killall(char *hostname, int port) {
// return true if the status was changed
bool MySQL_HostGroups_Manager::shun_and_killall(char *hostname, int port) {
time_t t = time(NULL);
bool ret = false;
wrlock();
MySrvC *mysrvc=NULL;
for (unsigned int i=0; i<MyHostGroups->len; i++) {
MyHGC *myhgc=(MyHGC *)MyHostGroups->index(i);
for (unsigned int i=0; i<MyHostGroups->len; i++) {
MyHGC *myhgc=(MyHGC *)MyHostGroups->index(i);
unsigned int j;
unsigned int l=myhgc->mysrvs->cnt();
if (l) {
Expand All @@ -2126,6 +2129,9 @@ void MySQL_HostGroups_Manager::shun_and_killall(char *hostname, int port) {
break;
}
case MYSQL_SERVER_STATUS_ONLINE:
if (mysrvc->status == MYSQL_SERVER_STATUS_ONLINE) {
ret = true;
}
mysrvc->status=MYSQL_SERVER_STATUS_SHUNNED;
case MYSQL_SERVER_STATUS_OFFLINE_SOFT:
mysrvc->shunned_automatic=true;
Expand All @@ -2135,11 +2141,13 @@ void MySQL_HostGroups_Manager::shun_and_killall(char *hostname, int port) {
default:
break;
}
mysrvc->time_last_detected_error = t;
}
}
}
}
wrunlock();
return ret;
}

// set_server_current_latency_us
Expand Down
88 changes: 68 additions & 20 deletions lib/MySQL_Monitor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1227,12 +1227,16 @@ void * MySQL_Monitor::monitor_connect() {
}
for (std::vector<SQLite3_row *>::iterator it = resultset->rows.begin() ; it != resultset->rows.end(); ++it) {
SQLite3_row *r=*it;
MySQL_Monitor_State_Data *mmsd=new MySQL_Monitor_State_Data(r->fields[0],atoi(r->fields[1]), NULL, atoi(r->fields[2]));
mmsd->mondb=monitordb;
WorkItem* item;
item=new WorkItem(mmsd,monitor_connect_thread);
GloMyMon->queue.add(item);
usleep(us);
bool rc_ping = true;
rc_ping = server_responds_to_ping(r->fields[0],atoi(r->fields[1]));
if (rc_ping) { // only if server is responding to pings
MySQL_Monitor_State_Data *mmsd=new MySQL_Monitor_State_Data(r->fields[0],atoi(r->fields[1]), NULL, atoi(r->fields[2]));
mmsd->mondb=monitordb;
WorkItem* item;
item=new WorkItem(mmsd,monitor_connect_thread);
GloMyMon->queue.add(item);
usleep(us);
}
if (GloMyMon->shutdown) return NULL;
}
}
Expand Down Expand Up @@ -1405,8 +1409,11 @@ void * MySQL_Monitor::monitor_ping() {
if (resultset) {
if (resultset->rows_count) {
// disable host
proxy_error("Server %s:%s missed %d heartbeats, shunning it and killing all the connections\n", addresses[j], ports[j], max_failures);
MyHGM->shun_and_killall(addresses[j],atoi(ports[j]));
bool rc_shun = false;
rc_shun = MyHGM->shun_and_killall(addresses[j],atoi(ports[j]));
if (rc_shun) {
proxy_error("Server %s:%s missed %d heartbeats, shunning it and killing all the connections. Disabling other checks until the node comes back online.\n", addresses[j], ports[j], max_failures);
}
}
delete resultset;
resultset=NULL;
Expand Down Expand Up @@ -1504,6 +1511,39 @@ void * MySQL_Monitor::monitor_ping() {
return NULL;
}



bool MySQL_Monitor::server_responds_to_ping(char *address, int port) {
bool ret = true; // default
char *error=NULL;
int cols=0;
int affected_rows=0;
SQLite3_result *resultset=NULL;
char *new_query=NULL;
new_query=(char *)"SELECT 1 FROM (SELECT hostname,port,ping_error FROM mysql_server_ping_log WHERE hostname='%s' AND port=%d ORDER BY time_start_us DESC LIMIT %d) a WHERE ping_error IS NOT NULL AND ping_error NOT LIKE 'Access denied for user%%' GROUP BY hostname,port HAVING COUNT(*)=%d";
char *buff=(char *)malloc(strlen(new_query)+strlen(address)+32);
int max_failures = mysql_thread___monitor_ping_max_failures;
sprintf(buff,new_query,address,port,max_failures,max_failures);
monitordb->execute_statement(buff, &error , &cols , &affected_rows , &resultset);
if (!error) {
if (resultset) {
if (resultset->rows_count) {
ret = false;
}
delete resultset;
resultset=NULL;
}
} else {
proxy_error("Error on %s : %s\n", buff, error);
}
if (resultset) {
delete resultset;
resultset=NULL;
}
free(buff);
return ret;
}

void * MySQL_Monitor::monitor_read_only() {
// initialize the MySQL Thread (note: this is not a real thread, just the structures associated with it)
unsigned int MySQL_Monitor__thread_MySQL_Thread_Variables_version;
Expand Down Expand Up @@ -1554,12 +1594,16 @@ void * MySQL_Monitor::monitor_read_only() {
}
for (std::vector<SQLite3_row *>::iterator it = resultset->rows.begin() ; it != resultset->rows.end(); ++it) {
SQLite3_row *r=*it;
MySQL_Monitor_State_Data *mmsd=new MySQL_Monitor_State_Data(r->fields[0],atoi(r->fields[1]), NULL, atoi(r->fields[2]));
mmsd->mondb=monitordb;
WorkItem* item;
item=new WorkItem(mmsd,monitor_read_only_thread);
GloMyMon->queue.add(item);
usleep(us);
bool rc_ping = true;
rc_ping = server_responds_to_ping(r->fields[0],atoi(r->fields[1]));
if (rc_ping) { // only if server is responding to pings
MySQL_Monitor_State_Data *mmsd=new MySQL_Monitor_State_Data(r->fields[0],atoi(r->fields[1]), NULL, atoi(r->fields[2]));
mmsd->mondb=monitordb;
WorkItem* item;
item=new WorkItem(mmsd,monitor_read_only_thread);
GloMyMon->queue.add(item);
usleep(us);
}
if (GloMyMon->shutdown) return NULL;
}
}
Expand Down Expand Up @@ -1790,12 +1834,16 @@ void * MySQL_Monitor::monitor_replication_lag() {
}
for (std::vector<SQLite3_row *>::iterator it = resultset->rows.begin() ; it != resultset->rows.end(); ++it) {
SQLite3_row *r=*it;
MySQL_Monitor_State_Data *mmsd = new MySQL_Monitor_State_Data(r->fields[1], atoi(r->fields[2]), NULL, atoi(r->fields[4]), atoi(r->fields[0]));
mmsd->mondb=monitordb;
WorkItem* item;
item=new WorkItem(mmsd,monitor_replication_lag_thread);
GloMyMon->queue.add(item);
usleep(us);
bool rc_ping = true;
rc_ping = server_responds_to_ping(r->fields[0],atoi(r->fields[1]));
if (rc_ping) { // only if server is responding to pings
MySQL_Monitor_State_Data *mmsd = new MySQL_Monitor_State_Data(r->fields[1], atoi(r->fields[2]), NULL, atoi(r->fields[4]), atoi(r->fields[0]));
mmsd->mondb=monitordb;
WorkItem* item;
item=new WorkItem(mmsd,monitor_replication_lag_thread);
GloMyMon->queue.add(item);
usleep(us);
}
if (GloMyMon->shutdown) return NULL;
}
}
Expand Down

0 comments on commit 6141f96

Please sign in to comment.