From 9c56fafbd1905952fcb1db4e5b96194af8c9095a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Javier=20Jaramago=20Fern=C3=A1ndez?= Date: Fri, 22 Nov 2024 16:24:30 +0100 Subject: [PATCH 1/3] Fix worker threads stalling race condition on RESUME command During a RESUME operation, if a 'MySQL_Thread' is bootstrapping listeners (in `MySQL_Thread::run_BootstrapListener`) and detect that `MySQL_Threads_Handler::bootstrapping_listeners` is 'false', the thread prematurely shuts down its own bootstrapping flag from 'mypolls' (`ProxySQL_Poll::bootstrapping_listeners`). Since this thread wont ever bootstrap its corresponding listening sockets, the other working threads will be stalled waiting on it, eventually triggering the watchdog and crashing the instance. Since the bootstrapping operation is sequential, it's expected that all the threads but the one starting their listening sockets are in an active wait. A sensible delay has been introduced to reduce the overhead of such wait. --- include/ProxySQL_Poll.h | 1 - lib/MySQL_Thread.cpp | 16 +++++----------- lib/ProxySQL_Poll.cpp | 1 - 3 files changed, 5 insertions(+), 13 deletions(-) diff --git a/include/ProxySQL_Poll.h b/include/ProxySQL_Poll.h index 5cfa3d1a48..8fc08063ca 100644 --- a/include/ProxySQL_Poll.h +++ b/include/ProxySQL_Poll.h @@ -34,7 +34,6 @@ class ProxySQL_Poll { MySQL_Data_Stream **myds; unsigned long long *last_recv; unsigned long long *last_sent; - std::atomic bootstrapping_listeners; volatile int pending_listener_add; volatile int pending_listener_del; unsigned int poll_timeout; diff --git a/lib/MySQL_Thread.cpp b/lib/MySQL_Thread.cpp index e903612138..4da5d9750e 100644 --- a/lib/MySQL_Thread.cpp +++ b/lib/MySQL_Thread.cpp @@ -3303,15 +3303,11 @@ void MySQL_Thread::run_BootstrapListener() { if (n) { poll_listener_add(n); assert(__sync_bool_compare_and_swap(&mypolls.pending_listener_add,n,0)); - } else { - if (GloMTH->bootstrapping_listeners == false) { - // we stop looping - mypolls.bootstrapping_listeners = false; - } } -#ifdef DEBUG - usleep(5+rand()%10); -#endif + // The delay for the active-wait is a fraction of 'poll_timeout'. Since other + // threads may be waiting on poll for further operations, checks are meaningless + // until that timeout expires (other workers make progress). + usleep(std::min(std::max(mysql_thread___poll_timeout/20, 10000), 40000) + (rand() % 2000)); } } @@ -3432,9 +3428,7 @@ void MySQL_Thread::run() { #endif // IDLE_THREADS pthread_mutex_unlock(&thread_mutex); - if (unlikely(mypolls.bootstrapping_listeners == true)) { - run_BootstrapListener(); - } + run_BootstrapListener(); // flush mysql log file GloMyLogger->flush(); diff --git a/lib/ProxySQL_Poll.cpp b/lib/ProxySQL_Poll.cpp index 8876e5f373..f6fa32b246 100644 --- a/lib/ProxySQL_Poll.cpp +++ b/lib/ProxySQL_Poll.cpp @@ -64,7 +64,6 @@ ProxySQL_Poll::ProxySQL_Poll() { len=0; pending_listener_add=0; pending_listener_del=0; - bootstrapping_listeners = true; size=MIN_POLL_LEN; fds=(struct pollfd *)malloc(size*sizeof(struct pollfd)); myds=(MySQL_Data_Stream **)malloc(size*sizeof(MySQL_Data_Stream *)); From 8774b3167547301311e527e8f32efde27ea0aa22 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Javier=20Jaramago=20Fern=C3=A1ndez?= Date: Fri, 22 Nov 2024 16:53:28 +0100 Subject: [PATCH 2/3] Prevent busy-waiting in active wait during 'listener_del' Since the operation of stopping each worker thread listeners is performed during 'maintenance_loops', the active wait taking place in 'listener_del' is likely to take some time. A sensible delay has been added to reduce unneccesary load. --- lib/MySQL_Thread.cpp | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/lib/MySQL_Thread.cpp b/lib/MySQL_Thread.cpp index 4da5d9750e..70a019af77 100644 --- a/lib/MySQL_Thread.cpp +++ b/lib/MySQL_Thread.cpp @@ -1200,7 +1200,11 @@ int MySQL_Threads_Handler::listener_del(const char *iface) { } for (i=0;imypolls.pending_listener_del,0)); + while(__sync_fetch_and_add(&thr->mypolls.pending_listener_del,0)) { + // Since 'listeners_stop' is performed in 'maintenance_loops' by the + // workers this active-wait is likely to take some time. + usleep(std::min(std::max(mysql_thread___poll_timeout/20, 10000), 40000)); + } } MLM->del(idx); #ifdef SO_REUSEPORT From 321ffe0e8d561da8bbcef8e469d82e2fb1a38e98 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Javier=20Jaramago=20Fern=C3=A1ndez?= Date: Fri, 22 Nov 2024 20:03:39 +0100 Subject: [PATCH 3/3] Fix potential invalid syntax in TAP test queries This is a follow-up of commit #19c8f8698 --- test/tap/tests/reg_test_4402-mysql_fields-t.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/test/tap/tests/reg_test_4402-mysql_fields-t.cpp b/test/tap/tests/reg_test_4402-mysql_fields-t.cpp index ff0d3e5869..12dac63f5d 100644 --- a/test/tap/tests/reg_test_4402-mysql_fields-t.cpp +++ b/test/tap/tests/reg_test_4402-mysql_fields-t.cpp @@ -106,7 +106,9 @@ int main(int argc, char** argv) { // to check table alias issue: { - const std::string& query = "SELECT data FROM testdb.dummy_table AS " + generate_random_string(length); + // NOTE: The randomly generated string should be escaped \`\`, otherwise could collide + // with SQL reserved words, causing an invalid test failure. + const std::string& query = "SELECT data FROM testdb.dummy_table AS `" + generate_random_string(length) + "`"; MYSQL_QUERY__(proxysql, query.c_str()); MYSQL_RES* res = mysql_use_result(proxysql);