From 3b973523c7c265857cfc286f1a9068137d8a3774 Mon Sep 17 00:00:00 2001 From: Igor Gaponenko Date: Thu, 17 Aug 2023 22:38:29 +0000 Subject: [PATCH 01/15] Extended MySQL query generator to support stored procedures --- src/replica/DatabaseMySQLGenerator.cc | 11 +++++++++++ src/replica/DatabaseMySQLGenerator.h | 26 ++++++++++++++++++++++++++ src/replica/testQueryGenerator.cc | 6 +++++- 3 files changed, 42 insertions(+), 1 deletion(-) diff --git a/src/replica/DatabaseMySQLGenerator.cc b/src/replica/DatabaseMySQLGenerator.cc index 6e8cffa7df..54aa89cb12 100644 --- a/src/replica/DatabaseMySQLGenerator.cc +++ b/src/replica/DatabaseMySQLGenerator.cc @@ -58,6 +58,8 @@ Sql Sql::TIMESTAMPDIFF(string const& resolution, SqlId const& lhs, SqlId const& return Sql("TIMESTAMPDIFF(" + resolution + "," + lhs.str + "," + rhs.str + ")"); } +Sql Sql::QSERV_MANAGER(DoNotProcess const& sqlVal) { return Sql("QSERV_MANAGER(" + sqlVal.str + ")"); } + QueryGenerator::QueryGenerator(shared_ptr conn) : _conn(conn) {} string QueryGenerator::escape(string const& str) const { return _conn == nullptr ? 
str : _conn->escape(str); } @@ -124,6 +126,15 @@ string QueryGenerator::showVars(SqlVarScope scope, string const& pattern) const throwOnInvalidScope(__func__, scope); } +string QueryGenerator::call(DoNotProcess const& packedProcAndArgs) const { + if (packedProcAndArgs.str.empty()) { + string const msg = "QueryGenerator::" + string(__func__) + + " the packed procedure and its arguments can not be empty."; + throw invalid_argument(msg); + } + return "CALL " + packedProcAndArgs.str; +} + string QueryGenerator::_setVars(SqlVarScope scope, string const& packedVars) const { if (packedVars.empty()) { string const msg = "QueryGenerator::" + string(__func__) + diff --git a/src/replica/DatabaseMySQLGenerator.h b/src/replica/DatabaseMySQLGenerator.h index 2d321d2dc9..30bbee33e2 100644 --- a/src/replica/DatabaseMySQLGenerator.h +++ b/src/replica/DatabaseMySQLGenerator.h @@ -100,6 +100,12 @@ class Sql : public DoNotProcess { /// @param lhs Preprocessed identifier of the left column to be selected. /// @param rhs Preprocessed identifier of the left column to be selected. static Sql TIMESTAMPDIFF(std::string const& resolution, SqlId const& lhs, SqlId const& rhs); + + /// @param sqlVal A value of the required parameter of the procedure. The value is required to + /// be preprocessed. + /// @return an object representing the procedure "QSERV_MANAGER()" + static Sql QSERV_MANAGER(DoNotProcess const& sqlVal); + /// @param str_ the input string explicit Sql(std::string const& str_) : DoNotProcess(str_) {} @@ -263,6 +269,8 @@ class QueryGenerator { return Sql::TIMESTAMPDIFF(resolution, id(lhs), id(rhs)); } + Sql QSERV_MANAGER(std::string const& v) const { return Sql::QSERV_MANAGER(val(v)); } + // Generator: [cond1 [AND cond2 [...]]] /// The end of variadic recursion @@ -1051,6 +1059,24 @@ class QueryGenerator { return _setVars(scope, packPairs(Fargs...)); } + /** + * @brief Generator for calling stored procedures. 
+ * + * For the following sample input: + * @code + * call(QSERV_MANAGER("abc")); + * @endcode + * The generator will produce this statement: + * @code + * CALL QSERV_MANAGER('abc') + * @endcode + * + * @param packedProcAndArgs The well-formed SQL for the procedure and its parameters + * @return Well-formed SQL statement. + * @throws std::invalid_argument If the input parameter is empty. + */ + std::string call(DoNotProcess const& packedProcAndArgs) const; + private: /// @return A string that's ready to be included into the queries. template diff --git a/src/replica/testQueryGenerator.cc b/src/replica/testQueryGenerator.cc index 7271892901..fcaae82aff 100644 --- a/src/replica/testQueryGenerator.cc +++ b/src/replica/testQueryGenerator.cc @@ -134,6 +134,8 @@ BOOST_AUTO_TEST_CASE(QueryGeneratorTest) { {"TIMESTAMPDIFF(SECOND,`submitted`,NOW())", g.TIMESTAMPDIFF("SECOND", "submitted", Sql::NOW).str}, {"TIMESTAMPDIFF(SECOND,`table`.`submitted`,`table`.`completed`)", g.TIMESTAMPDIFF("SECOND", g.id("table", "submitted"), g.id("table", "completed")).str}, + {"QSERV_MANAGER('abc')", Sql::QSERV_MANAGER(g.val("abc")).str}, + {"QSERV_MANAGER('abc')", g.QSERV_MANAGER("abc").str}, // Values {"1", g.val(true).str}, @@ -423,7 +425,9 @@ BOOST_AUTO_TEST_CASE(QueryGeneratorTest) { {"SET `var1`=1", g.setVars(SqlVarScope::SESSION, make_pair("var1", 1))}, {"SET GLOBAL `var2`=2,`var3`='abc'", - g.setVars(SqlVarScope::GLOBAL, make_pair("var2", 2), make_pair("var3", "abc"))}}; + g.setVars(SqlVarScope::GLOBAL, make_pair("var2", 2), make_pair("var3", "abc"))}, + + {"CALL QSERV_MANAGER('abc')", g.call(g.QSERV_MANAGER("abc"))}}; for (auto&& test : tests) { BOOST_CHECK_EQUAL(test.first, test.second); From 25755324027aa7b0790fcd551d7800d8fde88b55 Mon Sep 17 00:00:00 2001 From: Igor Gaponenko Date: Fri, 18 Aug 2023 01:44:18 +0000 Subject: [PATCH 02/15] Relaxed syntax requirements for the connections strings The new implementation allows not to provide database names in the connection strings.
The connection string parser was extended with an additional parameter allowing to provide the default database name if the one is found missing in the input connection string. --- src/replica/DatabaseMySQLTypes.cc | 13 +++--- src/replica/DatabaseMySQLTypes.h | 14 +++++-- src/replica/testConnectionParams.cc | 63 ++++++++++++++++++++--------- 3 files changed, 59 insertions(+), 31 deletions(-) diff --git a/src/replica/DatabaseMySQLTypes.cc b/src/replica/DatabaseMySQLTypes.cc index a333f498a9..359c174762 100644 --- a/src/replica/DatabaseMySQLTypes.cc +++ b/src/replica/DatabaseMySQLTypes.cc @@ -58,10 +58,12 @@ ConnectionParams::ConnectionParams(string const& host_, uint16_t port_, string c ConnectionParams ConnectionParams::parse(string const& params, string const& defaultHost, uint16_t defaultPort, string const& defaultUser, - string const& defaultPassword) { + string const& defaultPassword, string const& defaultDatabase) { string const context = "ConnectionParams::" + string(__func__) + " "; - regex re("^[ ]*mysql://([^:]+)?(:([^:]?.*[^@]?))?@([^:^/]+)?(:([0-9]+))?(/([^ ]+))[ ]*$", + // Further details on the syntax of the connection strings can be found at + // the declaration section of the method ConnectionParams::parse. + regex re("^[ ]*mysql://([^:]+)?(:([^:]?.*[^@]?))?@([^:^/]+)?(:([0-9]+))?(/([^ ]*))?[ ]*$", regex::extended); smatch match; @@ -86,11 +88,8 @@ ConnectionParams ConnectionParams::parse(string const& params, string const& def string const port = match[6].str(); connectionParams.port = port.empty() ? defaultPort : (uint16_t)stoul(port); - // no default option for the database - connectionParams.database = match[8].str(); - if (connectionParams.database.empty()) { - throw invalid_argument(context + "database name not found in the encoded parameters string"); - } + string const database = match[8].str(); + connectionParams.database = database.empty() ?
defaultDatabase : database; LOGS(_log, LOG_LVL_DEBUG, context << connectionParams); diff --git a/src/replica/DatabaseMySQLTypes.h b/src/replica/DatabaseMySQLTypes.h index aeb838d42e..7ef879553a 100644 --- a/src/replica/DatabaseMySQLTypes.h +++ b/src/replica/DatabaseMySQLTypes.h @@ -56,24 +56,30 @@ class ConnectionParams { * by values of parameters found in the input encoded string. The string is * expected to have the following syntax: * @code - * mysql://[user][:password]@[host][:port]/database + * mysql://[user][:password]@[host][:port][/database] + * @endcode + * The minimal (though, totally useless) URI would be: + * @code + * mysql://@ * @code * * @note - * 1) all (but the database) keywords are mandatory - * 2) default values for other parameters (if missing in the string) will be assumed. + * 1) all attributes are optional + * 2) default values for the missing attributes will be assumed * * @param params connection parameters packed into a string * @param defaultHost default value for a host name * @param defaultPort default port number * @param defaultUser default value for a database user account * @param defaultPassword default value for a database user account + * @param defaultDatabase default value for the database name * @throw std::invalid_argument if the string can't be parsed */ static ConnectionParams parse(std::string const& params, std::string const& defaultHost = "localhost", uint16_t defaultPort = 3306, std::string const& defaultUser = FileUtils::getEffectiveUser(), - std::string const& defaultPassword = std::string()); + std::string const& defaultPassword = std::string(), + std::string const& defaultDatabase = std::string()); /// Initialize connection parameters with default values ConnectionParams(); diff --git a/src/replica/testConnectionParams.cc b/src/replica/testConnectionParams.cc index a19a2a3d15..5432e1ee27 100644 --- a/src/replica/testConnectionParams.cc +++ b/src/replica/testConnectionParams.cc @@ -77,23 +77,24 @@
BOOST_AUTO_TEST_CASE(ConnectionParamsTest) { assigned = normallyConstructed; BOOST_CHECK(assigned == normallyConstructed); - // The minimal connection string. Only the name of a database is - // required. The rest is filled with the default values passed into - // the constructor. These default values will be used through the rest + // The minimal connection string in which all fields but the general structure of + // the URI are missing. The missing fields will be filled with the default values passed + // into the constructor. These default values will be used through the rest // of the parser tests. string const defaultHost = "Host-A"; uint16_t const defaultPort = 23306; string const defaultUser = "qserv"; string const defaultPassword = "CHANGEME"; + string const defaultDatabase = "test"; database::mysql::ConnectionParams parsed; BOOST_REQUIRE_NO_THROW({ - string const conn = "mysql://@/test"; + string const conn = "mysql://@"; LOGS_INFO("ConnectionParamsTest input: '" << conn << "'"); try { parsed = database::mysql::ConnectionParams::parse(conn, defaultHost, defaultPort, defaultUser, - defaultPassword); + defaultPassword, defaultDatabase); } catch (exception const& ex) { LOGS_INFO("ConnectionParamsTest unexpected exception: " << ex.what()); throw; @@ -104,12 +105,12 @@ BOOST_AUTO_TEST_CASE(ConnectionParamsTest) { BOOST_CHECK(parsed.port == defaultPort); BOOST_CHECK(parsed.user == defaultUser); BOOST_CHECK(parsed.password == defaultPassword); - BOOST_CHECK(parsed.database == "test"); + BOOST_CHECK(parsed.database == defaultDatabase); BOOST_CHECK(parsed.toString() == "mysql://" + defaultUser + ":xxxxxx@" + defaultHost + ":" + - to_string(defaultPort) + "/test"); + to_string(defaultPort) + "/" + defaultDatabase); BOOST_CHECK(parsed.toString(showPassword) == "mysql://" + defaultUser + ":" + defaultPassword + "@" + defaultHost + ":" + to_string(defaultPort) + - "/test"); + "/" + defaultDatabase); }); // Similar to the previous one except spaces added at both ends of
@@ -117,11 +118,11 @@ BOOST_AUTO_TEST_CASE(ConnectionParamsTest) { database::mysql::ConnectionParams equallyParsed; BOOST_REQUIRE_NO_THROW({ - string const conn = " mysql://@/test "; + string const conn = " mysql://@/ "; LOGS_INFO("ConnectionParamsTest input: '" << conn << "'"); try { - equallyParsed = database::mysql::ConnectionParams::parse(conn, defaultHost, defaultPort, - defaultUser, defaultPassword); + equallyParsed = database::mysql::ConnectionParams::parse( + conn, defaultHost, defaultPort, defaultUser, defaultPassword, defaultDatabase); } catch (exception const& ex) { LOGS_INFO("ConnectionParamsTest unexpected exception: " << ex.what()); throw; @@ -132,12 +133,12 @@ BOOST_AUTO_TEST_CASE(ConnectionParamsTest) { BOOST_CHECK(equallyParsed.port == defaultPort); BOOST_CHECK(equallyParsed.user == defaultUser); BOOST_CHECK(equallyParsed.password == defaultPassword); - BOOST_CHECK(equallyParsed.database == "test"); + BOOST_CHECK(equallyParsed.database == defaultDatabase); BOOST_CHECK(equallyParsed.toString() == "mysql://" + defaultUser + ":xxxxxx@" + defaultHost + ":" + - to_string(defaultPort) + "/test"); - BOOST_CHECK(equallyParsed.toString(showPassword) == "mysql://" + defaultUser + ":" + defaultPassword + - "@" + defaultHost + ":" + - to_string(defaultPort) + "/test"); + to_string(defaultPort) + "/" + defaultDatabase); + BOOST_CHECK(equallyParsed.toString(showPassword) == + "mysql://" + defaultUser + ":" + defaultPassword + "@" + defaultHost + ":" + + to_string(defaultPort) + "/" + defaultDatabase); }); BOOST_CHECK(equallyParsed == parsed); @@ -236,6 +237,29 @@ BOOST_AUTO_TEST_CASE(ConnectionParamsTest) { BOOST_CHECK(parsed.toString(showPassword) == "mysql://" + defaultUser + ":" + defaultPassword + "@Host-B:13306/test"); }); + BOOST_REQUIRE_NO_THROW({ + // Missing database + string const conn = " mysql://qsreplica:CHANGEMETOO@Host-B:13306/ "; + LOGS_INFO("ConnectionParamsTest input: '" << conn << "'"); + database::mysql::ConnectionParams parsed; + try { + 
parsed = database::mysql::ConnectionParams::parse(conn, defaultHost, defaultPort, defaultUser, + defaultPassword, defaultDatabase); + } catch (exception const& ex) { + LOGS_INFO("ConnectionParamsTest unexpected exception: " << ex.what()); + throw; + } + LOGS_INFO("ConnectionParamsTest parsed: '" << parsed.toString() << "'"); + LOGS_INFO("ConnectionParamsTest parsed: '" << parsed.toString(showPassword) << "'"); + BOOST_CHECK(parsed.host == "Host-B"); + BOOST_CHECK(parsed.port == 13306); + BOOST_CHECK(parsed.user == "qsreplica"); + BOOST_CHECK(parsed.password == "CHANGEMETOO"); + BOOST_CHECK(parsed.database == defaultDatabase); + BOOST_CHECK(parsed.toString() == "mysql://qsreplica:xxxxxx@Host-B:13306/" + defaultDatabase); + BOOST_CHECK(parsed.toString(showPassword) == + "mysql://qsreplica:CHANGEMETOO@Host-B:13306/" + defaultDatabase); + }); // Parsing a connection string with all components provided @@ -261,14 +285,13 @@ BOOST_AUTO_TEST_CASE(ConnectionParamsTest) { BOOST_CHECK(parsed.toString(showPassword) == "mysql://qsreplica:CHANGEMETOO@Host-B:13306/test"); }); - // Test exception throwing if the database name is missing in - // a connection string. Note that exceptions are intercepted and - // thrown again to improve the error reporting (what causes + // Test exception throwing if '@' is missing in a connection string. Note that + // exceptions are intercepted and thrown again to improve the error reporting (what causes // the exceptions). BOOST_CHECK_THROW( { - string const conn = "mysql://@"; + string const conn = "mysql://"; LOGS_INFO("ConnectionParamsTest input: '" << conn << "'"); database::mysql::ConnectionParams parsed; try { From b898453ada5504609570d40929d606d1c48731ab Mon Sep 17 00:00:00 2001 From: Igor Gaponenko Date: Thu, 17 Aug 2023 22:39:37 +0000 Subject: [PATCH 03/15] Added configuration parameter (url) for the MySQL proxy The change was made to the Configuration service of the Replication/Ingest system.
--- src/replica/Configuration.cc | 19 +++++++++++++++++++ src/replica/Configuration.h | 20 ++++++++++++++++++++ src/replica/testConfiguration.cc | 2 ++ 3 files changed, 41 insertions(+) diff --git a/src/replica/Configuration.cc b/src/replica/Configuration.cc index 34e5bdb6ee..3e8a17fc81 100644 --- a/src/replica/Configuration.cc +++ b/src/replica/Configuration.cc @@ -72,6 +72,7 @@ unsigned int Configuration::_databaseTransactionTimeoutSec = 3600; bool Configuration::_schemaUpgradeWait = true; unsigned int Configuration::_schemaUpgradeWaitTimeoutSec = 3600; string Configuration::_qservCzarDbUrl = "mysql://qsmaster@localhost:3306/qservMeta"; +string Configuration::_qservCzarProxyUrl = "mysql://qsmaster@localhost:4040/"; string Configuration::_qservWorkerDbUrl = "mysql://qsmaster@localhost:3306/qservw_worker"; replica::Mutex Configuration::_classMtx; @@ -97,6 +98,24 @@ database::mysql::ConnectionParams Configuration::qservCzarDbParams(string const& return connectionParams(_qservCzarDbUrl, database); } +void Configuration::setQservCzarProxyUrl(string const& url) { + if (url.empty()) { + throw invalid_argument("Configuration::" + string(__func__) + " empty string is not allowed."); + } + replica::Lock const lock(_classMtx, _context(__func__)); + _qservCzarProxyUrl = url; +} + +string Configuration::qservCzarProxyUrl() { + replica::Lock const lock(_classMtx, _context(__func__)); + return _qservCzarProxyUrl; +} + +database::mysql::ConnectionParams Configuration::qservCzarProxyParams(string const& database) { + replica::Lock const lock(_classMtx, _context(__func__)); + return connectionParams(_qservCzarProxyUrl, database); +} + void Configuration::setQservWorkerDbUrl(string const& url) { if (url.empty()) { throw invalid_argument("Configuration::" + string(__func__) + " empty string is not allowed."); diff --git a/src/replica/Configuration.h b/src/replica/Configuration.h index 2a02189b8a..f85b48ccd0 100644 --- a/src/replica/Configuration.h +++ b/src/replica/Configuration.h 
@@ -148,6 +148,25 @@ class Configuration { /// @param url A connection string for accessing Qserv czar's database. static void setQservCzarDbUrl(std::string const& url); + /** + * Return a connection object for the czar's MySQL proxy service with the name of + * a database optionally rewritten from the one stored in the corresponding URL. + * This is done for the sake of convenience of clients to ensure a specific + * database is set as the default context. + * @param database The optional name of a database to assume if a non-empty + * string was provided. + * @return The parsed connection object with the name of the database optionally + * overwritten. + */ + static database::mysql::ConnectionParams qservCzarProxyParams( + std::string const& database = std::string()); + + /// @return A connection string for accessing Qserv czar's proxy. + static std::string qservCzarProxyUrl(); + + /// @param url A connection string for accessing Qserv czar's proxy. + static void setQservCzarProxyUrl(std::string const& url); + /** * Return a connection object for the worker's MySQL service with the name of * a database optionally rewritten from the one stored in the corresponding URL. @@ -778,6 +797,7 @@ class Configuration { static bool _schemaUpgradeWait; static unsigned int _schemaUpgradeWaitTimeoutSec; static std::string _qservCzarDbUrl; + static std::string _qservCzarProxyUrl; static std::string _qservWorkerDbUrl; // For implementing static synchronized methods. 
diff --git a/src/replica/testConfiguration.cc b/src/replica/testConfiguration.cc index d140df5608..9a639f6208 100644 --- a/src/replica/testConfiguration.cc +++ b/src/replica/testConfiguration.cc @@ -65,6 +65,7 @@ BOOST_AUTO_TEST_CASE(ConfigurationTestStaticParameters) { LOGS_INFO("Testing static parameters"); BOOST_CHECK_THROW(Configuration::setQservCzarDbUrl(""), std::invalid_argument); + BOOST_CHECK_THROW(Configuration::setQservCzarProxyUrl(""), std::invalid_argument); BOOST_CHECK_THROW(Configuration::setQservWorkerDbUrl(""), std::invalid_argument); BOOST_REQUIRE_NO_THROW(Configuration::setDatabaseAllowReconnect(true)); @@ -154,6 +155,7 @@ BOOST_AUTO_TEST_CASE(ConfigurationTestReadingGeneralParameters) { BOOST_CHECK(config->get("database", "qserv-master-user") == "qsmaster"); BOOST_CHECK(config->qservCzarDbUrl() == "mysql://qsmaster@localhost:3306/qservMeta"); + BOOST_CHECK(config->qservCzarProxyUrl() == "mysql://qsmaster@localhost:4040/"); BOOST_CHECK(config->qservWorkerDbUrl() == "mysql://qsmaster@localhost:3306/qservw_worker"); BOOST_CHECK(config->get("database", "services-pool-size") == 2); From af7a4279d2e9da38d95ca801371c90a797855a66 Mon Sep 17 00:00:00 2001 From: Igor Gaponenko Date: Wed, 30 Aug 2023 01:19:58 +0000 Subject: [PATCH 04/15] Move class replica::AsyncTimer into the utility module Refactored dependencies accordingly. The move allows re-using the class by other Qserv modules without adding a direct dependency on the module 'replica' which was the original location of the class. Fixed a bug in the implementation of the timer to allow subsequent restarts. Unblocking implementation of the callback calling mechanism. Extended a model of the callbacks to allow ordering automatic restarts of the timer via the return value (boolean type) of the callbacks. Minor refactoring of the implementation.
--- src/replica/CMakeLists.txt | 2 - src/replica/testHttpAsyncReq.cc | 54 ++++++++++++++------------ src/replica/testIngestRequestMgr.cc | 24 ++++++------ src/{replica => util}/AsyncTimer.cc | 27 ++++++++----- src/{replica => util}/AsyncTimer.h | 59 +++++++++++++++++++---------- src/util/CMakeLists.txt | 1 + 6 files changed, 98 insertions(+), 69 deletions(-) rename src/{replica => util}/AsyncTimer.cc (83%) rename src/{replica => util}/AsyncTimer.h (68%) diff --git a/src/replica/CMakeLists.txt b/src/replica/CMakeLists.txt index 28e8855cf1..424609f2a9 100644 --- a/src/replica/CMakeLists.txt +++ b/src/replica/CMakeLists.txt @@ -23,8 +23,6 @@ target_sources(replica PRIVATE ApplicationColl.h ApplicationTypes.cc ApplicationTypes.h - AsyncTimer.cc - AsyncTimer.h CheckSumApp.cc CheckSumApp.h ChunkLocker.cc diff --git a/src/replica/testHttpAsyncReq.cc b/src/replica/testHttpAsyncReq.cc index fee65f6da7..6b705359f6 100644 --- a/src/replica/testHttpAsyncReq.cc +++ b/src/replica/testHttpAsyncReq.cc @@ -42,9 +42,9 @@ #include "qhttp/Response.h" #include "qhttp/Server.h" #include "qhttp/Status.h" -#include "replica/AsyncTimer.h" #include "replica/HttpAsyncReq.h" #include "replica/Mutex.h" +#include "util/AsyncTimer.h" // Boost unit test header #define BOOST_TEST_MODULE HttpAsyncReq @@ -140,10 +140,10 @@ BOOST_AUTO_TEST_CASE(HttpAsyncReq_simple) { // The deadline timer limits the duration of the test to prevent the test from // being stuck for longer than expected. 
- auto const testAbortTimer = - AsyncTimer::create(io_service, chrono::milliseconds(100), [](auto expirationIvalMs) { + auto const testAbortTimer = util::AsyncTimer::create( + io_service, chrono::milliseconds(100), [](auto expirationIvalMs) -> bool { LOGS_INFO("HttpAsyncReq_simple: test exceeded the time budget of " << expirationIvalMs.count() - << " ms"); + << "ms"); std::exit(1); }); testAbortTimer->start(); @@ -203,10 +203,10 @@ BOOST_AUTO_TEST_CASE(HttpAsyncReq_body_limit_error) { // The deadline timer limits the duration of the test to prevent the test from // being stuck for longer than expected. - auto const testAbortTimer = - AsyncTimer::create(io_service, chrono::milliseconds(100), [](auto expirationIvalMs) { + auto const testAbortTimer = util::AsyncTimer::create( + io_service, chrono::milliseconds(100), [](auto expirationIvalMs) -> bool { LOGS_INFO("HttpAsyncReq_body_limit_error: test exceeded the time budget of " - << expirationIvalMs.count() << " ms"); + << expirationIvalMs.count() << "ms"); std::exit(1); }); testAbortTimer->start(); @@ -259,10 +259,10 @@ BOOST_AUTO_TEST_CASE(HttpAsyncReq_expired) { // The deadline timer limits the duration of the test to prevent the test from // being stuck for longer than expected. - auto const testAbortTimer = - AsyncTimer::create(io_service, chrono::milliseconds(3000), [](auto expirationIvalMs) { + auto const testAbortTimer = util::AsyncTimer::create( + io_service, chrono::milliseconds(3000), [](auto expirationIvalMs) -> bool { LOGS_INFO("HttpAsyncReq_expired: test exceeded the time budget of " - << expirationIvalMs.count() << " ms"); + << expirationIvalMs.count() << "ms"); std::exit(1); }); testAbortTimer->start(); @@ -309,10 +309,10 @@ BOOST_AUTO_TEST_CASE(HttpAsyncReq_cancelled) { // The deadline timer limits the duration of the test to prevent the test from // being stuck for longer than expected. 
- auto const testAbortTimer = - AsyncTimer::create(io_service, chrono::milliseconds(3000), [](auto expirationIvalMs) { + auto const testAbortTimer = util::AsyncTimer::create( + io_service, chrono::milliseconds(3000), [](auto expirationIvalMs) -> bool { LOGS_INFO("HttpAsyncReq_simple: test exceeded the time budget of " << expirationIvalMs.count() - << " ms"); + << "ms"); std::exit(1); }); testAbortTimer->start(); @@ -339,9 +339,11 @@ BOOST_AUTO_TEST_CASE(HttpAsyncReq_cancelled) { req->start(); // The deadline timer for cancelling the request - auto const cancelReqTimer = - AsyncTimer::create(io_service, chrono::milliseconds(1000), - [&req](auto expirationIvalMs) { BOOST_CHECK(req->cancel()); }); + auto const cancelReqTimer = util::AsyncTimer::create(io_service, chrono::milliseconds(1000), + [&req](auto expirationIvalMs) -> bool { + BOOST_CHECK(req->cancel()); + return false; + }); cancelReqTimer->start(); thread serviceThread([&io_service]() { io_service.run(); }); @@ -357,10 +359,10 @@ BOOST_AUTO_TEST_CASE(HttpAsyncReq_cancelled_before_started) { // The deadline timer limits the duration of the test to prevent the test from // being stuck for longer than expected. - auto const testAbortTimer = - AsyncTimer::create(io_service, chrono::milliseconds(300), [](auto expirationIvalMs) { + auto const testAbortTimer = util::AsyncTimer::create( + io_service, chrono::milliseconds(300), [](auto expirationIvalMs) -> bool { LOGS_INFO("HttpAsyncReq_cancelled_before_started: test exceeded the time budget of " - << expirationIvalMs.count() << " ms"); + << expirationIvalMs.count() << "ms"); std::exit(1); }); testAbortTimer->start(); @@ -419,10 +421,10 @@ BOOST_AUTO_TEST_CASE(HttpAsyncReq_delayed_server_start) { // The deadline timer limits the duration of the test to prevent the test from // being stuck for longer than expected. 
- auto const testAbortTimer = - AsyncTimer::create(io_service, chrono::milliseconds(5000), [](auto expirationIvalMs) { + auto const testAbortTimer = util::AsyncTimer::create( + io_service, chrono::milliseconds(5000), [](auto expirationIvalMs) -> bool { LOGS_INFO("HttpAsyncReq_delayed_server_start: test exceeded the time budget of " - << expirationIvalMs.count() << " ms"); + << expirationIvalMs.count() << "ms"); std::exit(1); }); testAbortTimer->start(); @@ -439,9 +441,11 @@ BOOST_AUTO_TEST_CASE(HttpAsyncReq_delayed_server_start) { shared_ptr req; // Delay server startup before expiration of the timer - auto const serverStartDelayTimer = - AsyncTimer::create(io_service, chrono::milliseconds(3000), - [&httpServer, &req](auto expirationIvalMs) { httpServer.start(); }); + auto const serverStartDelayTimer = util::AsyncTimer::create( + io_service, chrono::milliseconds(3000), [&httpServer, &req](auto expirationIvalMs) -> bool { + httpServer.start(); + return false; + }); serverStartDelayTimer->start(); // Submit a request. diff --git a/src/replica/testIngestRequestMgr.cc b/src/replica/testIngestRequestMgr.cc index dd196cbb6f..26a787696d 100644 --- a/src/replica/testIngestRequestMgr.cc +++ b/src/replica/testIngestRequestMgr.cc @@ -32,11 +32,11 @@ #include "boost/asio.hpp" // Qserv headers -#include "replica/AsyncTimer.h" #include "replica/IngestRequest.h" #include "replica/IngestRequestMgr.h" #include "replica/IngestResourceMgrT.h" #include "replica/TransactionContrib.h" +#include "util/AsyncTimer.h" #include "util/TimeUtils.h" // LSST headers @@ -130,11 +130,12 @@ BOOST_AUTO_TEST_CASE(IngestRequestMgrSimpleTest) { // of the manager. The timer will be fired before each such operation and // be cancelled after completing the one. 
chrono::milliseconds const expirationIvalMs(1000); - auto const timer = AsyncTimer::create(io_service, expirationIvalMs, [](auto expirationIvalMs) { - LOGS_INFO("IngestRequestMgr_simple: test exceeded the time budget of " << expirationIvalMs.count() - << "ms"); - std::exit(1); - }); + auto const timer = + util::AsyncTimer::create(io_service, expirationIvalMs, [](auto expirationIvalMs) -> bool { + LOGS_INFO("IngestRequestMgr_simple: test exceeded the time budget of " + << expirationIvalMs.count() << "ms"); + std::exit(1); + }); // Instantiate the manager. shared_ptr requestScheduler; @@ -280,11 +281,12 @@ BOOST_AUTO_TEST_CASE(IngestRequestMgrComplexTest) { // of the manager. The timer will be fired before each such operation and // be cancelled after completing the one. chrono::milliseconds const expirationIvalMs(10); - auto const timer = AsyncTimer::create(io_service, expirationIvalMs, [](auto expirationIvalMs) { - LOGS_INFO("IngestRequestMgr_simple: test exceeded the time budget of " << expirationIvalMs.count() - << "ms"); - std::exit(1); - }); + auto const timer = + util::AsyncTimer::create(io_service, expirationIvalMs, [](auto expirationIvalMs) -> bool { + LOGS_INFO("IngestRequestMgr_simple: test exceeded the time budget of " + << expirationIvalMs.count() << "ms"); + std::exit(1); + }); shared_ptr const resourceMgr = IngestResourceMgrT::create(); shared_ptr const requestScheduler = IngestRequestMgr::test(resourceMgr); diff --git a/src/replica/AsyncTimer.cc b/src/util/AsyncTimer.cc similarity index 83% rename from src/replica/AsyncTimer.cc rename to src/util/AsyncTimer.cc index a5ce9de27a..b024449eb6 100644 --- a/src/replica/AsyncTimer.cc +++ b/src/util/AsyncTimer.cc @@ -20,14 +20,14 @@ */ // Class header -#include "replica/AsyncTimer.h" +#include "util/AsyncTimer.h" // System headers #include using namespace std; -namespace lsst::qserv::replica { +namespace lsst::qserv::util { shared_ptr AsyncTimer::create(boost::asio::io_service& io_service, 
chrono::milliseconds expirationIvalMs, @@ -52,15 +52,17 @@ AsyncTimer::~AsyncTimer() { _timer.cancel(ec); } -void AsyncTimer::start() { - replica::Lock lock(_mtx, "AsyncTimer::" + string(__func__)); +bool AsyncTimer::start() { + lock_guard lock(_mtx); + if (_onFinish == nullptr) return false; _timer.expires_from_now(boost::posix_time::milliseconds(_expirationIvalMs.count())); _timer.async_wait( [self = shared_from_this()](boost::system::error_code const& ec) { self->_expired(ec); }); + return true; } bool AsyncTimer::cancel() { - replica::Lock lock(_mtx, "AsyncTimer::" + string(__func__)); + lock_guard lock(_mtx); if (nullptr == _onFinish) return false; _onFinish = nullptr; _timer.cancel(); @@ -68,11 +70,16 @@ bool AsyncTimer::cancel() { } void AsyncTimer::_expired(boost::system::error_code const& ec) { - replica::Lock lock(_mtx, "AsyncTimer::" + string(__func__)); if (ec == boost::asio::error::operation_aborted) return; - if (nullptr == _onFinish) return; - _onFinish(_expirationIvalMs); - _onFinish = nullptr; + CallbackType onFinish; + { + lock_guard lock(_mtx); + onFinish = _onFinish; + } + if (onFinish != nullptr) { + bool const restart = onFinish(_expirationIvalMs); + if (restart) start(); + } } -} // namespace lsst::qserv::replica +} // namespace lsst::qserv::util diff --git a/src/replica/AsyncTimer.h b/src/util/AsyncTimer.h similarity index 68% rename from src/replica/AsyncTimer.h rename to src/util/AsyncTimer.h index 42f8ed80fb..2ce28a9db0 100644 --- a/src/replica/AsyncTimer.h +++ b/src/util/AsyncTimer.h @@ -18,23 +18,21 @@ * the GNU General Public License along with this program. If not, * see . 
*/ -#ifndef LSST_QSERV_REPLICA_ASYNCTIMER_H -#define LSST_QSERV_REPLICA_ASYNCTIMER_H +#ifndef LSST_QSERV_UTIL_ASYNCTIMER_H +#define LSST_QSERV_UTIL_ASYNCTIMER_H // System headers #include #include #include +#include #include // Third party headers #include "boost/asio.hpp" -// Qserv headers -#include "replica/Mutex.h" - // This header declarations -namespace lsst::qserv::replica { +namespace lsst::qserv::util { /** * Class AsyncTimer represents a simple asynchronous timer for initiating time-based @@ -62,12 +60,17 @@ namespace lsst::qserv::replica { * timer->cancel(); * ... * @endcode + * @note The call back method gets called in the non-blocking context which allows + * the callback handler to restart or cancel the timer. */ class AsyncTimer : public std::enable_shared_from_this { public: - /// The function type for notifications on the completion of the operation. - /// The only parameter of the function is a value of the expiration interval. - typedef std::function CallbackType; + /** + * The function type for notifications on the completion of the operation. + * The only parameter of the function is a value of the expiration interval. + * The function should return 'true' if the timer has to be started again. + */ + typedef std::function CallbackType; /** * The factory method. @@ -81,18 +84,32 @@ class AsyncTimer : public std::enable_shared_from_this { AsyncTimer(AsyncTimer const&) = delete; AsyncTimer& operator=(AsyncTimer const&) = delete; - /// Non-trivial destrictor is needed to cancel the deadline timer when the current - /// object gets destroyed in the end of a code block, or when the application - /// is exiting. + /** + * Non-trivial destrictor is needed to cancel the deadline timer when + * the current object gets destroyed in the end of a code block, or when + * the application is exiting. + */ ~AsyncTimer(); - /// Start (or restart of already running) the timer. 
- /// If the timer gets restarted then it will begin counting again the interval - /// specified in the class's constructor. - void start(); + std::chrono::milliseconds const& expirationIvalMs() const { return _expirationIvalMs; } - /// Cancel the timer. - /// @return 'false' if the time expired or was already canceled. + /** + * Start (or restart if running) the timer. + * + * If the timer gets restarted then it will begin counting again the interval + * specified in the class's constructor. + * @note The timer could be also restarted automatically by the user-provided + * callbacks returning 'true'. In most use cases that would be the preferred + * scenario. + * @return 'true' if the timer started, or 'false' if the timer was + * already cancelled + */ + bool start(); + + /** + * Cancel the timer. + * @return 'false' if the time expired or it was already canceled. + */ bool cancel(); private: @@ -114,9 +131,9 @@ class AsyncTimer : public std::enable_shared_from_this { /// The mutex for enforcing thread safety of the class public API /// and internal operations. 
- mutable replica::Mutex _mtx; + mutable std::mutex _mtx; }; -} // namespace lsst::qserv::replica +} // namespace lsst::qserv::util -#endif // LSST_QSERV_REPLICA_ASYNCTIMER_H +#endif // LSST_QSERV_UTIL_ASYNCTIMER_H diff --git a/src/util/CMakeLists.txt b/src/util/CMakeLists.txt index 6140d9302d..af365a07f5 100644 --- a/src/util/CMakeLists.txt +++ b/src/util/CMakeLists.txt @@ -1,6 +1,7 @@ add_library(util OBJECT) target_sources(util PRIVATE + AsyncTimer.cc BlockPost.cc Bug.cc CmdLineParser.cc From 6522e9a9c6b3f06aab6b1c81bef24922e030cddd Mon Sep 17 00:00:00 2001 From: Igor Gaponenko Date: Fri, 1 Sep 2023 01:55:33 +0000 Subject: [PATCH 05/15] Moved czar::CzarConfig into its own module cconfig --- src/CMakeLists.txt | 1 + src/cconfig/CMakeLists.txt | 14 ++++++++++++++ src/{czar => cconfig}/CzarConfig.cc | 10 ++++------ src/{czar => cconfig}/CzarConfig.h | 10 +++++----- src/ccontrol/CMakeLists.txt | 2 ++ src/ccontrol/UserQueryFactory.cc | 11 ++++++----- src/ccontrol/UserQueryResources.cc | 4 ++-- src/ccontrol/UserQuerySelect.cc | 4 ++-- src/czar/CMakeLists.txt | 2 +- src/czar/Czar.cc | 4 ++-- src/czar/Czar.h | 4 ++-- src/qana/CMakeLists.txt | 1 + src/qdisp/CMakeLists.txt | 2 ++ src/qmeta/CMakeLists.txt | 2 ++ src/qproc/CMakeLists.txt | 2 ++ src/qproc/TaskMsgFactory.cc | 4 ++-- src/query/CMakeLists.txt | 1 + src/rproc/CMakeLists.txt | 2 ++ src/rproc/InfileMerger.cc | 10 +++++----- 19 files changed, 58 insertions(+), 32 deletions(-) create mode 100644 src/cconfig/CMakeLists.txt rename src/{czar => cconfig}/CzarConfig.cc (97%) rename src/{czar => cconfig}/CzarConfig.h (98%) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index c4ce354f95..de169cf31d 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -43,6 +43,7 @@ add_custom_target(clang-format-check #----------------------------------------------------------------------------- add_subdirectory(admin) +add_subdirectory(cconfig) add_subdirectory(ccontrol) add_subdirectory(css) add_subdirectory(czar) diff --git 
a/src/cconfig/CMakeLists.txt b/src/cconfig/CMakeLists.txt new file mode 100644 index 0000000000..daf51d290e --- /dev/null +++ b/src/cconfig/CMakeLists.txt @@ -0,0 +1,14 @@ +add_library(cconfig OBJECT) + +target_sources(cconfig PRIVATE + CzarConfig.cc +) + +target_include_directories(cconfig PRIVATE + ${XROOTD_INCLUDE_DIRS} +) + +target_link_libraries(cconfig PUBLIC + log + XrdSsiLib +) diff --git a/src/czar/CzarConfig.cc b/src/cconfig/CzarConfig.cc similarity index 97% rename from src/czar/CzarConfig.cc rename to src/cconfig/CzarConfig.cc index c4d68072d4..03f77ee0d4 100644 --- a/src/czar/CzarConfig.cc +++ b/src/cconfig/CzarConfig.cc @@ -22,9 +22,7 @@ */ // Class header -#include "czar/CzarConfig.h" - -// System headers +#include "cconfig/CzarConfig.h" // Third party headers #include "XrdSsi/XrdSsiLogger.hh" @@ -39,7 +37,7 @@ namespace { -LOG_LOGGER _log = LOG_GET("lsst.qserv.czar.CzarConfig"); +LOG_LOGGER _log = LOG_GET("lsst.qserv.cconfig.CzarConfig"); void QservLogger(struct timeval const& mtime, unsigned long tID, const char* msg, int mlen) { static log4cxx::spi::LocationInfo xrdLoc("client", "", 0); @@ -56,7 +54,7 @@ void QservLogger(struct timeval const& mtime, unsigned long tID, const char* msg bool dummy = XrdSsiLogger::SetMCB(QservLogger, XrdSsiLogger::mcbClient); } // namespace -namespace lsst::qserv::czar { +namespace lsst::qserv::cconfig { std::mutex CzarConfig::_mtxOnInstance; @@ -126,4 +124,4 @@ std::ostream& operator<<(std::ostream& out, CzarConfig const& czarConfig) { return out; } -} // namespace lsst::qserv::czar +} // namespace lsst::qserv::cconfig diff --git a/src/czar/CzarConfig.h b/src/cconfig/CzarConfig.h similarity index 98% rename from src/czar/CzarConfig.h rename to src/cconfig/CzarConfig.h index 31bf39a3fb..4a6280f015 100644 --- a/src/czar/CzarConfig.h +++ b/src/cconfig/CzarConfig.h @@ -21,8 +21,8 @@ * see . 
*/ -#ifndef LSST_QSERV_CZAR_CZARCONFIG_H -#define LSST_QSERV_CZAR_CZARCONFIG_H +#ifndef LSST_QSERV_CCONFIG_CZARCONFIG_H +#define LSST_QSERV_CCONFIG_CZARCONFIG_H // System headers #include @@ -35,7 +35,7 @@ #include "mysql/MySqlConfig.h" #include "util/ConfigStore.h" -namespace lsst::qserv::czar { +namespace lsst::qserv::cconfig { /** * Provide all configuration parameters for a Qserv Czar instance @@ -242,6 +242,6 @@ class CzarConfig { int const _notifyWorkersOnCzarRestart; ///< Sent by czar::Czar }; -} // namespace lsst::qserv::czar +} // namespace lsst::qserv::cconfig -#endif // LSST_QSERV_CZAR_CZARCONFIG_H +#endif // LSST_QSERV_CCONFIG_CZARCONFIG_H diff --git a/src/ccontrol/CMakeLists.txt b/src/ccontrol/CMakeLists.txt index e037a93592..60a042e2cd 100644 --- a/src/ccontrol/CMakeLists.txt +++ b/src/ccontrol/CMakeLists.txt @@ -27,6 +27,7 @@ target_sources(ccontrol PRIVATE target_link_libraries(ccontrol PUBLIC boost_regex + cconfig log parser replica @@ -39,6 +40,7 @@ FUNCTION(ccontrol_tests) FOREACH(TEST IN ITEMS ${ARGV}) add_executable(${TEST} ${TEST}.cc) target_link_libraries(${TEST} PUBLIC + cconfig ccontrol czar parser diff --git a/src/ccontrol/UserQueryFactory.cc b/src/ccontrol/UserQueryFactory.cc index 870f28a928..2838baddc6 100644 --- a/src/ccontrol/UserQueryFactory.cc +++ b/src/ccontrol/UserQueryFactory.cc @@ -36,6 +36,7 @@ #include "lsst/log/Log.h" // Qserv headers +#include "cconfig/CzarConfig.h" #include "ccontrol/ConfigError.h" #include "ccontrol/ConfigMap.h" #include "ccontrol/ParseRunner.h" @@ -51,7 +52,6 @@ #include "ccontrol/UserQueryType.h" #include "css/CssAccess.h" #include "css/KvInterfaceImplMem.h" -#include "czar/CzarConfig.h" #include "mysql/MySqlConfig.h" #include "parser/ParseException.h" #include "qdisp/Executive.h" @@ -151,7 +151,7 @@ bool qmetaHasDataForSelectCountStarQuery(query::SelectStmt::Ptr const& stmt, auto const& fromTable = tableRefPtr->getTable(); rowsTable = fromDb + "__" + fromTable + "__rows"; // TODO consider using 
QMetaSelect instead of making a new connection. - auto cnx = sql::SqlConnectionFactory::make(czar::CzarConfig::instance()->getMySqlQmetaConfig()); + auto cnx = sql::SqlConnectionFactory::make(cconfig::CzarConfig::instance()->getMySqlQmetaConfig()); sql::SqlErrorObject err; auto tableExists = cnx->tableExists(rowsTable, err); LOGS(_log, LOG_LVL_DEBUG, @@ -162,7 +162,7 @@ bool qmetaHasDataForSelectCountStarQuery(query::SelectStmt::Ptr const& stmt, std::shared_ptr makeUserQuerySharedResources( std::shared_ptr const& dbModels, std::string const& czarName) { - std::shared_ptr const czarConfig = czar::CzarConfig::instance(); + auto const czarConfig = cconfig::CzarConfig::instance(); return std::make_shared( css::CssAccess::createFromConfig(czarConfig->getCssConfigMap(), czarConfig->getEmptyChunkPath()), czarConfig->getMySqlResultConfig(), @@ -178,8 +178,9 @@ std::shared_ptr makeUserQuerySharedResources( //////////////////////////////////////////////////////////////////////// UserQueryFactory::UserQueryFactory(qproc::DatabaseModels::Ptr const& dbModels, std::string const& czarName) : _userQuerySharedResources(makeUserQuerySharedResources(dbModels, czarName)), - _useQservRowCounterOptimization(true) { - std::shared_ptr const czarConfig = czar::CzarConfig::instance(); + _useQservRowCounterOptimization(true), + _asioIoService() { + auto const czarConfig = cconfig::CzarConfig::instance(); _executiveConfig = std::make_shared( czarConfig->getXrootdFrontendUrl(), czarConfig->getQMetaSecondsBetweenChunkUpdates()); diff --git a/src/ccontrol/UserQueryResources.cc b/src/ccontrol/UserQueryResources.cc index 8bef7c4030..028a04994e 100644 --- a/src/ccontrol/UserQueryResources.cc +++ b/src/ccontrol/UserQueryResources.cc @@ -27,7 +27,7 @@ #include "qmeta/QMeta.h" // qserv headers -#include "czar/CzarConfig.h" +#include "cconfig/CzarConfig.h" #include "util/SemaMgr.h" namespace lsst::qserv::ccontrol { @@ -50,7 +50,7 @@ UserQuerySharedResources::UserQuerySharedResources( 
resultDbConn(resultDbConn_), databaseModels(dbModels_), interactiveChunkLimit(interactiveChunkLimit_), - semaMgrConnections(new util::SemaMgr(czar::CzarConfig::instance()->getResultMaxConnections())) { + semaMgrConnections(new util::SemaMgr(cconfig::CzarConfig::instance()->getResultMaxConnections())) { // register czar in QMeta // TODO: check that czar with the same name is not active already? qMetaCzarId = queryMetadata->registerCzar(czarName); diff --git a/src/ccontrol/UserQuerySelect.cc b/src/ccontrol/UserQuerySelect.cc index 79d08994cc..e06f430f0d 100644 --- a/src/ccontrol/UserQuerySelect.cc +++ b/src/ccontrol/UserQuerySelect.cc @@ -76,10 +76,10 @@ #include "lsst/log/Log.h" // Qserv headers +#include "cconfig/CzarConfig.h" #include "ccontrol/MergingHandler.h" #include "ccontrol/TmpTableName.h" #include "ccontrol/UserQueryError.h" -#include "czar/CzarConfig.h" #include "global/constants.h" #include "global/LogContext.h" #include "global/MsgReceiver.h" @@ -388,7 +388,7 @@ QueryState UserQuerySelect::join() { operation = proto::QueryManagement::CANCEL; state = ERROR; } - std::shared_ptr const czarConfig = czar::CzarConfig::instance(); + auto const czarConfig = cconfig::CzarConfig::instance(); if (czarConfig->notifyWorkersOnQueryFinish()) { try { xrdreq::QueryManagementAction::notifyAllWorkers(czarConfig->getXrootdFrontendUrl(), operation, diff --git a/src/czar/CMakeLists.txt b/src/czar/CMakeLists.txt index 381275de3f..df99637634 100644 --- a/src/czar/CMakeLists.txt +++ b/src/czar/CMakeLists.txt @@ -2,7 +2,6 @@ add_library(czar OBJECT) target_sources(czar PRIVATE Czar.cc - CzarConfig.cc MessageTable.cc ) @@ -11,6 +10,7 @@ target_include_directories(czar PRIVATE ) target_link_libraries(czar PUBLIC + cconfig log XrdSsiLib ) diff --git a/src/czar/Czar.cc b/src/czar/Czar.cc index 084db248b1..45db5d69af 100644 --- a/src/czar/Czar.cc +++ b/src/czar/Czar.cc @@ -36,10 +36,10 @@ #include "lsst/log/Log.h" // Qserv headers +#include "cconfig/CzarConfig.h" #include 
"ccontrol/ConfigMap.h" #include "ccontrol/UserQuerySelect.h" #include "ccontrol/UserQueryType.h" -#include "czar/CzarConfig.h" #include "czar/CzarErrors.h" #include "czar/MessageTable.h" #include "global/LogContext.h" @@ -89,7 +89,7 @@ Czar::Ptr Czar::createCzar(string const& configPath, string const& czarName) { // Constructors Czar::Czar(string const& configPath, string const& czarName) : _czarName(czarName), - _czarConfig(CzarConfig::create(configPath)), + _czarConfig(cconfig::CzarConfig::create(configPath)), _idCounter(), _uqFactory(), _clientToQuery(), diff --git a/src/czar/Czar.h b/src/czar/Czar.h index 54a2b71e5e..f559fcf1a3 100644 --- a/src/czar/Czar.h +++ b/src/czar/Czar.h @@ -47,7 +47,7 @@ namespace lsst::qserv { -namespace czar { +namespace cconfig { class CzarConfig; } @@ -149,7 +149,7 @@ class Czar { typedef std::map> IdToQuery; std::string const _czarName; ///< Unique czar name - std::shared_ptr const _czarConfig; + std::shared_ptr const _czarConfig; std::atomic _idCounter; ///< Query/task identifier for next query std::unique_ptr _uqFactory; diff --git a/src/qana/CMakeLists.txt b/src/qana/CMakeLists.txt index ccf5634e4c..c9df3d8ada 100644 --- a/src/qana/CMakeLists.txt +++ b/src/qana/CMakeLists.txt @@ -25,6 +25,7 @@ FUNCTION(qana_tests) FOREACH(TEST IN ITEMS ${ARGV}) add_executable(${TEST} ${TEST}.cc) target_link_libraries(${TEST} PUBLIC + cconfig ccontrol czar parser diff --git a/src/qdisp/CMakeLists.txt b/src/qdisp/CMakeLists.txt index 1f34bdd971..a0b7ee338a 100644 --- a/src/qdisp/CMakeLists.txt +++ b/src/qdisp/CMakeLists.txt @@ -20,6 +20,7 @@ target_include_directories(qdisp PRIVATE ) target_link_libraries(qdisp PUBLIC + cconfig log XrdSsiLib ) @@ -31,6 +32,7 @@ target_include_directories(testQDisp PRIVATE ) target_link_libraries(testQDisp + cconfig ccontrol czar parser diff --git a/src/qmeta/CMakeLists.txt b/src/qmeta/CMakeLists.txt index 4e0b3f24d2..4d49fea7ca 100644 --- a/src/qmeta/CMakeLists.txt +++ b/src/qmeta/CMakeLists.txt @@ -9,6 +9,7 @@ 
target_sources(qserv_meta PRIVATE ) target_link_libraries(qserv_meta PUBLIC + cconfig qdisp qserv_common log @@ -21,6 +22,7 @@ install(DIRECTORY schema/ DESTINATION ${CMAKE_INSTALL_PREFIX}/qserv/smig/qmeta/s add_executable(testQMeta testQMeta.cc) target_link_libraries(testQMeta + cconfig qserv_meta Boost::unit_test_framework Threads::Threads diff --git a/src/qproc/CMakeLists.txt b/src/qproc/CMakeLists.txt index 1a022694f6..a27ad4db98 100644 --- a/src/qproc/CMakeLists.txt +++ b/src/qproc/CMakeLists.txt @@ -12,6 +12,7 @@ target_sources(qproc PRIVATE ) target_link_libraries(qproc PRIVATE + cconfig log sphgeom ) @@ -20,6 +21,7 @@ FUNCTION(qproc_tests) FOREACH(TEST IN ITEMS ${ARGV}) add_executable(${TEST} ${TEST}.cc ../tests/QueryAnaHelper.cc) target_link_libraries(${TEST} PUBLIC + cconfig ccontrol czar parser diff --git a/src/qproc/TaskMsgFactory.cc b/src/qproc/TaskMsgFactory.cc index 90a091d355..80f86dbd3a 100644 --- a/src/qproc/TaskMsgFactory.cc +++ b/src/qproc/TaskMsgFactory.cc @@ -41,7 +41,7 @@ #include "lsst/log/Log.h" // Qserv headers -#include "czar/CzarConfig.h" +#include "cconfig/CzarConfig.h" #include "global/intTypes.h" #include "qmeta/types.h" #include "qproc/ChunkQuerySpec.h" @@ -85,7 +85,7 @@ std::shared_ptr TaskMsgFactory::_makeMsg(ChunkQuerySpec const& c taskMsg->set_scanpriority(chunkQuerySpec.scanInfo.scanRating); taskMsg->set_scaninteractive(chunkQuerySpec.scanInteractive); - taskMsg->set_maxtablesize_mb(czar::CzarConfig::instance()->getMaxTableSizeMB()); + taskMsg->set_maxtablesize_mb(cconfig::CzarConfig::instance()->getMaxTableSizeMB()); // per-chunk taskMsg->set_chunkid(chunkQuerySpec.chunkId); diff --git a/src/query/CMakeLists.txt b/src/query/CMakeLists.txt index 90c351aad2..6fcfbbb332 100644 --- a/src/query/CMakeLists.txt +++ b/src/query/CMakeLists.txt @@ -49,6 +49,7 @@ FUNCTION(query_tests) FOREACH(TEST IN ITEMS ${ARGV}) add_executable(${TEST} ${TEST}.cc) target_link_libraries(${TEST} PUBLIC + cconfig ccontrol czar parser diff --git 
a/src/rproc/CMakeLists.txt b/src/rproc/CMakeLists.txt index 91910dd95e..4c96284c92 100644 --- a/src/rproc/CMakeLists.txt +++ b/src/rproc/CMakeLists.txt @@ -8,6 +8,7 @@ target_sources(rproc PRIVATE target_link_libraries(rproc PUBLIC boost_regex + cconfig log ) @@ -18,6 +19,7 @@ FUNCTION(rproc_tests) FOREACH(TEST IN ITEMS ${ARGV}) add_executable(${TEST} ${TEST}.cc) target_link_libraries(${TEST} PUBLIC + cconfig ccontrol czar parser diff --git a/src/rproc/InfileMerger.cc b/src/rproc/InfileMerger.cc index f928ec3f61..9fc02eb669 100644 --- a/src/rproc/InfileMerger.cc +++ b/src/rproc/InfileMerger.cc @@ -54,8 +54,7 @@ #include "lsst/log/Log.h" // Qserv headers -#include "czar/Czar.h" -#include "czar/CzarConfig.h" +#include "cconfig/CzarConfig.h" #include "global/intTypes.h" #include "proto/WorkerResponse.h" #include "proto/ProtoImporter.h" @@ -73,6 +72,7 @@ #include "util/Bug.h" #include "util/IterableFormatter.h" #include "util/StringHash.h" +#include "util/Timer.h" namespace { // File-scope helpers @@ -120,11 +120,11 @@ InfileMerger::InfileMerger(InfileMergerConfig const& c, std::shared_ptrgetMaxSqlConnectionAttempts()), - _maxResultTableSizeBytes(czar::CzarConfig::instance()->getMaxTableSizeMB() * MB_SIZE_BYTES), + _maxSqlConnectionAttempts(cconfig::CzarConfig::instance()->getMaxSqlConnectionAttempts()), + _maxResultTableSizeBytes(cconfig::CzarConfig::instance()->getMaxTableSizeMB() * MB_SIZE_BYTES), _semaMgrConn(semaMgrConn) { _fixupTargetName(); - _setEngineFromStr(czar::CzarConfig::instance()->getResultEngine()); + _setEngineFromStr(cconfig::CzarConfig::instance()->getResultEngine()); if (_dbEngine == MYISAM) { LOGS(_log, LOG_LVL_INFO, "Engine is MYISAM, serial"); if (!_setupConnectionMyIsam()) { From ff22018f98ae1069383db6e04f60b0e5698a15cd Mon Sep 17 00:00:00 2001 From: Igor Gaponenko Date: Wed, 23 Aug 2023 02:07:51 +0000 Subject: [PATCH 06/15] Improved and extended implementation of the Czar monitoring Extend the Histogram class to report the histogram label 
(id). Added a histogram to capture the performance of the result file reads at Czar. Added counters for the number of on-going result file reads (file-based result delivery protocol only) and merges. Fixed database merge histogram load. Added integral counters for metrics reset at the start up time of Czar. Added the timestamp (milliseconds) attribute to the JSON object that returns a state of the monitoring parameters. The timestamps are going to be used for the performance monitoring of Qserv. --- src/ccontrol/MergingHandler.cc | 56 ++++++++++++++++++++++ src/qdisp/CzarStats.cc | 69 +++++++++++++++++--------- src/qdisp/CzarStats.h | 88 +++++++++++++++++++++++++++++----- src/qdisp/Executive.cc | 6 +++ src/rproc/InfileMerger.cc | 77 +++++++++++++++++++++-------- src/rproc/InfileMerger.h | 4 +- src/util/Histogram.h | 2 + 7 files changed, 244 insertions(+), 58 deletions(-) diff --git a/src/ccontrol/MergingHandler.cc b/src/ccontrol/MergingHandler.cc index 45e76291bf..759b139d99 100644 --- a/src/ccontrol/MergingHandler.cc +++ b/src/ccontrol/MergingHandler.cc @@ -27,6 +27,7 @@ // System headers #include #include +#include #include // Third-party headers @@ -37,11 +38,13 @@ // Qserv headers #include "ccontrol/msgCode.h" +#include "global/clock_defs.h" #include "global/debugUtil.h" #include "global/MsgReceiver.h" #include "proto/ProtoHeaderWrap.h" #include "proto/ProtoImporter.h" #include "proto/WorkerResponse.h" +#include "qdisp/CzarStats.h" #include "qdisp/JobQuery.h" #include "replica/HttpClient.h" #include "rproc/InfileMerger.h" @@ -90,6 +93,27 @@ string xrootUrl2path(string const& xrootUrl) { throw runtime_error("MergingHandler::" + string(__func__) + " illegal file resource url: " + xrootUrl); } +/** + * Instances of this class are used to update statistic counter on starting + * and finishing operations with the result files. 
+ */ +class ResultFileTracker { +public: + ResultFileTracker() { lsst::qserv::qdisp::CzarStats::get()->addResultFile(); } + ~ResultFileTracker() { lsst::qserv::qdisp::CzarStats::get()->deleteResultFile(); } +}; + +// The logging function employed by the transmit rate tracker to report +// the data transfer rates in a histogram. The histogram is used in +// the performance monitoring of the application. +lsst::qserv::TimeCountTracker::CALLBACKFUNC const reportFileRecvRate = + [](lsst::qserv::TIMEPOINT start, lsst::qserv::TIMEPOINT end, double bytes, bool success) { + if (!success) return; + if (chrono::duration const seconds = end - start; seconds.count() > 0) { + lsst::qserv::qdisp::CzarStats::get()->addFileReadRate(bytes / seconds.count()); + } + }; + bool readXrootFileResourceAndMerge(lsst::qserv::proto::Result const& result, function const& messageIsReady) { string const context = "MergingHandler::" + string(__func__) + " "; @@ -97,6 +121,9 @@ bool readXrootFileResourceAndMerge(lsst::qserv::proto::Result const& result, // Extract data from the input result object before modifying the one. string const xrootUrl = result.fileresource_xroot(); + // Track the file while the control flow is staying within the function. + ResultFileTracker const resultFileTracker; + // The algorithm will read the input file to locate result objects containing rows // and call the provided callback for each such row. XrdCl::File file; @@ -118,6 +145,9 @@ bool readXrootFileResourceAndMerge(lsst::qserv::proto::Result const& result, bool success = true; try { while (true) { + // This starts a timer of the data transmit rate tracker. + auto transmitRateTracker = make_unique>(reportFileRecvRate); + // Read the frame header that carries a size of the subsequent message. 
uint32_t msgSizeBytes = 0; uint32_t bytesRead = 0; @@ -170,6 +200,14 @@ bool readXrootFileResourceAndMerge(lsst::qserv::proto::Result const& result, offset += bytesRead; bytes2read -= bytesRead; } + + // Destroying the tracker will result in stopping the tracker's timer and + // reporting the file read rate before proceeding to the merge. + transmitRateTracker->addToValue(msgSizeBytes); + transmitRateTracker->setSuccess(); + transmitRateTracker.reset(); + + // Proceed to the result merge success = messageIsReady(buf.get(), msgSizeBytes); if (!success) break; } @@ -201,6 +239,12 @@ bool readHttpFileAndMerge(lsst::qserv::proto::Result const& result, // Extract data from the input result object before modifying the one. string const httpUrl = result.fileresource_http(); + // Track the file while the control flow is staying within the function. + ResultFileTracker const resultFileTracker; + + // The data transmit rate tracker is set up before reading each data message. + unique_ptr> transmitRateTracker; + // A location of the next byte to be read from the input file. The variable // is used for error reporting. uint64_t offset = 0; @@ -256,6 +300,9 @@ bool readHttpFileAndMerge(lsst::qserv::proto::Result const& result, msgBufSize = msgSizeBytes; msgBuf.reset(new char[msgBufSize]); } + // Starts the tracker to measure the performance of the network I/O. + transmitRateTracker = + make_unique>(reportFileRecvRate); } } else { // Continue or finish reading the message body. @@ -268,6 +315,15 @@ bool readHttpFileAndMerge(lsst::qserv::proto::Result const& result, if (msgBufNext == msgSizeBytes) { // Done reading message body. msgBufNext = 0; + + // Destroying the tracker will result in stopping the tracker's timer and + // reporting the file read rate before proceeding to the merge. + if (transmitRateTracker != nullptr) { + transmitRateTracker->addToValue(msgSizeBytes); + transmitRateTracker->setSuccess(); + transmitRateTracker.reset(); + } + // Parse and evaluate the message. 
bool const success = messageIsReady(msgBuf.get(), msgSizeBytes); if (!success) { diff --git a/src/qdisp/CzarStats.cc b/src/qdisp/CzarStats.cc index c4c65e0117..bc0760a244 100644 --- a/src/qdisp/CzarStats.cc +++ b/src/qdisp/CzarStats.cc @@ -24,19 +24,18 @@ // Class header #include "qdisp/CzarStats.h" -#include -#include -#include +// System headers +#include -// qserv headers +// Qserv headers #include "qdisp/QdispPool.h" #include "util/Bug.h" +#include "util/TimeUtils.h" // LSST headers #include "lsst/log/Log.h" using namespace std; - using namespace std::chrono_literals; namespace { @@ -56,12 +55,17 @@ void CzarStats::setup(qdisp::QdispPool::Ptr const& qdispPool) { _globalCzarStats = Ptr(new CzarStats(qdispPool)); } -CzarStats::CzarStats(qdisp::QdispPool::Ptr const& qdispPool) : _qdispPool(qdispPool) { - auto bucketValsRates = {1'000.0, 1'000'000.0, 500'000'000.0, 1'000'000'000.0}; - _histTrmitRecvRate = util::HistogramRolling::Ptr( - new util::HistogramRolling("TransmitRecvRateBytesPerSec", bucketValsRates, 1h, 10000)); +CzarStats::CzarStats(qdisp::QdispPool::Ptr const& qdispPool) + : _qdispPool(qdispPool), _startTimeMs(util::TimeUtils::now()) { + auto bucketValsRates = {128'000.0, 512'000.0, 1'024'000.0, 16'000'000.0, + 128'000'000.0, 256'000'000.0, 512'000'000.0, 768'000'000.0, + 1'000'000'000.0, 2'000'000'000.0, 4'000'000'000.0, 8'000'000'000.0}; + _histXRootDSSIRecvRate = util::HistogramRolling::Ptr( + new util::HistogramRolling("XRootDSSIRecvRateBytesPerSec", bucketValsRates, 1h, 10000)); _histMergeRate = util::HistogramRolling::Ptr( - new util::HistogramRolling("MergeRateRateBytesPerSec", bucketValsRates, 1h, 10000)); + new util::HistogramRolling("MergeRateBytesPerSec", bucketValsRates, 1h, 10000)); + _histFileReadRate = util::HistogramRolling::Ptr( + new util::HistogramRolling("FileReadRateBytesPerSec", bucketValsRates, 1h, 10000)); auto bucketValsTimes = {0.1, 1.0, 10.0, 100.0, 1000.0}; _histRespSetup = util::HistogramRolling::Ptr( new 
util::HistogramRolling("RespSetupTime", bucketValsTimes, 1h, 10000)); @@ -97,35 +101,54 @@ void CzarStats::endQueryRespConcurrentProcessing(TIMEPOINT start, TIMEPOINT end) _histRespProcessing->addEntry(end, secs.count()); } -void CzarStats::addTrmitRecvRate(double bytesPerSec) { - _histTrmitRecvRate->addEntry(bytesPerSec); +void CzarStats::addXRootDSSIRecvRate(double bytesPerSec) { + _histXRootDSSIRecvRate->addEntry(bytesPerSec); LOGS(_log, LOG_LVL_TRACE, - "czarstats::addTrmitRecvRate " << bytesPerSec << " " << _histTrmitRecvRate->getString("")); + "CzarStats::" << __func__ << " " << bytesPerSec << " " << _histXRootDSSIRecvRate->getString("")); } void CzarStats::addMergeRate(double bytesPerSec) { _histMergeRate->addEntry(bytesPerSec); LOGS(_log, LOG_LVL_TRACE, - "czarstats::addTrmitRecvRate " << bytesPerSec << " " << _histMergeRate->getString("") << " jsonA=" - << getTransmitStatsJson() << " jsonB=" << getQdispStatsJson()); + "CzarStats::" << __func__ << " " << bytesPerSec << " " << _histMergeRate->getString("") + << " jsonA=" << getTransmitStatsJson() << " jsonB=" << getQdispStatsJson()); +} + +void CzarStats::addFileReadRate(double bytesPerSec) { + _histFileReadRate->addEntry(bytesPerSec); + LOGS(_log, LOG_LVL_TRACE, + "CzarStats::" << __func__ << " " << bytesPerSec << " " << _histFileReadRate->getString("")); } nlohmann::json CzarStats::getQdispStatsJson() const { nlohmann::json js; js["QdispPool"] = _qdispPool->getJson(); - js["queryRespConcurrentSetupCount"] = static_cast(_queryRespConcurrentSetup); - js["queryRespConcurrentWaitCount"] = static_cast(_queryRespConcurrentWait); - js["queryRespConcurrentProcessingCount"] = static_cast(_queryRespConcurrentProcessing); - js["histRespSetup"] = _histRespSetup->getJson(); - js["histRespWait"] = _histRespWait->getJson(); - js["histRespProcessing"] = _histRespProcessing->getJson(); + js["queryRespConcurrentSetupCount"] = _queryRespConcurrentSetup.load(); + js["queryRespConcurrentWaitCount"] = 
_queryRespConcurrentWait.load(); + js["queryRespConcurrentProcessingCount"] = _queryRespConcurrentProcessing.load(); + js[_histRespSetup->label()] = _histRespSetup->getJson(); + js[_histRespWait->label()] = _histRespWait->getJson(); + js[_histRespProcessing->label()] = _histRespProcessing->getJson(); + js["totalQueries"] = _totalQueries.load(); + js["totalJobs"] = _totalJobs.load(); + js["totalResultFiles"] = _totalResultFiles.load(); + js["totalResultMerges"] = _totalResultMerges.load(); + js["totalBytesRecv"] = _totalBytesRecv.load(); + js["totalRowsRecv"] = _totalRowsRecv.load(); + js["numQueries"] = _numQueries.load(); + js["numJobs"] = _numJobs.load(); + js["numResultFiles"] = _numResultFiles.load(); + js["numResultMerges"] = _numResultMerges.load(); + js["startTimeMs"] = _startTimeMs; + js["snapshotTimeMs"] = util::TimeUtils::now(); return js; } nlohmann::json CzarStats::getTransmitStatsJson() const { nlohmann::json js; - js["TransmitRecvRate"] = _histTrmitRecvRate->getJson(); - js["histMergeRate"] = _histMergeRate->getJson(); + js[_histXRootDSSIRecvRate->label()] = _histXRootDSSIRecvRate->getJson(); + js[_histMergeRate->label()] = _histMergeRate->getJson(); + js[_histFileReadRate->label()] = _histFileReadRate->getJson(); return js; } diff --git a/src/qdisp/CzarStats.h b/src/qdisp/CzarStats.h index b5fe8f7333..63b8b1e010 100644 --- a/src/qdisp/CzarStats.h +++ b/src/qdisp/CzarStats.h @@ -25,7 +25,6 @@ #define LSST_QSERV_QDISP_CZARSTATS_H // System headers -#include #include #include #include @@ -72,12 +71,15 @@ class CzarStats : std::enable_shared_from_this { /// @throws Bug if get() is called before setup() static Ptr get(); - /// Add a bytes per second entry for transmits received - void addTrmitRecvRate(double bytesPerSec); + /// Add a bytes per second entry for query result transmits received over XRootD/SSI + void addXRootDSSIRecvRate(double bytesPerSec); - /// Add a bytes per second entry for merges + /// Add a bytes per second entry for result merges 
void addMergeRate(double bytesPerSec); + /// Add a bytes per second entry for query results read from files + void addFileReadRate(double bytesPerSec); + /// Increase the count of requests being setup. void startQueryRespConcurrentSetup() { ++_queryRespConcurrentSetup; } /// Decrease the count and add the time taken to the histogram. @@ -88,6 +90,48 @@ class CzarStats : std::enable_shared_from_this { /// Decrease the count and add the time taken to the histogram. void endQueryRespConcurrentWait(TIMEPOINT start, TIMEPOINT end); + /// Increment the total number of queries by 1 + void addQuery() { + ++_totalQueries; + ++_numQueries; + } + + /// Decrement the total number of queries by 1 + void deleteQuery() { --_numQueries; } + + /// Increment the total number of incomplete jobs by 1 + void addJob() { + ++_totalJobs; + ++_numJobs; + } + + /// Decrement the total number of incomplete jobs by the specified number + void deleteJobs(uint64_t num = 1) { _numJobs -= num; } + + /// Increment the total number of the operations with result files by 1 + void addResultFile() { + ++_totalResultFiles; + ++_numResultFiles; + } + + /// Decrement the total number of the operations with result files by 1 + void deleteResultFile() { --_numResultFiles; } + + /// Increment the total number of the on-going result merges by 1 + void addResultMerge() { + ++_totalResultMerges; + ++_numResultMerges; + } + + /// Decrement the total number of the on-going result merges by 1 + void deleteResultMerge() { --_numResultMerges; } + + /// Increment the total number of bytes received from workers + void addTotalBytesRecv(uint64_t bytes) { _totalBytesRecv += bytes; } + + /// Increment the total number of rows received from workers + void addTotalRowsRecv(uint64_t rows) { _totalRowsRecv += rows; } + /// Increase the count of requests being processed. void startQueryRespConcurrentProcessing() { ++_queryRespConcurrentProcessing; } /// Decrease the count and add the time taken to the histogram. 
@@ -107,18 +151,38 @@ class CzarStats : std::enable_shared_from_this { /// Connection to get information about the czar's pool of dispatch threads. std::shared_ptr _qdispPool; - /// Histogram for tracking receive rate in bytes per second. - util::HistogramRolling::Ptr _histTrmitRecvRate; + /// The start up time (milliseconds since the UNIX EPOCH) of the status collector. + uint64_t const _startTimeMs = 0; + + /// Histogram for tracking XROOTD/SSI receive rate in bytes per second. + util::HistogramRolling::Ptr _histXRootDSSIRecvRate; /// Histogram for tracking merge rate in bytes per second. util::HistogramRolling::Ptr _histMergeRate; - std::atomic _queryRespConcurrentSetup{0}; ///< Number of request currently being setup - util::HistogramRolling::Ptr _histRespSetup; ///< Histogram for setup time - std::atomic _queryRespConcurrentWait{0}; ///< Number of requests currently waiting - util::HistogramRolling::Ptr _histRespWait; ///< Histogram for wait time - std::atomic _queryRespConcurrentProcessing{0}; ///< Number of requests currently processing - util::HistogramRolling::Ptr _histRespProcessing; ///< Histogram for processing time + /// Histogram for tracking result file read rate in bytes per second. 
+ util::HistogramRolling::Ptr _histFileReadRate; + + std::atomic _queryRespConcurrentSetup{0}; ///< Number of request currently being setup + util::HistogramRolling::Ptr _histRespSetup; ///< Histogram for setup time + std::atomic _queryRespConcurrentWait{0}; ///< Number of requests currently waiting + util::HistogramRolling::Ptr _histRespWait; ///< Histogram for wait time + std::atomic _queryRespConcurrentProcessing{0}; ///< Number of requests currently processing + util::HistogramRolling::Ptr _histRespProcessing; ///< Histogram for processing time + + // Integrated totals (since the start time of Czar) + std::atomic _totalQueries{0}; ///< The total number of queries + std::atomic _totalJobs{0}; ///< The total number of registered jobs across all queries + std::atomic _totalResultFiles{0}; ///< The total number of the result files ever read + std::atomic _totalResultMerges{0}; ///< The total number of the results merges ever attempted + std::atomic _totalBytesRecv{0}; ///< The total number of bytes received from workers + std::atomic _totalRowsRecv{0}; ///< The total number of rows received from workers + + // Running counters + std::atomic _numQueries{0}; ///< The current number of queries being processed + std::atomic _numJobs{0}; ///< The current number of incomplete jobs across all queries + std::atomic _numResultFiles{0}; ///< The current number of the result files being read + std::atomic _numResultMerges{0}; ///< The current number of the results being merged }; } // namespace lsst::qserv::qdisp diff --git a/src/qdisp/Executive.cc b/src/qdisp/Executive.cc index a88cf1cf24..bc2696c8cb 100644 --- a/src/qdisp/Executive.cc +++ b/src/qdisp/Executive.cc @@ -60,6 +60,7 @@ #include "ccontrol/msgCode.h" #include "global/LogContext.h" #include "global/ResourceUnit.h" +#include "qdisp/CzarStats.h" #include "qdisp/JobQuery.h" #include "qdisp/MessageStore.h" #include "qdisp/QueryRequest.h" @@ -108,9 +109,12 @@ Executive::Executive(ExecutiveConfig const& c, shared_ptr 
const& m _secondsBetweenQMetaUpdates = chrono::seconds(_config.secondsBetweenChunkUpdates); _setup(); _setupLimit(); + qdisp::CzarStats::get()->addQuery(); } Executive::~Executive() { + qdisp::CzarStats::get()->deleteQuery(); + qdisp::CzarStats::get()->deleteJobs(_incompleteJobs.size()); // Real XrdSsiService objects are unowned, but mocks are allocated in _setup. delete dynamic_cast(_xrdSsiService); } @@ -446,6 +450,7 @@ bool Executive::_track(int jobId, shared_ptr const& r) { } _incompleteJobs[jobId] = r; size = _incompleteJobs.size(); + qdisp::CzarStats::get()->addJob(); } LOGS(_log, LOG_LVL_DEBUG, "Success TRACKING size=" << size); return true; @@ -464,6 +469,7 @@ void Executive::_unTrack(int jobId) { untracked = true; incompleteJobs = _incompleteJobs.size(); if (_incompleteJobs.empty()) _allJobsComplete.notify_all(); + qdisp::CzarStats::get()->deleteJobs(1); } auto sz = _incompleteJobs.size(); logSome = (sz < 50) || (sz % 1000 == 0) || !untracked; diff --git a/src/rproc/InfileMerger.cc b/src/rproc/InfileMerger.cc index 9fc02eb669..421bd71bf0 100644 --- a/src/rproc/InfileMerger.cc +++ b/src/rproc/InfileMerger.cc @@ -107,6 +107,25 @@ size_t const MB_SIZE_BYTES = 1024 * 1024; string lastMysqlError(MYSQL* mysql) { return "error: " + string(mysql_error(mysql)) + ", errno: " + to_string(mysql_errno(mysql)); } + +/** + * Instances of this class are used to update statistic counter on starting + * and finishing operations with merging results into the database. 
+ */ +class ResultMergeTracker { +public: + ResultMergeTracker() { lsst::qserv::qdisp::CzarStats::get()->addResultMerge(); } + ~ResultMergeTracker() { lsst::qserv::qdisp::CzarStats::get()->deleteResultMerge(); } +}; + +lsst::qserv::TimeCountTracker::CALLBACKFUNC const reportMergeRate = + [](lsst::qserv::TIMEPOINT start, lsst::qserv::TIMEPOINT end, double bytes, bool success) { + if (!success) return; + if (chrono::duration const seconds = end - start; seconds.count() > 0) { + lsst::qserv::qdisp::CzarStats::get()->addMergeRate(bytes / seconds.count()); + } + }; + } // anonymous namespace namespace lsst::qserv::rproc { @@ -255,11 +274,12 @@ bool InfileMerger::merge(std::shared_ptr const& response) semaLock.reset(new util::SemaLock(*_semaMgrConn)); } - TimeCountTracker::CALLBACKFUNC cbf = [](TIMEPOINT start, TIMEPOINT end, double sum, + TimeCountTracker::CALLBACKFUNC cbf = [](TIMEPOINT start, TIMEPOINT end, double bytes, bool success) { - qdisp::CzarStats::Ptr cStats = qdisp::CzarStats::get(); - std::chrono::duration secs = end - start; - cStats->addTrmitRecvRate(sum / secs.count()); + if (!success) return; + if (std::chrono::duration const seconds = end - start; seconds.count() > 0) { + qdisp::CzarStats::get()->addXRootDSSIRecvRate(bytes / seconds.count()); + } }; auto tct = make_shared>(cbf); @@ -299,33 +319,28 @@ bool InfileMerger::merge(std::shared_ptr const& response) tct->addToValue(resultSize); tct->setSuccess(); tct.reset(); // stop transmit recieve timer before merging happens. + + qdisp::CzarStats::get()->addTotalBytesRecv(resultSize); + qdisp::CzarStats::get()->addTotalRowsRecv(response->result.rowcount()); + // Stop here (if requested) after collecting stats on the amount of data collected // from workers. 
if (_config.debugNoMerge) { return true; } - TimeCountTracker::CALLBACKFUNC cbfMerge = [](TIMEPOINT start, TIMEPOINT end, double sum, - bool success) { - qdisp::CzarStats::Ptr cStats = qdisp::CzarStats::get(); - std::chrono::duration secs = end - start; - cStats->addMergeRate(sum / secs.count()); - }; - TimeCountTracker tctMerge(cbfMerge); - auto start = std::chrono::system_clock::now(); switch (_dbEngine) { case MYISAM: - ret = _applyMysqlMyIsam(infileStatement); + ret = _applyMysqlMyIsam(infileStatement, resultSize); break; case INNODB: // Fallthrough case MEMORY: - ret = _applyMysqlInnoDb(infileStatement); + ret = _applyMysqlInnoDb(infileStatement, resultSize); break; default: throw std::invalid_argument("InfileMerger::_dbEngine is unknown =" + engineToStr(_dbEngine)); } - tctMerge.addToValue(resultSize); auto end = std::chrono::system_clock::now(); auto mergeDur = std::chrono::duration_cast(end - start); LOGS(_log, LOG_LVL_DEBUG, @@ -333,8 +348,6 @@ bool InfileMerger::merge(std::shared_ptr const& response) << " used=" << _semaMgrConn->getUsedCount() << ")"); if (not ret) { LOGS(_log, LOG_LVL_ERROR, "InfileMerger::merge mysql applyMysql failure"); - } else { - tctMerge.setSuccess(); } _invalidJobAttemptMgr.decrConcurrentMergeCount(); @@ -343,7 +356,7 @@ bool InfileMerger::merge(std::shared_ptr const& response) return ret; } -bool InfileMerger::_applyMysqlMyIsam(std::string const& query) { +bool InfileMerger::_applyMysqlMyIsam(std::string const& query, size_t resultSize) { std::unique_lock lock(_mysqlMutex); for (int j = 0; !_mysqlConn.connected(); ++j) { // should have connected during construction @@ -361,14 +374,25 @@ bool InfileMerger::_applyMysqlMyIsam(std::string const& query) { } } + // Track the operation while the control flow is staying within the function. + ::ResultMergeTracker const resultMergeTracker; + + // This starts a timer of the result merge rate tracker. The tracker will report + // the counter (if set) upon leaving the method. 
+ lsst::qserv::TimeCountTracker mergeRateTracker(::reportMergeRate); + int rc = mysql_real_query(_mysqlConn.getMySql(), query.data(), query.size()); - if (rc == 0) return true; + if (rc == 0) { + mergeRateTracker.addToValue(resultSize); + mergeRateTracker.setSuccess(); + return true; + } LOGS(_log, LOG_LVL_ERROR, "InfileMerger::_applyMysqlMyIsam mysql_real_query() " + ::lastMysqlError(_mysqlConn.getMySql())); return false; } -bool InfileMerger::_applyMysqlInnoDb(std::string const& query) { +bool InfileMerger::_applyMysqlInnoDb(std::string const& query, size_t resultSize) { mysql::MySqlConnection mySConn(_config.mySqlConfig); if (!mySConn.connected()) { if (!_setupConnectionInnoDb(mySConn)) { @@ -377,8 +401,19 @@ bool InfileMerger::_applyMysqlInnoDb(std::string const& query) { } } + // Track the operation while the control flow is staying within the function. + ::ResultMergeTracker const resultMergeTracker; + + // This starts a timer of the result merge rate tracker. The tracker will report + // the counter (if set) upon leaving the method. 
+ lsst::qserv::TimeCountTracker mergeRateTracker(::reportMergeRate); + int rc = mysql_real_query(mySConn.getMySql(), query.data(), query.size()); - if (rc == 0) return true; + if (rc == 0) { + mergeRateTracker.addToValue(resultSize); + mergeRateTracker.setSuccess(); + return true; + } LOGS(_log, LOG_LVL_ERROR, "InfileMerger::_applyMysqlInnoDb mysql_real_query() " + ::lastMysqlError(mySConn.getMySql())); return false; diff --git a/src/rproc/InfileMerger.h b/src/rproc/InfileMerger.h index 77ab5b7079..bde53e482d 100644 --- a/src/rproc/InfileMerger.h +++ b/src/rproc/InfileMerger.h @@ -232,8 +232,8 @@ class InfileMerger { size_t getTotalResultSize() const; private: - bool _applyMysqlMyIsam(std::string const& query); - bool _applyMysqlInnoDb(std::string const& query); + bool _applyMysqlMyIsam(std::string const& query, size_t resultSize); + bool _applyMysqlInnoDb(std::string const& query, size_t resultSize); bool _merge(std::shared_ptr& response); void _setupRow(); bool _applySql(std::string const& sql); diff --git a/src/util/Histogram.h b/src/util/Histogram.h index 2e53cae6c6..8eaa2ab6ab 100644 --- a/src/util/Histogram.h +++ b/src/util/Histogram.h @@ -89,6 +89,8 @@ class Histogram { double const _maxVal; }; + std::string const& label() const { return _label; } + double getAvg() const; ///< Return the average value of all current entries. double getTotal() const; ///< Return the total value of all entries. From 102aecf90fc699a388bfae860faf3a5bda47176f Mon Sep 17 00:00:00 2001 From: Igor Gaponenko Date: Wed, 30 Aug 2023 01:02:16 +0000 Subject: [PATCH 07/15] Instrumented Qserv Czar to capture query progess history The history is recorded in the transient monitoring store (class qserv::CzarStats) from where it could be sampled at run time via the Qserv mysql-proxy service. 
--- src/cconfig/CzarConfig.cc | 4 +- src/cconfig/CzarConfig.h | 16 ++++ src/ccontrol/UserQueryFactory.cc | 23 +++++- src/ccontrol/UserQueryFactory.h | 14 ++++ src/qdisp/CzarStats.cc | 128 ++++++++++++++++++++++++------- src/qdisp/CzarStats.h | 70 ++++++++++++++++- src/qdisp/Executive.cc | 49 ++++++++++-- src/qdisp/Executive.h | 29 +++++-- src/qdisp/testQDisp.cc | 6 +- 9 files changed, 296 insertions(+), 43 deletions(-) diff --git a/src/cconfig/CzarConfig.cc b/src/cconfig/CzarConfig.cc index 03f77ee0d4..9590e7a577 100644 --- a/src/cconfig/CzarConfig.cc +++ b/src/cconfig/CzarConfig.cc @@ -111,7 +111,9 @@ CzarConfig::CzarConfig(util::ConfigStore const& configStore) _qdispVectMinRunningSizes(configStore.get("qdisppool.vectMinRunningSizes", "0:1:3:3")), _qReqPseudoFifoMaxRunning(configStore.getInt("qdisppool.qReqPseudoFifoMaxRunning", 300)), _notifyWorkersOnQueryFinish(configStore.getInt("tuning.notifyWorkersOnQueryFinish", 1)), - _notifyWorkersOnCzarRestart(configStore.getInt("tuning.notifyWorkersOnCzarRestart", 1)) {} + _notifyWorkersOnCzarRestart(configStore.getInt("tuning.notifyWorkersOnCzarRestart", 1)), + _czarStatsUpdateIvalSec(configStore.getInt("tuning.czarStatsUpdateIvalSec", 1)), + _czarStatsRetainPeriodSec(configStore.getInt("tuning.czarStatsRetainPeriodSec", 24 * 3600)) {} std::ostream& operator<<(std::ostream& out, CzarConfig const& czarConfig) { out << "[cssConfigMap=" << util::printable(czarConfig._cssConfigMap) diff --git a/src/cconfig/CzarConfig.h b/src/cconfig/CzarConfig.h index 4a6280f015..9b5be03fe8 100644 --- a/src/cconfig/CzarConfig.h +++ b/src/cconfig/CzarConfig.h @@ -194,6 +194,18 @@ class CzarConfig { /// and the newer queries. bool notifyWorkersOnCzarRestart() const { return _notifyWorkersOnCzarRestart != 0; } + /// @return The desired sampling frequency of the Czar monitoring which is + /// based on tracking state changes in various entities. If 0 is returned by + /// the method then the monitoring will be disabled. 
+ unsigned int czarStatsUpdateIvalSec() const { return _czarStatsUpdateIvalSec; } + + /// @return The maximum retain period for keeping in memory the relevant metrics + /// captured by the Czar monitoring system. If 0 is returned by the method then + /// query history archiving will be disabled. + /// @note Setting the limit too high may potentially result in running into + /// an OOM situation. + unsigned int czarStatsRetainPeriodSec() const { return _czarStatsRetainPeriodSec; } + private: CzarConfig(util::ConfigStore const& ConfigStore); @@ -240,6 +252,10 @@ class CzarConfig { // Events sent to workers int const _notifyWorkersOnQueryFinish; ///< Sent by cccontrol::UserQuerySelect int const _notifyWorkersOnCzarRestart; ///< Sent by czar::Czar + + // Parameters used for monitoring Czar + unsigned int const _czarStatsUpdateIvalSec; ///< Used by qdisp::Executive + unsigned int const _czarStatsRetainPeriodSec; ///< Used by qdisp::CzarStats }; } // namespace lsst::qserv::cconfig diff --git a/src/ccontrol/UserQueryFactory.cc b/src/ccontrol/UserQueryFactory.cc index 2838baddc6..ba650c8399 100644 --- a/src/ccontrol/UserQueryFactory.cc +++ b/src/ccontrol/UserQueryFactory.cc @@ -193,6 +193,24 @@ UserQueryFactory::UserQueryFactory(qproc::DatabaseModels::Ptr const& dbModels, s // Add logging context with czar ID qmeta::CzarId qMetaCzarId = _userQuerySharedResources->qMetaCzarId; LOG_MDC_INIT([qMetaCzarId]() { LOG_MDC("CZID", std::to_string(qMetaCzarId)); }); + + // BOOST ASIO service is started to process asynchronous timer requests + // in the dedicated thread. However, before starting the thread we need + // to attach the ASIO's "work" object to the ASIO I/O service. This is needed + // to keep the latter busy and prevent the servicing thread from exiting before + // the destruction of this class due to a lack of async requests. 
+ _asioWork.reset(new boost::asio::io_service::work(_asioIoService)); + + // Start the timer servicing thread + _asioTimerThread.reset(new std::thread([&]() { _asioIoService.run(); })); +} + +UserQueryFactory::~UserQueryFactory() { + // Shut down all ongoing (if any) operations on the I/O service + // to unblock the servicing thread. + _asioWork.reset(); + _asioIoService.stop(); + _asioTimerThread->join(); } UserQuery::Ptr UserQueryFactory::newUserQuery(std::string const& aQuery, std::string const& defaultDb, @@ -288,8 +306,9 @@ UserQuery::Ptr UserQueryFactory::newUserQuery(std::string const& aQuery, std::st std::shared_ptr executive; std::shared_ptr infileMergerConfig; if (sessionValid) { - executive = qdisp::Executive::create(*_executiveConfig, messageStore, qdispSharedResources, - _userQuerySharedResources->queryStatsData, qs); + executive = + qdisp::Executive::create(*_executiveConfig, messageStore, qdispSharedResources, + _userQuerySharedResources->queryStatsData, qs, _asioIoService); infileMergerConfig = std::make_shared(_userQuerySharedResources->mysqlResultConfig); infileMergerConfig->debugNoMerge = _debugNoMerge; diff --git a/src/ccontrol/UserQueryFactory.h b/src/ccontrol/UserQueryFactory.h index b813544fa4..35d4819ef3 100644 --- a/src/ccontrol/UserQueryFactory.h +++ b/src/ccontrol/UserQueryFactory.h @@ -34,8 +34,10 @@ // System headers #include #include +#include // Third-party headers +#include "boost/asio.hpp" #include "boost/utility.hpp" // Local headers @@ -69,6 +71,10 @@ class UserQueryFactory : private boost::noncopyable { public: UserQueryFactory(std::shared_ptr const& dbModels, std::string const& czarName); + /// Non-trivial destructor is needed to stop the BOOST ASIO I/O service + /// and join with the timer servicing thread. + ~UserQueryFactory(); + /// @param query: Query text /// @param defaultDb: Default database name, may be empty /// @param qdispPool: Thread pool handling qdisp jobs. 
@@ -85,6 +91,14 @@ class UserQueryFactory : private boost::noncopyable { std::shared_ptr _executiveConfig; bool _useQservRowCounterOptimization; bool _debugNoMerge = false; + // BOOST ASIO service is started to process asynchronous timer requests + // in the dedicated thread. The thread is started by the c-tor of the class. + // The "work" object _asioWork is attached to the ASIO I/O service in order to + // keep the latter busy and prevent the servicing thread from exiting before + // the destruction of this class. + boost::asio::io_service _asioIoService; + std::unique_ptr _asioWork; + std::unique_ptr _asioTimerThread; }; } // namespace lsst::qserv::ccontrol diff --git a/src/qdisp/CzarStats.cc b/src/qdisp/CzarStats.cc index bc0760a244..b1d534ac0b 100644 --- a/src/qdisp/CzarStats.cc +++ b/src/qdisp/CzarStats.cc @@ -28,6 +28,7 @@ #include // Qserv headers +#include "cconfig/CzarConfig.h" #include "qdisp/QdispPool.h" #include "util/Bug.h" #include "util/TimeUtils.h" @@ -120,36 +121,111 @@ void CzarStats::addFileReadRate(double bytesPerSec) { "CzarStats::" << __func__ << " " << bytesPerSec << " " << _histFileReadRate->getString("")); } +void CzarStats::trackQueryProgress(QueryId qid) { + if (qid == 0) return; + uint64_t const currentTimestampMs = util::TimeUtils::now(); + std::lock_guard const lock(_queryProgressMtx); + if (auto itr = _queryNumIncompleteJobs.find(qid); itr != _queryNumIncompleteJobs.end()) return; + _queryNumIncompleteJobs[qid].emplace_back(currentTimestampMs, 0); +} + +void CzarStats::updateQueryProgress(QueryId qid, int numUnfinishedJobs) { + if (qid == 0) return; + uint64_t const currentTimestampMs = util::TimeUtils::now(); + std::lock_guard const lock(_queryProgressMtx); + if (auto itr = _queryNumIncompleteJobs.find(qid); itr != _queryNumIncompleteJobs.end()) { + auto&& history = itr->second; + if (history.empty() || (history.back().numJobs != numUnfinishedJobs)) { + history.emplace_back(currentTimestampMs, numUnfinishedJobs); + } + } else { + 
_queryNumIncompleteJobs[qid].emplace_back(currentTimestampMs, numUnfinishedJobs); + } +} + +void CzarStats::untrackQueryProgress(QueryId qid) { + if (qid == 0) return; + unsigned int const lastSeconds = cconfig::CzarConfig::instance()->czarStatsRetainPeriodSec(); + uint64_t const minTimestampMs = util::TimeUtils::now() - uint64_t{1000} * lastSeconds; + std::lock_guard const lock(_queryProgressMtx); + if (lastSeconds == 0) { + // The query gets removed instantaneously if archiving is not enabled. + if (auto itr = _queryNumIncompleteJobs.find(qid); itr != _queryNumIncompleteJobs.end()) { + _queryNumIncompleteJobs.erase(qid); + } + } else { + // Erase outdated queries. Advance the iterator first since erase() invalidates the erased one. + for (auto itr = _queryNumIncompleteJobs.begin(); itr != _queryNumIncompleteJobs.end();) { + auto const current = itr++; + if (current->second.empty()) continue; + if (current->second.back().timestampMs < minTimestampMs) _queryNumIncompleteJobs.erase(current); + } + } +} + +CzarStats::QueryProgress CzarStats::getQueryProgress(QueryId qid, unsigned int lastSeconds) const { + uint64_t const minTimestampMs = util::TimeUtils::now() - uint64_t{1000} * lastSeconds; + std::lock_guard const lock(_queryProgressMtx); + QueryProgress result; + if (qid == 0) { + if (lastSeconds == 0) { + // Full histories of all registered queries + result = _queryNumIncompleteJobs; + } else { + // Age restricted histories of all registered queries + for (auto&& [id, history] : _queryNumIncompleteJobs) { + for (auto&& point : history) { + if (point.timestampMs >= minTimestampMs) result[id].push_back(point); + } + } + } + } else { + if (auto itr = _queryNumIncompleteJobs.find(qid); itr != _queryNumIncompleteJobs.end()) { + auto&& history = itr->second; + if (lastSeconds == 0) { + // Full history of the specified query + result[qid] = history; + } else { + // Age restricted history of the specified query + for (auto&& point : history) { + if (point.timestampMs >= minTimestampMs) result[qid].push_back(point); + } + } + } + } + return result; +} + nlohmann::json 
CzarStats::getQdispStatsJson() const { - nlohmann::json js; - js["QdispPool"] = _qdispPool->getJson(); - js["queryRespConcurrentSetupCount"] = _queryRespConcurrentSetup.load(); - js["queryRespConcurrentWaitCount"] = _queryRespConcurrentWait.load(); - js["queryRespConcurrentProcessingCount"] = _queryRespConcurrentProcessing.load(); - js[_histRespSetup->label()] = _histRespSetup->getJson(); - js[_histRespWait->label()] = _histRespWait->getJson(); - js[_histRespProcessing->label()] = _histRespProcessing->getJson(); - js["totalQueries"] = _totalQueries.load(); - js["totalJobs"] = _totalJobs.load(); - js["totalResultFiles"] = _totalResultFiles.load(); - js["totalResultMerges"] = _totalResultMerges.load(); - js["totalBytesRecv"] = _totalBytesRecv.load(); - js["totalRowsRecv"] = _totalRowsRecv.load(); - js["numQueries"] = _numQueries.load(); - js["numJobs"] = _numJobs.load(); - js["numResultFiles"] = _numResultFiles.load(); - js["numResultMerges"] = _numResultMerges.load(); - js["startTimeMs"] = _startTimeMs; - js["snapshotTimeMs"] = util::TimeUtils::now(); - return js; + nlohmann::json result; + result["QdispPool"] = _qdispPool->getJson(); + result["queryRespConcurrentSetupCount"] = _queryRespConcurrentSetup.load(); + result["queryRespConcurrentWaitCount"] = _queryRespConcurrentWait.load(); + result["queryRespConcurrentProcessingCount"] = _queryRespConcurrentProcessing.load(); + result[_histRespSetup->label()] = _histRespSetup->getJson(); + result[_histRespWait->label()] = _histRespWait->getJson(); + result[_histRespProcessing->label()] = _histRespProcessing->getJson(); + result["totalQueries"] = _totalQueries.load(); + result["totalJobs"] = _totalJobs.load(); + result["totalResultFiles"] = _totalResultFiles.load(); + result["totalResultMerges"] = _totalResultMerges.load(); + result["totalBytesRecv"] = _totalBytesRecv.load(); + result["totalRowsRecv"] = _totalRowsRecv.load(); + result["numQueries"] = _numQueries.load(); + result["numJobs"] = _numJobs.load(); + 
result["numResultFiles"] = _numResultFiles.load(); + result["numResultMerges"] = _numResultMerges.load(); + result["startTimeMs"] = _startTimeMs; + result["snapshotTimeMs"] = util::TimeUtils::now(); + return result; } nlohmann::json CzarStats::getTransmitStatsJson() const { - nlohmann::json js; - js[_histXRootDSSIRecvRate->label()] = _histXRootDSSIRecvRate->getJson(); - js[_histMergeRate->label()] = _histMergeRate->getJson(); - js[_histFileReadRate->label()] = _histFileReadRate->getJson(); - return js; + nlohmann::json result; + result[_histXRootDSSIRecvRate->label()] = _histXRootDSSIRecvRate->getJson(); + result[_histMergeRate->label()] = _histMergeRate->getJson(); + result[_histFileReadRate->label()] = _histFileReadRate->getJson(); + return result; } } // namespace lsst::qserv::qdisp diff --git a/src/qdisp/CzarStats.h b/src/qdisp/CzarStats.h index 63b8b1e010..6a2c10ef2c 100644 --- a/src/qdisp/CzarStats.h +++ b/src/qdisp/CzarStats.h @@ -27,14 +27,17 @@ // System headers #include #include +#include #include #include #include #include #include #include +#include -// qserv headers +// Qserv headers +#include "global/intTypes.h" #include "util/Histogram.h" #include "util/Mutex.h" @@ -57,6 +60,17 @@ class CzarStats : std::enable_shared_from_this { public: using Ptr = std::shared_ptr; + class HistoryPoint { + public: + HistoryPoint(uint64_t timestampMs_ = 0, int numJobs_ = 0) + : timestampMs(timestampMs_), numJobs(numJobs_) {} + HistoryPoint(HistoryPoint const&) = default; + HistoryPoint& operator=(HistoryPoint const&) = default; + uint64_t timestampMs = 0; + int numJobs = 0; + }; + using QueryProgress = std::unordered_map>; + CzarStats() = delete; CzarStats(CzarStats const&) = delete; CzarStats& operator=(CzarStats const&) = delete; @@ -134,9 +148,57 @@ class CzarStats : std::enable_shared_from_this { /// Increase the count of requests being processed. 
void startQueryRespConcurrentProcessing() { ++_queryRespConcurrentProcessing; } + /// Decrease the count and add the time taken to the histogram. void endQueryRespConcurrentProcessing(TIMEPOINT start, TIMEPOINT end); + /** + * Begin tracking the specified query. + * @note The method won't do anything if the identifier is set to 0. + * @param qid The unique identifier of a query affected by the operation. + */ + void trackQueryProgress(QueryId qid); + + /** + * Update the query counter(s). + * @note The method won't do anything if the identifier is set to 0. + * The method will only record changes in the counter of jobs if + * the provided number differs from the previously recorded value. + * @param qid The unique identifier of a query affected by the operation. + * @param numUnfinishedJobs The number of unfinished jobs. + */ + void updateQueryProgress(QueryId qid, int numUnfinishedJobs); + + /** + * Finish tracking the specified query or "garbage" collect older + * entries in the collection. + * @note The method won't do anything if the identifier is set to 0. + * The behaviour of the method depends on a value of the configuration + * parameter cconfig::CzarConfig::czarStatsRetainPeriodSec() that governs + * the query history archiving in memory. If archiving is not enabled then + * the specified query gets instantaneously removed from the collection. + * Otherwise (if the archiving is enabled) the age of each registered + * (being "tracked") query gets evaluated at each call of this method and + * queries that are found outdated (based on the age of the last recorded + * event of a query) would be removed from the collection. + * @param qid The unique identifier of a query affected by the operation. + * @see cconfig::CzarConfig::czarStatsRetainPeriodSec() + */ + void untrackQueryProgress(QueryId qid); + + /** + * Get info on the progress of the registered queries. + * @param qid The optional unique identifier of a query. 
+ * If 0 is specified as a value of the parameter then all queries will + * be evaluated (given the age restriction mentioned in the parameter + * lastSeconds) + * @param lastSeconds The optional age of the entries to be reported. + * The "age" is interpreted as "-lastSeconds" from a value of the current + * time when the method gets called. If 0 is specified as a value of + * the parameter then all entries of the selected queries will be reported. + */ + QueryProgress getQueryProgress(QueryId qid = 0, unsigned int lastSeconds = 0) const; + /// Get a json object describing the current state of the query dispatch thread pool. nlohmann::json getQdispStatsJson() const; @@ -145,6 +207,7 @@ class CzarStats : std::enable_shared_from_this { private: CzarStats(std::shared_ptr const& qdispPool); + static Ptr _globalCzarStats; ///< Pointer to the global instance. static util::Mutex _globalMtx; ///< Protects `_globalCzarStats` @@ -183,6 +246,11 @@ class CzarStats : std::enable_shared_from_this { std::atomic _numJobs{0}; ///< The current number of incomplete jobs across all queries std::atomic _numResultFiles{0}; ///< The current number of the result files being read std::atomic _numResultMerges{0}; ///< The current number of the results being merged + + // Query progress stats are recorded along with timestamps when changes + // in previously captured counters are detected. 
+ mutable util::Mutex _queryProgressMtx; ///< Protects _queryNumIncompleteJobs + QueryProgress _queryNumIncompleteJobs; }; } // namespace lsst::qserv::qdisp diff --git a/src/qdisp/Executive.cc b/src/qdisp/Executive.cc index bc2696c8cb..1cfdc26add 100644 --- a/src/qdisp/Executive.cc +++ b/src/qdisp/Executive.cc @@ -57,6 +57,7 @@ #include "lsst/log/Log.h" // Qserv headers +#include "cconfig/CzarConfig.h" #include "ccontrol/msgCode.h" #include "global/LogContext.h" #include "global/ResourceUnit.h" @@ -71,6 +72,7 @@ #include "qmeta/Exceptions.h" #include "qmeta/QStatus.h" #include "query/SelectStmt.h" +#include "util/AsyncTimer.h" #include "util/Bug.h" #include "util/EventThread.h" @@ -117,20 +119,57 @@ Executive::~Executive() { qdisp::CzarStats::get()->deleteJobs(_incompleteJobs.size()); // Real XrdSsiService objects are unowned, but mocks are allocated in _setup. delete dynamic_cast(_xrdSsiService); + if (_asyncTimer != nullptr) _asyncTimer->cancel(); + // The query is registered by setQueryId() even when the timer is disabled, + // hence it must always be unregistered to avoid leaking entries in CzarStats. + qdisp::CzarStats::get()->untrackQueryProgress(_id); } Executive::Ptr Executive::create(ExecutiveConfig const& c, shared_ptr const& ms, SharedResources::Ptr const& sharedResources, shared_ptr const& qMeta, - shared_ptr const& querySession) { - Executive::Ptr exec(new Executive(c, ms, sharedResources, qMeta, - querySession)); // make_shared dislikes private constructor. - return exec; + shared_ptr const& querySession, + boost::asio::io_service& asioIoService) { + LOGS(_log, LOG_LVL_DEBUG, "Executive::" << __func__); + Executive::Ptr ptr(new Executive(c, ms, sharedResources, qMeta, querySession)); + + // Start the query progress monitoring timer (if enabled). The query status + // will be sampled on each expiration event of the timer. Note that the timer + // gets restarted automatically for as long as the context (the current + // Executive object) still exists. 
+ // + // IMPORTANT: The weak pointer dependency (unlike the regular shared pointer) + // is required here to allow destroying the Executive object without explicitly + // stopping the timer. + auto const czarStatsUpdateIvalSec = cconfig::CzarConfig::instance()->czarStatsUpdateIvalSec(); + if (czarStatsUpdateIvalSec > 0) { + ptr->_asyncTimer = util::AsyncTimer::create( + asioIoService, std::chrono::milliseconds(czarStatsUpdateIvalSec * 1000), + [self = std::weak_ptr(ptr)](auto expirationIvalMs) -> bool { + auto ptr = self.lock(); + LOGS(_log, LOG_LVL_DEBUG, + "Executive::" << __func__ << " expirationIvalMs: " << expirationIvalMs.count() + << " ms"); + if (ptr != nullptr) { + ptr->_updateStats(); + return true; + } + return false; + }); + ptr->_asyncTimer->start(); + } + return ptr; +} + +void Executive::_updateStats() const { + LOGS(_log, LOG_LVL_DEBUG, "Executive::" << __func__); + qdisp::CzarStats::get()->updateQueryProgress(_id, getNumInflight()); } void Executive::setQueryId(QueryId id) { _id = id; _idStr = QueryIdHelper::makeIdStr(_id); + qdisp::CzarStats::get()->trackQueryProgress(_id); } /// Add a new job to executive queue, if not already in. Not thread-safe. 
@@ -383,7 +422,7 @@ void Executive::_squashSuperfluous() { LOGS(_log, LOG_LVL_DEBUG, "Executive::squashSuperfluous done"); } -int Executive::getNumInflight() { +int Executive::getNumInflight() const { unique_lock lock(_incompleteJobsMutex); return _incompleteJobs.size(); } diff --git a/src/qdisp/Executive.h b/src/qdisp/Executive.h index 136967c298..09cea8bb0f 100644 --- a/src/qdisp/Executive.h +++ b/src/qdisp/Executive.h @@ -33,6 +33,9 @@ #include #include +// Third-party headers +#include "boost/asio.hpp" + // Qserv headers #include "global/intTypes.h" #include "global/ResourceUnit.h" @@ -62,10 +65,16 @@ class QuerySession; } namespace qdisp { - class JobQuery; class MessageStore; class PseudoFifo; +} // namespace qdisp + +namespace util { +class AsyncTimer; +} + +namespace qdisp { struct ExecutiveConfig { typedef std::shared_ptr Ptr; @@ -91,7 +100,8 @@ class Executive : public std::enable_shared_from_this { static Executive::Ptr create(ExecutiveConfig const& c, std::shared_ptr const& ms, SharedResources::Ptr const& sharedResources, std::shared_ptr const& qMeta, - std::shared_ptr const& querySession); + std::shared_ptr const& querySession, + boost::asio::io_service& asioIoService); ~Executive(); @@ -123,8 +133,8 @@ class Executive : public std::enable_shared_from_this { void setScanInteractive(bool interactive) { _scanInteractive = interactive; } - /// @return number of items in flight. - int getNumInflight(); // non-const, requires a mutex. + /// @return number of jobs in flight. + int getNumInflight() const; /// @return a description of the current execution progress. std::string getProgressDesc() const; @@ -183,6 +193,10 @@ class Executive : public std::enable_shared_from_this { // for debugging void _printState(std::ostream& os); + /// The method performs the non-blocking sampling of the query monitoring stats. + /// The stats are pushed to qdisp::CzarStats. 
+ void _updateStats() const; + ExecutiveConfig _config; ///< Personal copy of config std::atomic _empty{true}; std::shared_ptr _messageStore; ///< MessageStore for logging @@ -209,9 +223,9 @@ class Executive : public std::enable_shared_from_this { util::Flag _cancelled{false}; ///< Has execution been cancelled. // Mutexes - std::mutex _incompleteJobsMutex; ///< protect incompleteJobs map. + mutable std::mutex _incompleteJobsMutex; ///< protect incompleteJobs map. - /** Used to record execution errors */ + /// Used to record execution errors mutable std::mutex _errorsMutex; std::condition_variable _allJobsComplete; @@ -236,7 +250,8 @@ class Executive : public std::enable_shared_from_this { std::atomic _totalResultRows{0}; std::weak_ptr _querySession; - int64_t _limit = 0; ///< Limit to number of rows to return. 0 means no limit. + std::shared_ptr _asyncTimer; ///< for non-blocking updates of stats + int64_t _limit = 0; ///< Limit to number of rows to return. 0 means no limit. /// true if query can be returned as soon as _limit rows have been read. 
bool _limitSquashApplies = false; diff --git a/src/qdisp/testQDisp.cc b/src/qdisp/testQDisp.cc index d5933d372c..3905c43266 100644 --- a/src/qdisp/testQDisp.cc +++ b/src/qdisp/testQDisp.cc @@ -25,6 +25,9 @@ #include #include +// Third-party headers +#include "boost/asio.hpp" + // Boost unit test header #define BOOST_TEST_MODULE Qdisp_1 #include @@ -171,6 +174,7 @@ class SetupTest { qdisp::SharedResources::Ptr sharedResources; qdisp::Executive::Ptr ex; std::shared_ptr jqTest; // used only when needed + boost::asio::io_service asioIoService; SetupTest(const char* request) { qrMsg = request; @@ -183,7 +187,7 @@ class SetupTest { sharedResources = qdisp::SharedResources::create(qdispPool, pseudoFifo); std::shared_ptr qStatus; // No updating QStatus, nullptr - ex = qdisp::Executive::create(*conf, ms, sharedResources, qStatus, nullptr); + ex = qdisp::Executive::create(*conf, ms, sharedResources, qStatus, nullptr, asioIoService); } ~SetupTest() {} }; From d3a821f3c0268715cb7689a353c3674795cf5a2c Mon Sep 17 00:00:00 2001 From: Igor Gaponenko Date: Wed, 16 Aug 2023 01:55:57 +0000 Subject: [PATCH 08/15] Processing Czar monitoring requests via MySQL proxy The requests are made by calling a special stored procedure that is recognized by Qserv. The procedure allows a single parameter (a command) that is interpreted by Czar and processed accordingly. 
--- src/ccontrol/UserQueryQservManager.cc | 140 ++++++++++++++++++++++---- 1 file changed, 123 insertions(+), 17 deletions(-) diff --git a/src/ccontrol/UserQueryQservManager.cc b/src/ccontrol/UserQueryQservManager.cc index 0866f63fbc..6a3861112e 100644 --- a/src/ccontrol/UserQueryQservManager.cc +++ b/src/ccontrol/UserQueryQservManager.cc @@ -24,13 +24,25 @@ // Class header #include "ccontrol/UserQueryQservManager.h" +// System headers +#include +#include + +// Third party headers +#include + // LSST headers #include "lsst/log/Log.h" // Qserv headers +#include "qdisp/CzarStats.h" #include "qdisp/MessageStore.h" #include "sql/SqlBulkInsert.h" #include "sql/SqlConnection.h" +#include "util/StringHelper.h" + +using namespace std; +using json = nlohmann::json; namespace { @@ -40,40 +52,134 @@ LOG_LOGGER _log = LOG_GET("lsst.qserv.ccontrol.UserQueryQservManager"); namespace lsst::qserv::ccontrol { -UserQueryQservManager::UserQueryQservManager(std::shared_ptr const& queryResources, - std::string const& value) +UserQueryQservManager::UserQueryQservManager(shared_ptr const& queryResources, + string const& value) : _value(value), _resultTableName("qserv_manager_" + queryResources->userQueryId), - _messageStore(std::make_shared()), + _messageStore(make_shared()), _resultDbConn(queryResources->resultDbConn), _resultDb(queryResources->resultDb) {} void UserQueryQservManager::submit() { - // create result table, one could use formCreateTable() method - // to build statement but it does not set NULL flag on TIMESTAMP columns - std::string createTable = "CREATE TABLE " + _resultTableName + - "(response BLOB)"; // The columns must match resColumns, below. + LOGS(_log, LOG_LVL_TRACE, "processing command: " << _value); + + // Remove quotes around a value of the input parameter. Also parse the command. + // Some commands may have optional parameters. + // Note that (single or double) quotes are required by SQL when calling + // the stored procedure. 
The quotes are preserved AS-IS by the Qserv query parser. + string command; + vector params; + if (_value.size() > 2) { + string const space = " "; + string const quotesRemoved = _value.substr(1, _value.size() - 2); + for (auto&& str : util::StringHelper::splitString(quotesRemoved, space)) { + // This is just in case if the splitter won't recognise consequtive spaces. + if (str.empty() || (str == space)) continue; + if (command.empty()) { + command = str; + } else { + params.push_back(str); + } + } + } + + // Create the table as per the command. + string createTable; + vector resColumns; // This must match the schema in the CREATE TABLE statement. + if (command == "query_proc_stats") { + createTable = "CREATE TABLE " + _resultTableName + "(`stats` BLOB)"; + resColumns.push_back("stats"); + } else if (command == "query_info") { + createTable = "CREATE TABLE " + _resultTableName + + "(`queryId` BIGINT NOT NULL, `timestamp_ms` BIGINT NOT NULL, `num_jobs` INT NOT NULL)"; + resColumns.push_back("queryId"); + resColumns.push_back("timestamp_ms"); + resColumns.push_back("num_jobs"); + } else { + createTable = "CREATE TABLE " + _resultTableName + "(`result` BLOB)"; + resColumns.push_back("result"); + } LOGS(_log, LOG_LVL_TRACE, "creating result table: " << createTable); sql::SqlErrorObject errObj; if (!_resultDbConn->runQuery(createTable, errObj)) { LOGS(_log, LOG_LVL_ERROR, "failed to create result table: " << errObj.errMsg()); - std::string message = "Internal failure, failed to create result table: " + errObj.errMsg(); + string const message = "Internal failure, failed to create result table: " + errObj.errMsg(); _messageStore->addMessage(-1, "SQL", 1051, message, MessageSeverity::MSG_ERROR); _qState = ERROR; return; } - // For now just insert the parsed argument to QSERV_MANAGER into the result table. + // Prepare data for the command. + // note that the output string(s) should be quoted. 
+ auto const stats = qdisp::CzarStats::get(); + list> rows; + if (command == "query_proc_stats") { + json const result = json::object({{"qdisp_stats", stats->getQdispStatsJson()}, + {"transmit_stats", stats->getTransmitStatsJson()}}); + vector row = {"'" + result.dump() + "'"}; + rows.push_back(move(row)); + } else if (command == "query_info") { + // The optonal query identifier and the number of the last seconds in a history + // of queries may be provided to narrow a scope of the operation: + // + // query_info + // query_info + // query_info + // + // Where any value may be set to 0 to indicate the default behavior. Any extra + // parameters will be ignored. + // + QueryId selectQueryId = 0; // any query + unsigned int lastSeconds = 0; // any timestamps + try { + if (params.size() > 0) selectQueryId = stoull(params[0]); + if (params.size() > 1) lastSeconds = stoul(params[1]); + } catch (exception const& ex) { + string const message = + "failed to parse values of parameter from " + _value + ", ex: " + string(ex.what()); + LOGS(_log, LOG_LVL_ERROR, message); + _messageStore->addMessage(-1, "SQL", 1051, message, MessageSeverity::MSG_ERROR); + _qState = ERROR; + return; + } - std::vector resColumns( - {"response"}); // this must match the schema in the CREATE TABLE statement above. + // The original order of timestams within queries will be preserved as if + // the following query was issued: + // + // SELECT + // `queryId`, + // `timestamp_ms`, + // `num_jobs` + // FROM + // `table` + // ORDER BY + // `queryId`, + // `timestamp_ms` ASC + // + for (auto&& [queryId, history] : stats->getQueryProgress(selectQueryId, lastSeconds)) { + string const queryIdStr = to_string(queryId); + for (auto&& point : history) { + vector row = {queryIdStr, to_string(point.timestampMs), to_string(point.numJobs)}; + rows.push_back(move(row)); + } + } + } else { + // Return a value of the original command (which includeds quotes). 
+ vector row = {_value}; + rows.push_back(move(row)); + } + + // Ingest row(s) into the table. + bool success = true; sql::SqlBulkInsert bulkInsert(_resultDbConn.get(), _resultTableName, resColumns); - std::vector values = {_value}; - bool success = bulkInsert.addRow(values, errObj); + for (auto const& row : rows) { + success = success && bulkInsert.addRow(row, errObj); + if (!success) break; + } if (success) success = bulkInsert.flush(errObj); - if (not success) { + if (!success) { LOGS(_log, LOG_LVL_ERROR, "error updating result table: " << errObj.errMsg()); - std::string message = "Internal failure, error updating result table: " + errObj.errMsg(); + string const message = "Internal failure, error updating result table: " + errObj.errMsg(); _messageStore->addMessage(-1, "SQL", 1051, message, MessageSeverity::MSG_ERROR); _qState = ERROR; return; @@ -81,8 +187,8 @@ void UserQueryQservManager::submit() { _qState = SUCCESS; } -std::string UserQueryQservManager::getResultQuery() const { - std::string ret = "SELECT * FROM " + _resultDb + "." + _resultTableName; +string UserQueryQservManager::getResultQuery() const { + string ret = "SELECT * FROM " + _resultDb + "." + _resultTableName; return ret; } From e49651d39ea2fc9dd8a2421a009d7e263a7c747e Mon Sep 17 00:00:00 2001 From: Igor Gaponenko Date: Fri, 8 Sep 2023 03:01:37 +0000 Subject: [PATCH 09/15] Temporary fix to the connection management of the query manager The fix was introduced after discovering that the shared connection usage approach in the original design of this and related classes is highly unreliable. A connection shared by the queries seems to be sensitive to the service startup ordering and occasional disconnects. As a result of that the shared connection could just stop working or it could not be alive from the very start of Czar. It's also possible that the shared connection object is not properly synchronized to allow using it in the multi-threaded environment (which would be a bug). 
The last theory will be further investigated later. --- src/ccontrol/UserQueryQservManager.cc | 13 ++++++++++--- src/ccontrol/UserQueryQservManager.h | 1 - 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/src/ccontrol/UserQueryQservManager.cc b/src/ccontrol/UserQueryQservManager.cc index 6a3861112e..b5a8ed237e 100644 --- a/src/ccontrol/UserQueryQservManager.cc +++ b/src/ccontrol/UserQueryQservManager.cc @@ -35,10 +35,12 @@ #include "lsst/log/Log.h" // Qserv headers +#include "cconfig/CzarConfig.h" #include "qdisp/CzarStats.h" #include "qdisp/MessageStore.h" #include "sql/SqlBulkInsert.h" #include "sql/SqlConnection.h" +#include "sql/SqlConnectionFactory.h" #include "util/StringHelper.h" using namespace std; @@ -57,12 +59,17 @@ UserQueryQservManager::UserQueryQservManager(shared_ptr cons : _value(value), _resultTableName("qserv_manager_" + queryResources->userQueryId), _messageStore(make_shared()), - _resultDbConn(queryResources->resultDbConn), _resultDb(queryResources->resultDb) {} void UserQueryQservManager::submit() { LOGS(_log, LOG_LVL_TRACE, "processing command: " << _value); + // IMPORTANT: make a new connection each time since a state of the database service + // is not deterministic and the SQL library available to Czar is not terribly reliable + // (not able to properly handle disconnects). + auto const czarConfig = cconfig::CzarConfig::instance(); + auto const resultDbConn = sql::SqlConnectionFactory::make(czarConfig->getMySqlResultConfig()); + // Remove quotes around a value of the input parameter. Also parse the command. // Some commands may have optional parameters. 
// Note that (single or double) quotes are required by SQL when calling @@ -101,7 +108,7 @@ void UserQueryQservManager::submit() { } LOGS(_log, LOG_LVL_TRACE, "creating result table: " << createTable); sql::SqlErrorObject errObj; - if (!_resultDbConn->runQuery(createTable, errObj)) { + if (!resultDbConn->runQuery(createTable, errObj)) { LOGS(_log, LOG_LVL_ERROR, "failed to create result table: " << errObj.errMsg()); string const message = "Internal failure, failed to create result table: " + errObj.errMsg(); _messageStore->addMessage(-1, "SQL", 1051, message, MessageSeverity::MSG_ERROR); @@ -171,7 +178,7 @@ void UserQueryQservManager::submit() { // Ingest row(s) into the table. bool success = true; - sql::SqlBulkInsert bulkInsert(_resultDbConn.get(), _resultTableName, resColumns); + sql::SqlBulkInsert bulkInsert(resultDbConn.get(), _resultTableName, resColumns); for (auto const& row : rows) { success = success && bulkInsert.addRow(row, errObj); if (!success) break; diff --git a/src/ccontrol/UserQueryQservManager.h b/src/ccontrol/UserQueryQservManager.h index 365047c785..fee3e56248 100644 --- a/src/ccontrol/UserQueryQservManager.h +++ b/src/ccontrol/UserQueryQservManager.h @@ -85,7 +85,6 @@ class UserQueryQservManager : public UserQuery { std::string const _value; std::string _resultTableName; std::shared_ptr _messageStore; - std::shared_ptr _resultDbConn; QueryState _qState{UNKNOWN}; std::string _resultDb; }; From 54ce44d2029fdbbec2ecbfe930fd2d0e2a5b1a4b Mon Sep 17 00:00:00 2001 From: Igor Gaponenko Date: Thu, 17 Aug 2023 23:59:19 +0000 Subject: [PATCH 10/15] Added the REST service for pulling Czar status --- admin/local/docker/compose/docker-compose.yml | 1 + .../lsst/qserv/admin/replicationInterface.py | 2 +- src/replica/HttpMetaModule.cc | 2 +- src/replica/HttpProcessor.cc | 19 ++- src/replica/HttpQservMonitorModule.cc | 152 +++++++++++++++--- src/replica/HttpQservMonitorModule.h | 36 ++++- src/replica/MasterControllerHttpApp.cc | 12 +- 
src/replica/MasterControllerHttpApp.h | 5 +- 8 files changed, 190 insertions(+), 39 deletions(-) diff --git a/admin/local/docker/compose/docker-compose.yml b/admin/local/docker/compose/docker-compose.yml index 2d8fb7a898..5ba3057190 100644 --- a/admin/local/docker/compose/docker-compose.yml +++ b/admin/local/docker/compose/docker-compose.yml @@ -378,6 +378,7 @@ services: --qserv-czar-db=mysql://root:CHANGEME@czar-db:3306/qservMeta --log-cfg-file=/config-etc/log/log-repl-controller.cnf -- + --qserv-czar-proxy=mysql://qsmaster@czar-proxy:4040 --instance-id=qserv_proj --auth-key=replauthkey --admin-auth-key=repladminauthkey diff --git a/src/admin/python/lsst/qserv/admin/replicationInterface.py b/src/admin/python/lsst/qserv/admin/replicationInterface.py index 4d15f3146f..ae62d09e2d 100644 --- a/src/admin/python/lsst/qserv/admin/replicationInterface.py +++ b/src/admin/python/lsst/qserv/admin/replicationInterface.py @@ -201,7 +201,7 @@ def __init__( self.repl_ctrl = urlparse(repl_ctrl_uri) self.auth_key = auth_key self.admin_auth_key = admin_auth_key - self.repl_api_version = 24 + self.repl_api_version = 25 _log.debug(f"ReplicationInterface %s", self.repl_ctrl) def version(self) -> str: diff --git a/src/replica/HttpMetaModule.cc b/src/replica/HttpMetaModule.cc index f1a476d2b8..e0c6c45aa2 100644 --- a/src/replica/HttpMetaModule.cc +++ b/src/replica/HttpMetaModule.cc @@ -34,7 +34,7 @@ using json = nlohmann::json; namespace lsst::qserv::replica { -unsigned int const HttpMetaModule::version = 24; +unsigned int const HttpMetaModule::version = 25; void HttpMetaModule::process(ServiceProvider::Ptr const& serviceProvider, string const& context, qhttp::Request::Ptr const& req, qhttp::Response::Ptr const& resp, diff --git a/src/replica/HttpProcessor.cc b/src/replica/HttpProcessor.cc index 7baa25268f..7f8ee4d64b 100644 --- a/src/replica/HttpProcessor.cc +++ b/src/replica/HttpProcessor.cc @@ -244,6 +244,11 @@ void HttpProcessor::registerServices() { self->_processorConfig, req, 
resp, "WORKER-DB"); }); + httpServer()->addHandler("GET", "/replication/qserv/master/status", + [self](qhttp::Request::Ptr const req, qhttp::Response::Ptr const resp) { + HttpQservMonitorModule::process(self->controller(), self->name(), + self->_processorConfig, req, resp, "CZAR"); + }); httpServer()->addHandler("GET", "/replication/qserv/master/db", [self](qhttp::Request::Ptr const req, qhttp::Response::Ptr const resp) { HttpQservMonitorModule::process(self->controller(), self->name(), @@ -254,7 +259,19 @@ void HttpProcessor::registerServices() { [self](qhttp::Request::Ptr const req, qhttp::Response::Ptr const resp) { HttpQservMonitorModule::process(self->controller(), self->name(), self->_processorConfig, req, resp, - "QUERIES"); + "QUERIES-ACTIVE"); + }); + httpServer()->addHandler("GET", "/replication/qserv/master/queries/active/progress", + [self](qhttp::Request::Ptr const req, qhttp::Response::Ptr const resp) { + HttpQservMonitorModule::process(self->controller(), self->name(), + self->_processorConfig, req, resp, + "QUERIES-ACTIVE-PROGRESS"); + }); + httpServer()->addHandler("GET", "/replication/qserv/master/queries/past", + [self](qhttp::Request::Ptr const req, qhttp::Response::Ptr const resp) { + HttpQservMonitorModule::process(self->controller(), self->name(), + self->_processorConfig, req, resp, + "QUERIES-PAST"); }); httpServer()->addHandler("GET", "/replication/qserv/master/query/:id", [self](qhttp::Request::Ptr const req, qhttp::Response::Ptr const resp) { diff --git a/src/replica/HttpQservMonitorModule.cc b/src/replica/HttpQservMonitorModule.cc index 49728b7f65..81d511009c 100644 --- a/src/replica/HttpQservMonitorModule.cc +++ b/src/replica/HttpQservMonitorModule.cc @@ -144,10 +144,16 @@ json HttpQservMonitorModule::executeImpl(string const& subModuleName) { return _worker(); else if (subModuleName == "WORKER-DB") return _workerDb(); + else if (subModuleName == "CZAR") + return _czar(); else if (subModuleName == "CZAR-DB") return _czarDb(); - else if 
(subModuleName == "QUERIES") - return _userQueries(); + else if (subModuleName == "QUERIES-ACTIVE") + return _activeQueries(); + else if (subModuleName == "QUERIES-ACTIVE-PROGRESS") + return _activeQueriesProgress(); + else if (subModuleName == "QUERIES-PAST") + return _pastQueries(); else if (subModuleName == "QUERY") return _userQuery(); else if (subModuleName == "CSS") @@ -251,6 +257,31 @@ json HttpQservMonitorModule::_workerDb() { return result; } +json HttpQservMonitorModule::_czar() { + debug(__func__); + checkApiVersion(__func__, 25); + + // Connect to the Czar's MySQL proxy service. + // Execute w/o any transactions since the transcation management isn't supported + // by Qserv Czar. + auto const conn = Connection::open(Configuration::qservCzarProxyParams()); + QueryGenerator const g(conn); + string const command = "query_proc_stats"; + string const query = g.call(g.QSERV_MANAGER(command)); + string response; + conn->execute([&query, &response](auto conn) { selectSingleValue(conn, query, response); }); + string err; + if (response.empty() || (response == command)) { + err = "no response received from Czar"; + } else { + if (auto const status = json::parse(response); status.is_object()) { + return json::object({{"status", status}}); + } + err = "response received from Czar is not a valid JSON object"; + } + throw HttpError(__func__, err + ", query: " + query); +} + json HttpQservMonitorModule::_czarDb() { debug(__func__); checkApiVersion(__func__, 24); @@ -325,35 +356,15 @@ json HttpQservMonitorModule::_schedulers2chunks2json(map> const return result; } -json HttpQservMonitorModule::_userQueries() { +json HttpQservMonitorModule::_activeQueries() { debug(__func__); - checkApiVersion(__func__, 23); - - auto const config = controller()->serviceProvider()->config(); + checkApiVersion(__func__, 25); - string const queryStatus = query().optionalString("query_status", string()); - string const queryType = query().optionalString("query_type", string()); - unsigned 
int const queryAgeSec = query().optionalUInt("query_age", 0); - unsigned int const minElapsedSec = query().optionalUInt("min_elapsed_sec", 0); unsigned int const timeoutSec = query().optionalUInt("timeout_sec", workerResponseTimeoutSec()); - unsigned int const limit4past = query().optionalUInt("limit4past", 1); - string const searchPattern = query().optionalString("search_pattern", string()); - bool const searchRegexpMode = query().optionalUInt("search_regexp_mode", 0) != 0; - bool const includeMessages = query().optionalUInt("include_messages", 0) != 0; - - debug(__func__, "query_status=" + queryStatus); - debug(__func__, "query_type=" + queryType); - debug(__func__, "query_age=" + to_string(queryAgeSec)); - debug(__func__, "min_elapsed_sec=" + to_string(minElapsedSec)); debug(__func__, "timeout_sec=" + to_string(timeoutSec)); - debug(__func__, "limit4past=" + to_string(limit4past)); - debug(__func__, "search_pattern=" + searchPattern); - debug(__func__, "search_regexp_mode=" + bool2str(searchRegexpMode)); - debug(__func__, "include_messages=" + bool2str(includeMessages)); // Check which queries and in which schedulers are being executed // by Qserv workers. - bool const allWorkers = true; auto const job = QservStatusJob::create(timeoutSec, allWorkers, controller()); job->start(); @@ -386,6 +397,99 @@ json HttpQservMonitorModule::_userQueries() { json result; h.conn->executeInOwnTransaction( [&](auto conn) { result["queries"] = _currentUserQueries(conn, queryId2scheduler); }); + return result; +} + +json HttpQservMonitorModule::_activeQueriesProgress() { + debug(__func__); + checkApiVersion(__func__, 25); + + QueryId const selectQueryId = query().optionalUInt64("query_id", 0); + unsigned int const selectLastSeconds = query().optionalUInt("last_seconds", 0); + + debug(__func__, "query_id=" + to_string(selectQueryId)); + debug(__func__, "last_seconds=" + to_string(selectLastSeconds)); + + // Connect to the Czar's MySQL proxy service. 
+ auto const conn = Connection::open(Configuration::qservCzarProxyParams()); + QueryGenerator const g(conn); + string const command = "query_info " + to_string(selectQueryId) + " " + to_string(selectLastSeconds); + string const query = g.call(g.QSERV_MANAGER(command)); + + debug(__func__, "query=" + query); + + // Result set processor populates the JSON object and returns the completion + // status of the operation as a string. The empty string indicates success. + json queries = json::object(); + auto const extractResultSet = [&queries, this](auto conn) -> string { + debug(__func__, ""); + // Clear the result in case the previous retry failed mid-flight. + queries = json::object(); + vector const requiredColumnNames = {"queryId", "timestamp_ms", "num_jobs"}; + if (conn->columnNames() != requiredColumnNames) { + return "unexpected schema of the result set"; + } + string prevQueryIdStr; + Row row; + while (conn->next(row)) { + // Default values indicate NULLs + string const queryIdStr = row.getAs(0, string()); + uint64_t const timestampMs = row.getAs(1, 0); + int const numJobs = row.getAs(2, -1); + if (queryIdStr.empty() || (timestampMs == 0) || (numJobs < 0)) { + return "NULL values in the result set"; + } + // Group query-specific results into dedicated arrays + if (prevQueryIdStr.empty() || (prevQueryIdStr != queryIdStr)) { + prevQueryIdStr = queryIdStr; + queries[queryIdStr] = json::array(); + } + queries[queryIdStr].push_back(json::array({timestampMs, numJobs})); + debug(__func__, "(queryIdStr,timestampMs,numJobs)=(" + queryIdStr + "," + to_string(timestampMs) + + "," + to_string(numJobs) + ")"); + } + return string(); + }; + + // Execute w/o any transactions since the transaction management isn't supported + // by Qserv Czar.
Execute the query via the automatic query retry wrapper + string error; + conn->execute([&query, &error, &extractResultSet](auto conn) { + conn->execute(query); + // if (conn->hasResult()) error = extractResultSet(conn); + error = extractResultSet(conn); + }); + if (error.empty()) return json::object({{"queries", queries}}); + throw HttpError(__func__, error + ", query: " + query); +} + +json HttpQservMonitorModule::_pastQueries() { + debug(__func__); + checkApiVersion(__func__, 25); + + auto const config = controller()->serviceProvider()->config(); + string const queryStatus = query().optionalString("query_status", string()); + string const queryType = query().optionalString("query_type", string()); + unsigned int const queryAgeSec = query().optionalUInt("query_age", 0); + unsigned int const minElapsedSec = query().optionalUInt("min_elapsed_sec", 0); + unsigned int const limit4past = query().optionalUInt("limit4past", 1); + string const searchPattern = query().optionalString("search_pattern", string()); + bool const searchRegexpMode = query().optionalUInt("search_regexp_mode", 0) != 0; + bool const includeMessages = query().optionalUInt("include_messages", 0) != 0; + + debug(__func__, "query_status=" + queryStatus); + debug(__func__, "query_type=" + queryType); + debug(__func__, "query_age=" + to_string(queryAgeSec)); + debug(__func__, "min_elapsed_sec=" + to_string(minElapsedSec)); + debug(__func__, "limit4past=" + to_string(limit4past)); + debug(__func__, "search_pattern=" + searchPattern); + debug(__func__, "search_regexp_mode=" + bool2str(searchRegexpMode)); + debug(__func__, "include_messages=" + bool2str(includeMessages)); + + // Connect to the master database. Manage the new connection via the RAII-style + // handler to ensure the transaction is automatically rolled-back in case of exceptions. 
+ ConnectionHandler const h(Connection::open(Configuration::qservCzarDbParams("qservMeta"))); + QueryGenerator const g(h.conn); // Get info on the past queries matching the specified criteria. string constraints; diff --git a/src/replica/HttpQservMonitorModule.h b/src/replica/HttpQservMonitorModule.h index e3798105bb..16ff4e8816 100644 --- a/src/replica/HttpQservMonitorModule.h +++ b/src/replica/HttpQservMonitorModule.h @@ -54,13 +54,16 @@ class HttpQservMonitorModule : public HttpModule { /** * Supported values for parameter 'subModuleName': * - * WORKERS get the status info of many workers (possible selected by various criteria) - * WORKER get the status info of a specific worker - * WORKER-DB get the database status of a specific worker - * CZAR-DB get the database status of Czar - * QUERIES get user query info (queries selected by various criteria) - * QUERY get user query info for a specific query - * CSS get CSS configurations (the shared scan settings, etc.) + * WORKERS - get the status info of many workers + * WORKER - get the status info of a specific worker + * WORKER-DB - get the database status of a specific worker + * CZAR - get the status info of Czar + * CZAR-DB - get the database status of Czar + * QUERIES-ACTIVE - get user query info on the on-going queries + * QUERIES-ACTIVE-PROGRESS - get the progression history (of the active queries) + * QUERIES-PAST - search and display info on the past queries + * QUERY - get user query info for a specific query + * CSS - get CSS configurations (the shared scan settings, etc.) * * @throws std::invalid_argument for unknown values of parameter 'subModuleName' */ @@ -102,6 +105,11 @@ class HttpQservMonitorModule : public HttpModule { */ nlohmann::json _workerDb(); + /** + * Process a request for extracting various status info of Czar. + */ + nlohmann::json _czar(); + /** * Process a request for extracting various status info on the database * service of Czar. 
@@ -112,7 +120,19 @@ class HttpQservMonitorModule : public HttpModule { * Process a request for extracting a status on select user queries * launched at Qserv. */ - nlohmann::json _userQueries(); + nlohmann::json _activeQueries(); + + /** + * Process a request for extracting the progression history on the active + * user queries that are being executed by Qserv. + */ + nlohmann::json _activeQueriesProgress(); + + /** + * Process a request for extracting a status on the past (finished/failed) user + * queries submitted to Qserv. + */ + nlohmann::json _pastQueries(); /** * Process a request for extracting a status on a specific user query diff --git a/src/replica/MasterControllerHttpApp.cc b/src/replica/MasterControllerHttpApp.cc index d026563207..3e14721bac 100644 --- a/src/replica/MasterControllerHttpApp.cc +++ b/src/replica/MasterControllerHttpApp.cc @@ -154,6 +154,8 @@ MasterControllerHttpApp::MasterControllerHttpApp(int argc, char* argv[]) _permanentDelete); parser().option("qserv-czar-db", "A connection URL to the MySQL server of the Qserv master database.", _qservCzarDbUrl); + parser().option("qserv-czar-proxy", "A connection URL for the MySQL proxy service of Czar.", + _qservCzarProxyUrl); parser().option("http-root", "The root folder for the static content to be served by the built-in HTTP service.", _httpRoot); @@ -164,13 +166,17 @@ MasterControllerHttpApp::MasterControllerHttpApp(int argc, char* argv[]) } int MasterControllerHttpApp::runImpl() { + // IMPORTANT: clear the corresponding member variables after using the URLs + // to the Configuration to prevent contamination of the application's log + // stream with values of the sensitive command line arguments. if (!_qservCzarDbUrl.empty()) { - // IMPORTANT: set the connector, then clear it up to avoid - // contaminating the log files when logging command line arguments - // parsed by the application. 
Configuration::setQservCzarDbUrl(_qservCzarDbUrl); _qservCzarDbUrl = "******"; } + if (!_qservCzarProxyUrl.empty()) { + Configuration::setQservCzarProxyUrl(_qservCzarProxyUrl); + _qservCzarProxyUrl = "******"; + } _controller = Controller::create(serviceProvider()); // ATTENTION: Controller depends on a number of folders that are used for diff --git a/src/replica/MasterControllerHttpApp.h b/src/replica/MasterControllerHttpApp.h index a50cbe8157..db1cf546ab 100644 --- a/src/replica/MasterControllerHttpApp.h +++ b/src/replica/MasterControllerHttpApp.h @@ -133,9 +133,12 @@ class MasterControllerHttpApp : public Application { bool _forceQservSync; bool _permanentDelete; - /// A connection URL to the MySQL service of the Qserv master database. + /// A connection URL for the MySQL service of the Qserv master database. std::string _qservCzarDbUrl; + /// A connection URL for the MySQL proxy service of Czar. + std::string _qservCzarProxyUrl; + /// The root folder for the static content to be served by the built-in /// HTTP service. std::string _httpRoot; From 6e0d7fc99b414c53317c6804f7d70051f417d89b Mon Sep 17 00:00:00 2001 From: Igor Gaponenko Date: Mon, 28 Aug 2023 20:45:58 +0000 Subject: [PATCH 11/15] Split the service reporting info on both ongoing and past queries The service was replaced with two separate services. The first one reports info on the ongoing queries. The second service is used for searching and displaying info on the past queries. 
--- src/replica/HttpProcessor.cc | 2 +- src/replica/HttpQservMonitorModule.cc | 2 ++ src/replica/HttpQservMonitorModule.h | 4 ++-- 3 files changed, 5 insertions(+), 3 deletions(-) diff --git a/src/replica/HttpProcessor.cc b/src/replica/HttpProcessor.cc index 7f8ee4d64b..543da1a709 100644 --- a/src/replica/HttpProcessor.cc +++ b/src/replica/HttpProcessor.cc @@ -255,7 +255,7 @@ void HttpProcessor::registerServices() { self->_processorConfig, req, resp, "CZAR-DB"); }); - httpServer()->addHandler("GET", "/replication/qserv/master/query", + httpServer()->addHandler("GET", "/replication/qserv/master/queries/active", [self](qhttp::Request::Ptr const req, qhttp::Response::Ptr const resp) { HttpQservMonitorModule::process(self->controller(), self->name(), self->_processorConfig, req, resp, diff --git a/src/replica/HttpQservMonitorModule.cc b/src/replica/HttpQservMonitorModule.cc index 81d511009c..47fa752c9f 100644 --- a/src/replica/HttpQservMonitorModule.cc +++ b/src/replica/HttpQservMonitorModule.cc @@ -361,6 +361,7 @@ json HttpQservMonitorModule::_activeQueries() { checkApiVersion(__func__, 25); unsigned int const timeoutSec = query().optionalUInt("timeout_sec", workerResponseTimeoutSec()); + debug(__func__, "timeout_sec=" + to_string(timeoutSec)); // Check which queries and in which schedulers are being executed @@ -516,6 +517,7 @@ json HttpQservMonitorModule::_pastQueries() { g.packCond(constraints, g.like("query", "%" + searchPattern + "%")); } } + json result; h.conn->executeInOwnTransaction([&](auto conn) { result["queries_past"] = _pastUserQueries(conn, constraints, limit4past, includeMessages); }); diff --git a/src/replica/HttpQservMonitorModule.h b/src/replica/HttpQservMonitorModule.h index 16ff4e8816..b4517383e9 100644 --- a/src/replica/HttpQservMonitorModule.h +++ b/src/replica/HttpQservMonitorModule.h @@ -117,8 +117,8 @@ class HttpQservMonitorModule : public HttpModule { nlohmann::json _czarDb(); /** - * Process a request for extracting a status on select user 
queries - * launched at Qserv. + * Process a request for extracting a status on the active user queries + * that are being executed by Qserv. */ nlohmann::json _activeQueries(); From 1cb1eee1e68940c59a0ad287eae579883deba0b9 Mon Sep 17 00:00:00 2001 From: Igor Gaponenko Date: Mon, 21 Aug 2023 20:50:53 -0700 Subject: [PATCH 12/15] Web Dashboard: minor refactoring in the page update mechanism The improvement aimed at reducing code duplication as well as the total amount of code in the application. --- src/www/qserv/js/Common.js | 26 +++++++++++++++++-- src/www/qserv/js/IngestConfiguration.js | 16 +++--------- src/www/qserv/js/IngestContribInfo.js | 12 ++------- src/www/qserv/js/IngestContributions.js | 18 ++++--------- src/www/qserv/js/IngestStatus.js | 16 ++++-------- src/www/qserv/js/IngestTransactions.js | 18 ++++--------- src/www/qserv/js/IngestTransactionsLog.js | 12 ++------- src/www/qserv/js/QservCzarMySQLQueries.js | 11 +------- src/www/qserv/js/QservMySQLConnections.js | 11 +------- src/www/qserv/js/QservWorkerMySQLQueries.js | 11 +------- src/www/qserv/js/QservWorkerQueries.js | 11 +------- .../qserv/js/QservWorkerResultsFilesystem.js | 11 +------- src/www/qserv/js/QservWorkerSchedulerHist.js | 11 +------- src/www/qserv/js/QservWorkerTaskHist.js | 11 +------- src/www/qserv/js/QservWorkerTasks.js | 11 +------- src/www/qserv/js/ReplicationController.js | 10 +------ src/www/qserv/js/ReplicationSchema.js | 14 +++------- src/www/qserv/js/ReplicationTableIndexes.js | 12 +++------ src/www/qserv/js/StatusQueryInspector.js | 10 +------ src/www/qserv/js/StatusUserQueries.js | 11 +------- 20 files changed, 64 insertions(+), 199 deletions(-) diff --git a/src/www/qserv/js/Common.js b/src/www/qserv/js/Common.js index 6fd085e40c..7153c122a5 100644 --- a/src/www/qserv/js/Common.js +++ b/src/www/qserv/js/Common.js @@ -1,7 +1,10 @@ define([ - 'modules/sql-formatter.min'], + 'modules/sql-formatter.min', + 'underscore'], + +function(sqlFormatter, + _) { -function(sqlFormatter)
{ class Common { static RestAPIVersion = 24; static query2text(query, expanded) { @@ -15,6 +18,25 @@ function(sqlFormatter) { } static _sqlFormatterConfig = {"language":"mysql", "uppercase:":true, "indent":" "}; static _max_compact_length = 120; + static _ivals = [ + {value: 2, name: '2 sec'}, + {value: 5, name: '5 sec'}, + {value: 10, name: '10 sec'}, + {value: 20, name: '20 sec'}, + {value: 30, name: '30 sec'}, + {value: 60, name: '1 min'}, + {value: 120, name: '2 min'}, + {value: 300, name: '5 min'}, + {value: 600, name: '10 min'} + ]; + static html_update_ival(id, default_ival = 30, ivals = undefined) { + return ` + +`; + } } return Common; }); diff --git a/src/www/qserv/js/IngestConfiguration.js b/src/www/qserv/js/IngestConfiguration.js index 8a6887effd..57d6b96ff5 100644 --- a/src/www/qserv/js/IngestConfiguration.js +++ b/src/www/qserv/js/IngestConfiguration.js @@ -65,9 +65,6 @@ function(CSSLoader, } } - /** - * The first time initialization of the page's layout - */ _init() { if (this._initialized === undefined) this._initialized = false; if (this._initialized) return; @@ -89,15 +86,7 @@ function(CSSLoader,
- - + ${Common.html_update_ival('update-interval')}
@@ -125,6 +114,9 @@ function(CSSLoader,
`; let cont = this.fwk_app_container.html(html); + cont.find("#update-interval").change(() => { + this._load(); + }); cont.find(".form-control-view").change(() => { this._load(); }); diff --git a/src/www/qserv/js/IngestContribInfo.js b/src/www/qserv/js/IngestContribInfo.js index 3528d73d8d..c3b19c8943 100644 --- a/src/www/qserv/js/IngestContribInfo.js +++ b/src/www/qserv/js/IngestContribInfo.js @@ -61,15 +61,7 @@ function(CSSLoader,
- - + ${Common.html_update_ival('update-interval', 10)}
@@ -278,7 +270,7 @@ function(CSSLoader, } _get_contrib_id() { return this._form_control('input', 'contrib-id').val(); } _set_contrib_id(contrib_id) { this._form_control('input', 'contrib-id').val(contrib_id); } - _update_interval_sec() { return this._form_control('select', 'contrib-update-interval').val(); } + _update_interval_sec() { return this._form_control('select', 'update-interval').val(); } _table_warnings() { if (this._table_warnings_obj === undefined) { this._table_warnings_obj = this.fwk_app_container.find('table#fwk-ingest-contrib-info-warnings'); diff --git a/src/www/qserv/js/IngestContributions.js b/src/www/qserv/js/IngestContributions.js index 9b31f07e38..cdd0194ebf 100644 --- a/src/www/qserv/js/IngestContributions.js +++ b/src/www/qserv/js/IngestContributions.js @@ -62,9 +62,6 @@ function(CSSLoader, if (this._initialized) return; this._initialized = true; this._prevTimestamp = 0; - - /*   */ - let html = `
@@ -86,15 +83,7 @@ function(CSSLoader,
- - + ${Common.html_update_ival('update-interval', 60)}
@@ -262,6 +251,9 @@ function(CSSLoader,
`; let cont = this.fwk_app_container.html(html); + cont.find("#update-interval").change(() => { + this._load(); + }); cont.find(".loader").change(() => { this._load(); }); @@ -367,7 +359,7 @@ function(CSSLoader, _get_stage() { return this._form_control('select', 'contrib-stage').val(); } _get_sort_by_column() { return this._form_control('select', 'contrib-sort-column').val(); } _get_sort_order() { return this._form_control('select', 'contrib-sort-order').val(); } - _update_interval_sec() { return this._form_control('select', 'contrib-update-interval').val(); } + _update_interval_sec() { return this._form_control('select', 'update-interval').val(); } _load() { if (this._loading === undefined) this._loading = false; diff --git a/src/www/qserv/js/IngestStatus.js b/src/www/qserv/js/IngestStatus.js index 8530d84df2..1ccfe65857 100644 --- a/src/www/qserv/js/IngestStatus.js +++ b/src/www/qserv/js/IngestStatus.js @@ -42,7 +42,6 @@ function(CSSLoader, } } - /// The first time initialization of the page's layout _init() { if (this._initialized === undefined) this._initialized = false; if (this._initialized) return; @@ -64,15 +63,7 @@ function(CSSLoader,
- - + ${Common.html_update_ival('update-interval')}
@@ -82,6 +73,9 @@ function(CSSLoader,
`; let cont = this.fwk_app_container.html(html); + cont.find("#update-interval").change(() => { + this._load(); + }); cont.find(".form-control-view").change(() => { this._load(); }); @@ -135,7 +129,7 @@ function(CSSLoader, _disable_selectors(disable) { this.fwk_app_container.find(".form-control-view").prop('disabled', disable); } - _update_interval_sec() { return this._form_control('select', 'ingest-update-interval').val(); } + _update_interval_sec() { return this._form_control('select', 'update-interval').val(); } /// Load data from a web service then render it to the application's page. _load() { diff --git a/src/www/qserv/js/IngestTransactions.js b/src/www/qserv/js/IngestTransactions.js index 66c0d9ccfc..a407f38c27 100644 --- a/src/www/qserv/js/IngestTransactions.js +++ b/src/www/qserv/js/IngestTransactions.js @@ -47,9 +47,6 @@ function(CSSLoader, } } - /** - * The first time initialization of the page's layout - */ _init() { if (this._initialized === undefined) this._initialized = false; if (this._initialized) return; @@ -71,15 +68,7 @@ function(CSSLoader,
- - + ${Common.html_update_ival('update-interval')}
@@ -120,6 +109,9 @@ function(CSSLoader,
`; let cont = this.fwk_app_container.html(html); + cont.find("#update-interval").change(() => { + this._load(); + }); cont.find(".form-control-view").change(() => { this._load(); }); @@ -156,7 +148,7 @@ function(CSSLoader, _disable_selectors(disable) { this.fwk_app_container.find(".form-control-view").prop('disabled', disable); } - _update_interval_sec() { return this._form_control('select', 'trans-update-interval').val(); } + _update_interval_sec() { return this._form_control('select', 'update-interval').val(); } _table() { if (this._table_obj === undefined) { this._table_obj = this.fwk_app_container.find('table#fwk-ingest-transactions'); diff --git a/src/www/qserv/js/IngestTransactionsLog.js b/src/www/qserv/js/IngestTransactionsLog.js index 7b36c46645..ce23de9832 100644 --- a/src/www/qserv/js/IngestTransactionsLog.js +++ b/src/www/qserv/js/IngestTransactionsLog.js @@ -129,15 +129,7 @@ function(CSSLoader,
- - + ${Common.html_update_ival('update-interval')}
@@ -257,7 +249,7 @@ function(CSSLoader, this._form_control('select', 'trans-id').val(current_id); } } - _update_interval_sec() { return this._form_control('select', 'trans-update-interval').val(); } + _update_interval_sec() { return this._form_control('select', 'update-interval').val(); } _trans_info(attr) { if (this._trans_info_obj === undefined) { this._trans_info_obj = this.fwk_app_container.find('div#fwk-ingest-transactions-log-info'); diff --git a/src/www/qserv/js/QservCzarMySQLQueries.js b/src/www/qserv/js/QservCzarMySQLQueries.js index b00a7ab166..a0e06bf2c1 100644 --- a/src/www/qserv/js/QservCzarMySQLQueries.js +++ b/src/www/qserv/js/QservCzarMySQLQueries.js @@ -52,16 +52,7 @@ function(CSSLoader,
- - + ${Common.html_update_ival('update-interval', 10)}
diff --git a/src/www/qserv/js/QservMySQLConnections.js b/src/www/qserv/js/QservMySQLConnections.js index 82566f0a57..176e951f3b 100644 --- a/src/www/qserv/js/QservMySQLConnections.js +++ b/src/www/qserv/js/QservMySQLConnections.js @@ -48,16 +48,7 @@ function(CSSLoader,
- - + ${Common.html_update_ival('update-interval', 10)}
diff --git a/src/www/qserv/js/QservWorkerMySQLQueries.js b/src/www/qserv/js/QservWorkerMySQLQueries.js index a5d033351c..4a85d4372a 100644 --- a/src/www/qserv/js/QservWorkerMySQLQueries.js +++ b/src/www/qserv/js/QservWorkerMySQLQueries.js @@ -57,16 +57,7 @@ function(CSSLoader,
- - + ${Common.html_update_ival('update-interval', 10)}
diff --git a/src/www/qserv/js/QservWorkerQueries.js b/src/www/qserv/js/QservWorkerQueries.js index 7bc02228bf..49b02d28b0 100644 --- a/src/www/qserv/js/QservWorkerQueries.js +++ b/src/www/qserv/js/QservWorkerQueries.js @@ -52,16 +52,7 @@ function(CSSLoader,
- - + ${Common.html_update_ival('update-interval', 10)}
diff --git a/src/www/qserv/js/QservWorkerResultsFilesystem.js b/src/www/qserv/js/QservWorkerResultsFilesystem.js index 9846e20fbe..560feb8e23 100644 --- a/src/www/qserv/js/QservWorkerResultsFilesystem.js +++ b/src/www/qserv/js/QservWorkerResultsFilesystem.js @@ -48,16 +48,7 @@ function(CSSLoader,
- - + ${Common.html_update_ival('update-interval', 10)}
diff --git a/src/www/qserv/js/QservWorkerSchedulerHist.js b/src/www/qserv/js/QservWorkerSchedulerHist.js index 5ed5cc1b9e..c5adb7afed 100644 --- a/src/www/qserv/js/QservWorkerSchedulerHist.js +++ b/src/www/qserv/js/QservWorkerSchedulerHist.js @@ -109,16 +109,7 @@ function(CSSLoader,
- - + ${Common.html_update_ival('update-interval', 10)}
diff --git a/src/www/qserv/js/QservWorkerTaskHist.js b/src/www/qserv/js/QservWorkerTaskHist.js index 6782750e0d..b8f80307eb 100644 --- a/src/www/qserv/js/QservWorkerTaskHist.js +++ b/src/www/qserv/js/QservWorkerTaskHist.js @@ -93,16 +93,7 @@ function(CSSLoader,
- - + ${Common.html_update_ival('update-interval', 10)}
diff --git a/src/www/qserv/js/QservWorkerTasks.js b/src/www/qserv/js/QservWorkerTasks.js index 7f3d552939..651d8a032e 100644 --- a/src/www/qserv/js/QservWorkerTasks.js +++ b/src/www/qserv/js/QservWorkerTasks.js @@ -93,16 +93,7 @@ function(CSSLoader,
- - + ${Common.html_update_ival('update-interval', 10)}
diff --git a/src/www/qserv/js/ReplicationController.js b/src/www/qserv/js/ReplicationController.js index 4c22d2e96c..dc8c174328 100644 --- a/src/www/qserv/js/ReplicationController.js +++ b/src/www/qserv/js/ReplicationController.js @@ -128,15 +128,7 @@ function(CSSLoader,
- - + ${Common.html_update_ival('update-interval')}
diff --git a/src/www/qserv/js/ReplicationSchema.js b/src/www/qserv/js/ReplicationSchema.js index 37ab838fed..f0e4e5b0c7 100644 --- a/src/www/qserv/js/ReplicationSchema.js +++ b/src/www/qserv/js/ReplicationSchema.js @@ -86,15 +86,7 @@ function(CSSLoader,
- - + ${Common.html_update_ival('update-interval')}
@@ -122,7 +114,7 @@ function(CSSLoader, cont.find("#schema-table").change(() => { this._load_schema(); }); - cont.find("#schema-update-interval").change(() => { + cont.find("#update-interval").change(() => { this._load(); }); this._disable_selectors(true); @@ -151,7 +143,7 @@ function(CSSLoader, } return this._form_control_obj[id]; } - _update_interval_sec() { return this._form_control('select', 'schema-update-interval').val(); } + _update_interval_sec() { return this._form_control('select', 'update-interval').val(); } _get_database() { return this._form_control('select', 'schema-database').val(); } _set_database(val) { this._form_control('select', 'schema-database').val(val); } _get_table() { return this._form_control('select', 'schema-table').val(); } diff --git a/src/www/qserv/js/ReplicationTableIndexes.js b/src/www/qserv/js/ReplicationTableIndexes.js index ecc54a1644..1ef605593d 100644 --- a/src/www/qserv/js/ReplicationTableIndexes.js +++ b/src/www/qserv/js/ReplicationTableIndexes.js @@ -88,13 +88,7 @@ function(CSSLoader,
- - + ${Common.html_update_ival('update-interval', 600)}
@@ -133,7 +127,7 @@ function(CSSLoader, cont.find("#indexes-overlap").change(() => { this._load_indexes(); }); - cont.find("#indexes-update-interval").change(() => { + cont.find("#update-interval").change(() => { this._load(); }); this._disable_selectors(true); @@ -162,7 +156,7 @@ function(CSSLoader, } return this._form_control_obj[id]; } - _update_interval_sec() { return this._form_control('select', 'indexes-update-interval').val(); } + _update_interval_sec() { return this._form_control('select', 'update-interval').val(); } _get_database() { return this._form_control('select', 'indexes-database').val(); } _set_database(val) { this._form_control('select', 'indexes-database').val(val); } _get_table() { return this._form_control('select', 'indexes-table').val(); } diff --git a/src/www/qserv/js/StatusQueryInspector.js b/src/www/qserv/js/StatusQueryInspector.js index 8c17509e0e..5d640fca98 100644 --- a/src/www/qserv/js/StatusQueryInspector.js +++ b/src/www/qserv/js/StatusQueryInspector.js @@ -85,15 +85,7 @@ function(CSSLoader,
- - + ${Common.html_update_ival('update-interval')}
diff --git a/src/www/qserv/js/StatusUserQueries.js b/src/www/qserv/js/StatusUserQueries.js index 760de893e5..dea7f58958 100644 --- a/src/www/qserv/js/StatusUserQueries.js +++ b/src/www/qserv/js/StatusUserQueries.js @@ -176,16 +176,7 @@ function(CSSLoader,
- - + ${Common.html_update_ival('update-interval', 5)}
From 25e8a97720177f09757afed364b22d2ebbe27ac9 Mon Sep 17 00:00:00 2001 From: Igor Gaponenko Date: Mon, 28 Aug 2023 12:27:30 -0700 Subject: [PATCH 13/15] Web Dashboard: split the page displaying info on user queries Separated the query info into two pages - the active queries and the past queries. Also rearranged sub-tabs under the 'Status' tab for more intuitive navigation. --- src/www/qserv/css/StatusActiveQueries.css | 48 +++ src/www/qserv/css/StatusPastQueries.css | 48 +++ src/www/qserv/css/StatusUserQueries.css | 55 --- src/www/qserv/js/QservMonitoringDashboard.js | 53 +-- src/www/qserv/js/StatusActiveQueries.js | 322 ++++++++++++++++++ ...tusUserQueries.js => StatusPastQueries.js} | 172 ++-------- 6 files changed, 465 insertions(+), 233 deletions(-) create mode 100644 src/www/qserv/css/StatusActiveQueries.css create mode 100644 src/www/qserv/css/StatusPastQueries.css delete mode 100644 src/www/qserv/css/StatusUserQueries.css create mode 100644 src/www/qserv/js/StatusActiveQueries.js rename src/www/qserv/js/{StatusUserQueries.js => StatusPastQueries.js} (68%) diff --git a/src/www/qserv/css/StatusActiveQueries.css b/src/www/qserv/css/StatusActiveQueries.css new file mode 100644 index 0000000000..075edcb6b4 --- /dev/null +++ b/src/www/qserv/css/StatusActiveQueries.css @@ -0,0 +1,48 @@ +#fwk-status-active-queries-controls { + margin-top: 1em; + padding-top: 0.5em; +} +#fwk-status-active-queries-controls label { + font-weight: bold; +} +table#fwk-status-active-queries pre { + padding: 0; + margin: 0; +} +table#fwk-status-active-queries th, +table#fwk-status-active-queries td { + vertical-align: top; +} +table#fwk-status-active-queries { + position: relative; +} +table#fwk-status-active-queries > thead > tr > th.sticky { + position:sticky; + top:80px; + z-index:2; +} +table#fwk-status-active-queries caption { + caption-side: top; + text-align: right; + padding-top: 0; +} +table#fwk-status-active-queries caption.updating { + background-color: #ffeeba; +} + 
+table#fwk-status-active-queries tbody > tr > td.query_toggler:hover { + cursor:pointer; +} +table#fwk-status-active-queries span { + font-family: monospace; + white-space: pre; + font-size: 16px; + margin: 0; + padding: 0; +} +table#fwk-status-active-queries span.trend_down { + color: green; +} +table#fwk-status-active-queries span.trend_up { + color: red; +} diff --git a/src/www/qserv/css/StatusPastQueries.css b/src/www/qserv/css/StatusPastQueries.css new file mode 100644 index 0000000000..5c637b43d2 --- /dev/null +++ b/src/www/qserv/css/StatusPastQueries.css @@ -0,0 +1,48 @@ +#fwk-status-past-queries-controls { + margin-top: 1em; + padding-top: 0.5em; +} +#fwk-status-past-queries-controls label { + font-weight: bold; +} +table#fwk-status-past-queries pre { + padding: 0; + margin: 0; +} +table#fwk-status-past-queries th, +table#fwk-status-past-queries td { + vertical-align: top; +} +table#fwk-status-past-queries { + position: relative; +} +table#fwk-status-past-queries > thead > tr > th.sticky { + position:sticky; + top:80px; + z-index:2; +} +table#fwk-status-past-queries caption { + caption-side: top; + text-align: right; + padding-top: 0; +} +table#fwk-status-past-queries caption.updating { + background-color: #ffeeba; +} + +table#fwk-status-past-queries tbody > tr > td.query_toggler:hover { + cursor:pointer; +} +table#fwk-status-past-queries span { + font-family: monospace; + white-space: pre; + font-size: 16px; + margin: 0; + padding: 0; +} +table#fwk-status-past-queries span.trend_down { + color: green; +} +table#fwk-status-past-queries span.trend_up { + color: red; +} diff --git a/src/www/qserv/css/StatusUserQueries.css b/src/www/qserv/css/StatusUserQueries.css deleted file mode 100644 index 8192cb44ec..0000000000 --- a/src/www/qserv/css/StatusUserQueries.css +++ /dev/null @@ -1,55 +0,0 @@ -#fwk-status-queries-controls { - margin-top: 1em; - padding-top: 0.5em; -} -#fwk-status-queries-controls label { - font-weight: bold; -} -table#fwk-status-queries pre, 
-table#fwk-status-queries-past pre { - padding: 0; - margin: 0; -} -table#fwk-status-queries th, -table#fwk-status-queries td, -table#fwk-status-queries-past th, -table#fwk-status-queries-past td { - vertical-align: top; -} -table#fwk-status-queries-past { - position: relative; -} -table#fwk-status-queries-past > thead > tr > th.sticky { - position:sticky; - top:80px; - z-index:2; -} -table#fwk-status-queries caption { - caption-side: top; - text-align: right; - padding-top: 0; -} -table#fwk-status-queries caption.updating { - background-color: #ffeeba; -} - -table#fwk-status-queries tbody > tr > td.query_toggler:hover, -table#fwk-status-queries-past tbody > tr > td.query_toggler:hover { - cursor:pointer; -} -table#fwk-status-queries span, -table#fwk-status-queries-past span { - font-family: monospace; - white-space: pre; - font-size: 16px; - margin: 0; - padding: 0; -} -table#fwk-status-queries span.trend_down, -table#fwk-status-queries-past span.trend_down { - color: green; -} -table#fwk-status-queries span.trend_up, -table#fwk-status-queries-past span.trend_up { - color: red; -} diff --git a/src/www/qserv/js/QservMonitoringDashboard.js b/src/www/qserv/js/QservMonitoringDashboard.js index 502e207f59..2995c3f47c 100644 --- a/src/www/qserv/js/QservMonitoringDashboard.js +++ b/src/www/qserv/js/QservMonitoringDashboard.js @@ -36,11 +36,12 @@ require([ 'webfwk/Fwk', 'webfwk/FwkTestApp', 'qserv/StatusCatalogs', + 'qserv/StatusActiveQueries', 'qserv/StatusActiveChunksMap', + 'qserv/StatusPastQueries', 'qserv/StatusQueryInspector', 'qserv/StatusReplicationLevel', 'qserv/StatusWorkers', - 'qserv/StatusUserQueries', 'qserv/QservCzarMySQLQueries', 'qserv/QservCss', 'qserv/QservMySQLConnections', @@ -76,11 +77,12 @@ function(CSSLoader, Fwk, FwkTestApp, StatusCatalogs, + StatusActiveQueries, StatusActiveChunksMap, + StatusPastQueries, StatusQueryInspector, StatusReplicationLevel, StatusWorkers, - StatusUserQueries, QservCzarMySQLQueries, QservCss, QservMySQLConnections, @@ 
-136,12 +138,31 @@ function(CSSLoader, var apps = [ { name: 'Status', apps: [ + new StatusActiveQueries('Active Queries Monitor'), + new StatusActiveChunksMap('Active Chunks Map'), + new StatusPastQueries('Past Queries'), + new StatusQueryInspector('Query Inspector'), new StatusCatalogs('Catalogs'), new StatusReplicationLevel('Replication Level'), - new StatusWorkers('Workers'), - new StatusUserQueries('User Queries Monitor'), - new StatusActiveChunksMap('Active Chunks Map'), - new StatusQueryInspector('Query Inspector') + new StatusWorkers('Workers') + ] + }, + { name: 'Czar', + apps: [ + new QservCzarMySQLQueries('MySQL Queries'), + new QservCss('CSS') + ] + }, + { name: 'Workers', + apps: [ + new QservMySQLConnections('MySQL Connections'), + new QservWorkerMySQLQueries('MySQL Queries'), + new QservWorkerQueries('Queries in Worker Queues'), + new QservWorkerSchedulers('Schedulers'), + new QservWorkerSchedulerHist('Scheduler Histograms'), + new QservWorkerTasks('Tasks'), + new QservWorkerTaskHist('Task Histograms'), + new QservWorkerResultsFilesystem('Results Filesystem') ] }, { name: 'Replication', @@ -165,24 +186,6 @@ function(CSSLoader, new IngestContribInfo('Contribution Info') ] }, - { name: 'Czar', - apps: [ - new QservCzarMySQLQueries('MySQL Queries'), - new QservCss('CSS') - ] - }, - { name: 'Workers', - apps: [ - new QservMySQLConnections('MySQL Connections'), - new QservWorkerMySQLQueries('MySQL Queries'), - new QservWorkerQueries('Queries in Worker Queues'), - new QservWorkerSchedulers('Schedulers'), - new QservWorkerSchedulerHist('Scheduler Histograms'), - new QservWorkerTasks('Tasks'), - new QservWorkerTaskHist('Task Histograms'), - new QservWorkerResultsFilesystem('Results Filesystem') - ] - }, { name: 'Tools', apps: [ new FwkTestApp('Query Qserv'), @@ -198,7 +201,7 @@ function(CSSLoader, if (typeof menus !== 'undefined') { Fwk.show(menus[0], menus[1]); } else { - Fwk.show('Status', 'User Queries Monitor'); + Fwk.show('Status', 'Active Queries 
Monitor'); } } ); diff --git a/src/www/qserv/js/StatusActiveQueries.js b/src/www/qserv/js/StatusActiveQueries.js new file mode 100644 index 0000000000..0f4abaa6c7 --- /dev/null +++ b/src/www/qserv/js/StatusActiveQueries.js @@ -0,0 +1,322 @@ +define([ + 'webfwk/CSSLoader', + 'webfwk/Fwk', + 'webfwk/FwkApplication', + 'qserv/Common', + 'underscore'], + +function(CSSLoader, + Fwk, + FwkApplication, + Common, + _) { + + CSSLoader.load('qserv/css/StatusActiveQueries.css'); + + class StatusActiveQueries extends FwkApplication { + + /// @returns the suggested server-side timeout for retreiving results + static _server_proc_timeout_sec() { return 2; } + + constructor(name) { + super(name); + this._queryId2Expanded = {}; // Store 'true' to allow persistent state for the expanded + // queries between updates. + this._id2query = {}; // Store query text for each identifier + } + + /** + * @see FwkApplication.fwk_app_on_show + */ + fwk_app_on_show() { + console.log('show: ' + this.fwk_app_name); + this.fwk_app_on_update(); + } + + /** + * @see FwkApplication.fwk_app_on_hide + */ + fwk_app_on_hide() { + console.log('hide: ' + this.fwk_app_name); + } + + /** + * @see FwkApplication.fwk_app_on_update + */ + fwk_app_on_update() { + if (this.fwk_app_visible) { + this._init(); + if (this._prev_update_sec === undefined) { + this._prev_update_sec = 0; + } + let now_sec = Fwk.now().sec; + if (now_sec - this._prev_update_sec > this._update_interval_sec()) { + this._prev_update_sec = now_sec; + this._load(); + } + } + } + + /** + * The first time initialization of the page's layout + */ + _init() { + if (this._initialized === undefined) { + this._initialized = false; + } + if (this._initialized) return; + this._initialized = true; + + this._scheduler2color = { + 'Snail': '#007bff', + 'Slow': '#17a2b8', + 'Med': '#28a745', + 'Fast': '#ffc107', + 'Group': '#dc3545', + 'Loading': 'default' + }; + + let html = ` +
+
+
+
+ ${Common.html_update_ival('update-interval', 5)} +
+
+ + +
+
+
+
+
+
+ + + + + + + + + + + + + + + + + + +
+ Loading... +
StartedProgressSchedElapsedLeft (est.)ChunksCh/minQIDQuery
+
+
`; + let cont = this.fwk_app_container.html(html); + cont.find(".form-control-selector").change(() => { + this._load(); + }); + cont.find("button#reset-queries-form").click(() => { + this._set_update_interval_sec(5); + this._load(); + }); + } + _table() { + if (this._table_obj === undefined) { + this._table_obj = this.fwk_app_container.find('table#fwk-status-active-queries'); + } + return this._table_obj; + } + _status() { + if (this._status_obj === undefined) { + this._status_obj = this._table().children('caption'); + } + return this._status_obj; + } + _form_control(elem_type, id) { + if (this._form_control_obj === undefined) this._form_control_obj = {}; + if (!_.has(this._form_control_obj, id)) { + this._form_control_obj[id] = this.fwk_app_container.find(elem_type + '#' + id); + } + return this._form_control_obj[id]; + } + _update_interval_sec() { return this._form_control('select', 'update-interval').val(); } + _set_update_interval_sec(val) { this._form_control('select', 'update-interval').val(val); } + + _load() { + if (this._loading === undefined) { + this._loading = false; + } + if (this._loading) return; + this._loading = true; + + this._status().addClass('updating'); + + console.log('_load:1'); + + Fwk.web_service_GET( + "/replication/qserv/master/queries/active", + { version: Common.RestAPIVersion, + timeout_sec: StatusActiveQueries._server_proc_timeout_sec() + }, + (data) => { + console.log('_load:2'); + if (!data.success) { + this._status().html(`${data.error}`); + } else { + this._display(data); + Fwk.setLastUpdate(this._status()); + } + this._status().removeClass('updating'); + this._loading = false; + }, + (msg) => { + this._status().html('No Response'); + this._status().removeClass('updating'); + this._loading = false; + } + ); + } + _display(data) { + this._id2query = {}; + const queryToggleTitle = "Click to toggle query formatting."; + const queryCopyTitle = "Click to copy the query text to the clipboard."; + const queryInspectTitle = "Click to see 
detailed info (progress, messages, etc.) on the query."; + const queryStyle = "color:#4d4dff;"; + let html = ''; + for (let i in data.queries) { + let query = data.queries[i]; + this._id2query[query.queryId] = query.query; + const progress = Math.floor(100. * query.completedChunks / query.totalChunks); + const scheduler = _.isUndefined(query.scheduler) ? 'Loading...' : query.scheduler.substring('Sched'.length); + const scheduler_color = _.has(this._scheduler2color, scheduler) ? + this._scheduler2color[scheduler] : + this._scheduler2color['Loading']; + + const elapsed = this._elapsed(query.samplingTime_sec - query.queryBegin_sec); + let leftSeconds; + if (query.completedChunks > 0 && query.samplingTime_sec - query.queryBegin_sec > 0) { + leftSeconds = Math.floor( + (query.totalChunks - query.completedChunks) / + (query.completedChunks / (query.samplingTime_sec - query.queryBegin_sec)) + ); + } + const left = this._elapsed(leftSeconds); + const trend = this._trend(query.queryId, leftSeconds); + const performance = this._performance(query.completedChunks, query.samplingTime_sec - query.queryBegin_sec); + const expanded = (query.queryId in this._queryId2Expanded) && this._queryId2Expanded[query.queryId]; + html += ` + +
` + query.queryBegin + `
+ +
+
+ ${progress}% +
+
+ + ${scheduler} + ${elapsed} + ${left}${trend} +
${query.completedChunks}/${query.totalChunks}
+
${performance}
+
${query.queryId}
+ + + + + + +
` + this._query2text(query.queryId, expanded) + `
+`; + } + let that = this; + let toggleQueryDisplay = function(e) { + let td = $(e.currentTarget); + let pre = td.find("pre.query"); + const queryId = td.parent().attr("id"); + const expanded = !((queryId in that._queryId2Expanded) && that._queryId2Expanded[queryId]); + pre.text(that._query2text(queryId, expanded)); + that._queryId2Expanded[queryId] = expanded; + }; + let copyQueryToClipboard = function(e) { + let button = $(e.currentTarget); + let queryId = button.parent().parent().attr("id"); + let query = that._id2query[queryId]; + navigator.clipboard.writeText(query, + () => {}, + () => { alert("Failed to write the query to the clipboard. Please copy the text manually: " + query); } + ); + }; + let displayQuery = function(e) { + let button = $(e.currentTarget); + let queryId = button.parent().parent().attr("id"); + Fwk.find("Status", "Query Inspector").set_query_id(queryId); + Fwk.show("Status", "Query Inspector"); + }; + let tbodyQueries = this._table().children('tbody').html(html); + tbodyQueries.find("td.query_toggler").click(toggleQueryDisplay); + tbodyQueries.find("button.copy-query").click(copyQueryToClipboard); + tbodyQueries.find("button.inspect-query").click(displayQuery); + } + + /** + * @param {Number} seconds + * @returns {String} the amount of time elapsed by a query, formatted as: 'hh:mm:ss' + */ + _elapsed(totalSeconds) { + if (_.isUndefined(totalSeconds)) return ' '; + let hours = Math.floor(totalSeconds / 3600); + let minutes = Math.floor((totalSeconds - 3600 * hours) / 60); + let seconds = (totalSeconds - 3600 * hours - 60 * minutes) % 60; + let displayHours = hours !== 0; + let displayMinutes = displayHours || minutes !== 0; + let displaySeconds = true; + return '' + + (displayHours ? (hours < 10 ? '0' : '') + hours + 'h' : '') + ' ' + + (displayMinutes ? (minutes < 10 ? '0' : '') + minutes + 'm' : '') + ' ' + + (displaySeconds ? (seconds < 10 ? 
'0' : '') + seconds + 's' : '') + + ''; + } + + /** + * @param {Number} qid a unique identifier of a qiery. It's used to pull a record + * for the previously (of any) recorded number of second estimated before the query + * would expected to finish. + * @param {Number} totalSeconds + * @returns {String} an arrow indicating the trend to slow down or accelerate + */ + _trend(qid, nextTotalSeconds) { + if (!_.isUndefined(nextTotalSeconds)) { + if (this._prevTotalSeconds === undefined) { + this._prevTotalSeconds = {}; + } + let prevTotalSeconds = _.has(this._prevTotalSeconds, qid) ? this._prevTotalSeconds[qid] : nextTotalSeconds; + this._prevTotalSeconds[qid] = nextTotalSeconds; + if (prevTotalSeconds < nextTotalSeconds) { + return ' ↑'; + } else if (prevTotalSeconds > nextTotalSeconds) { + return ' ↓'; + } + } + return '  '; + } + + /** + * @param {integer} chunks + * @param {integer} totalSeconds + * @returns {integer} the number of chunks per minute (or 0 if the totalSeconds is 0) + */ + _performance(chunks, totalSeconds) { + if (chunks === 0 || totalSeconds <= 0) return 0; + return Math.floor(chunks / (totalSeconds / 60.)); + } + _query2text(queryId, expanded) { + return Common.query2text(this._id2query[queryId], expanded); + } + } + return StatusActiveQueries; +}); diff --git a/src/www/qserv/js/StatusUserQueries.js b/src/www/qserv/js/StatusPastQueries.js similarity index 68% rename from src/www/qserv/js/StatusUserQueries.js rename to src/www/qserv/js/StatusPastQueries.js index dea7f58958..85a937cabf 100644 --- a/src/www/qserv/js/StatusUserQueries.js +++ b/src/www/qserv/js/StatusPastQueries.js @@ -11,9 +11,9 @@ function(CSSLoader, Common, _) { - CSSLoader.load('qserv/css/StatusUserQueries.css'); + CSSLoader.load('qserv/css/StatusPastQueries.css'); - class StatusUserQueries extends FwkApplication { + class StatusPastQueries extends FwkApplication { /// @returns the suggested server-side timeout for retreiving results static _server_proc_timeout_sec() { return 2; } 
@@ -26,7 +26,6 @@ function(CSSLoader, } /** - * Override event handler defined in the base class * @see FwkApplication.fwk_app_on_show */ fwk_app_on_show() { @@ -35,7 +34,6 @@ function(CSSLoader, } /** - * Override event handler defined in the base class * @see FwkApplication.fwk_app_on_hide */ fwk_app_on_hide() { @@ -43,7 +41,6 @@ function(CSSLoader, } /** - * Override event handler defined in the base class * @see FwkApplication.fwk_app_on_update */ fwk_app_on_update() { @@ -70,44 +67,10 @@ function(CSSLoader, if (this._initialized) return; this._initialized = true; - this._scheduler2color = { - 'Snail': '#007bff', - 'Slow': '#17a2b8', - 'Med': '#28a745', - 'Fast': '#ffc107', - 'Group': '#dc3545', - 'Loading': 'default' - }; - let html = ` -
-
- - - - - - - - - - - - - - - - - - -
- Loading... -
StartedProgressSchedElapsedLeft (est.)ChunksCh/minQIDQuery
-
-
-
+
-

Search past queries

+

Search queries

@@ -183,7 +146,10 @@ function(CSSLoader,
- +
+ @@ -220,35 +186,18 @@ function(CSSLoader, this._load(); }); } - - /** - * Table for displaying the progress of the on-going user queries - * @returns JQuery table object - */ - _tableQueries() { - if (this._tableQueries_obj === undefined) { - this._tableQueries_obj = this.fwk_app_container.find('table#fwk-status-queries'); + _table() { + if (this._table_obj === undefined) { + this._table_obj = this.fwk_app_container.find('table#fwk-status-past-queries'); } - return this._tableQueries_obj; + return this._table_obj; } _status() { if (this._status_obj === undefined) { - this._status_obj = this._tableQueries().children('caption'); + this._status_obj = this._table().children('caption'); } return this._status_obj; } - - /** - * Table for displaying the completed, failed, etc. user queries - * @returns JQuery table object - */ - _tablePastQueries() { - if (this._tablePastQueries_obj === undefined) { - this._tablePastQueries_obj = this.fwk_app_container.find('table#fwk-status-queries-past'); - } - return this._tablePastQueries_obj; - } - _form_control(elem_type, id) { if (this._form_control_obj === undefined) this._form_control_obj = {}; if (!_.has(this._form_control_obj, id)) { @@ -278,10 +227,6 @@ function(CSSLoader, _set_max_queries(val) { this._form_control('select', 'max-queries').val(val); } _update_interval_sec() { return this._form_control('select', 'update-interval').val(); } - /** - * Load data from a web servie then render it to the application's - * page. 
- */ _load() { if (this._loading === undefined) { this._loading = false; @@ -292,15 +237,15 @@ function(CSSLoader, this._status().addClass('updating'); Fwk.web_service_GET( - "/replication/qserv/master/query", - { query_age: this._get_query_age(), + "/replication/qserv/master/queries/past", + { version: Common.RestAPIVersion, + query_age: this._get_query_age(), query_status: this._get_query_status(), min_elapsed_sec: this._get_min_elapsed(), query_type: this._get_query_type(), search_pattern: this._get_query_search_pattern(), search_regexp_mode: this._get_query_search_mode() == "REGEXP" ? 1 : 0, - limit4past: this._get_max_queries(), - timeout_sec: StatusUserQueries._server_proc_timeout_sec() + limit4past: this._get_max_queries() }, (data) => { if (!data.success) { @@ -319,10 +264,6 @@ function(CSSLoader, } ); } - - /** - * Display the queries - */ _display(data) { this._id2query = {}; const queryToggleTitle = "Click to toggle query formatting."; @@ -330,52 +271,6 @@ function(CSSLoader, const queryInspectTitle = "Click to see detailed info (progress, messages, etc.) on the query."; const queryStyle = "color:#4d4dff;"; let html = ''; - for (let i in data.queries) { - let query = data.queries[i]; - this._id2query[query.queryId] = query.query; - const progress = Math.floor(100. * query.completedChunks / query.totalChunks); - const scheduler = _.isUndefined(query.scheduler) ? 'Loading...' : query.scheduler.substring('Sched'.length); - const scheduler_color = _.has(this._scheduler2color, scheduler) ? 
- this._scheduler2color[scheduler] : - this._scheduler2color['Loading']; - - const elapsed = this._elapsed(query.samplingTime_sec - query.queryBegin_sec); - let leftSeconds; - if (query.completedChunks > 0 && query.samplingTime_sec - query.queryBegin_sec > 0) { - leftSeconds = Math.floor( - (query.totalChunks - query.completedChunks) / - (query.completedChunks / (query.samplingTime_sec - query.queryBegin_sec)) - ); - } - const left = this._elapsed(leftSeconds); - const trend = this._trend(query.queryId, leftSeconds); - const performance = this._performance(query.completedChunks, query.samplingTime_sec - query.queryBegin_sec); - const expanded = (query.queryId in this._queryId2Expanded) && this._queryId2Expanded[query.queryId]; - html += ` - - - - - - - - - - - - -`; - } let that = this; let toggleQueryDisplay = function(e) { let td = $(e.currentTarget); @@ -400,11 +295,6 @@ function(CSSLoader, Fwk.find("Status", "Query Inspector").set_query_id(queryId); Fwk.show("Status", "Query Inspector"); }; - let tbodyQueries = this._tableQueries().children('tbody').html(html); - tbodyQueries.find("td.query_toggler").click(toggleQueryDisplay); - tbodyQueries.find("button.copy-query").click(copyQueryToClipboard); - tbodyQueries.find("button.inspect-query").click(displayQuery); - html = ''; for (let i in data.queries_past) { let query = data.queries_past[i]; this._id2query[query.queryId] = query.query; @@ -433,7 +323,7 @@ function(CSSLoader, `; } - let tbodyPastQueries = this._tablePastQueries().children('tbody').html(html); + let tbodyPastQueries = this._table().children('tbody').html(html); tbodyPastQueries.find("td.query_toggler").click(toggleQueryDisplay); tbodyPastQueries.find("button.copy-query").click(copyQueryToClipboard); tbodyPastQueries.find("button.inspect-query").click(displayQuery); @@ -457,30 +347,6 @@ function(CSSLoader, (displaySeconds ? (seconds < 10 ? '0' : '') + seconds + 's' : '') + ''; } - - /** - * - * @param {Number} qid a unique identifier of a qiery. 
It's used to pull a record - * for the previously (of any) recorded number of second estimated before the query - * would expected to finish. - * @param {Number} totalSeconds - * @returns {String} an arrow indicating the trend to slow down or accelerate - */ - _trend(qid, nextTotalSeconds) { - if (!_.isUndefined(nextTotalSeconds)) { - if (this._prevTotalSeconds === undefined) { - this._prevTotalSeconds = {}; - } - let prevTotalSeconds = _.has(this._prevTotalSeconds, qid) ? this._prevTotalSeconds[qid] : nextTotalSeconds; - this._prevTotalSeconds[qid] = nextTotalSeconds; - if (prevTotalSeconds < nextTotalSeconds) { - return ' ↑'; - } else if (prevTotalSeconds > nextTotalSeconds) { - return ' ↓'; - } - } - return '  '; - } /** * @param {integer} chunks @@ -495,5 +361,5 @@ function(CSSLoader, return Common.query2text(this._id2query[queryId], expanded); } } - return StatusUserQueries; + return StatusPastQueries; }); From 37198d99738a5d09040edca63335f63905c81a4a Mon Sep 17 00:00:00 2001 From: Igor Gaponenko Date: Fri, 18 Aug 2023 18:34:58 +0000 Subject: [PATCH 14/15] Web Dashboard: added a page for displaying Czar counters --- src/www/dashboard.html | 2 +- src/www/qserv/css/QservCzarStatistics.css | 30 ++ src/www/qserv/js/Common.js | 2 +- src/www/qserv/js/QservCzarStatistics.js | 407 +++++++++++++++++++ src/www/qserv/js/QservMonitoringDashboard.js | 3 + 5 files changed, 442 insertions(+), 2 deletions(-) create mode 100644 src/www/qserv/css/QservCzarStatistics.css create mode 100644 src/www/qserv/js/QservCzarStatistics.js diff --git a/src/www/dashboard.html b/src/www/dashboard.html index c8b950a6c7..154f4288d7 100644 --- a/src/www/dashboard.html +++ b/src/www/dashboard.html @@ -4,7 +4,7 @@ Qserv monitoring dashboard - + diff --git a/src/www/qserv/css/QservCzarStatistics.css b/src/www/qserv/css/QservCzarStatistics.css new file mode 100644 index 0000000000..1472ef3c63 --- /dev/null +++ b/src/www/qserv/css/QservCzarStatistics.css @@ -0,0 +1,30 @@ 
+#fwk-qserv-czar-stats-controls label { + font-weight: bold; +} +table#fwk-qserv-czar-stats-status { + margin:0; +} +table.fwk-qserv-czar-stats caption { + caption-side: top; + text-align: right; + padding-top: 0; +} +table.fwk-qserv-czar-stats caption.updating { + background-color: #ffeeba; +} +table.fwk-qserv-czar-stats > thead > tr > th.sticky { + position:sticky; + top:80px; + z-index:2; +} +table.fwk-qserv-czar-stats tbody th, +table.fwk-qserv-czar-stats tbody td { + vertical-align:middle; +} +table.fwk-qserv-czar-stats pre { + padding: 0; + margin: 0; +} +table.fwk-qserv-czar-stats pre.perf { + color: maroon; +} diff --git a/src/www/qserv/js/Common.js b/src/www/qserv/js/Common.js index 7153c122a5..e56688504b 100644 --- a/src/www/qserv/js/Common.js +++ b/src/www/qserv/js/Common.js @@ -6,7 +6,7 @@ function(sqlFormatter, _) { class Common { - static RestAPIVersion = 24; + static RestAPIVersion = 25; static query2text(query, expanded) { if (expanded) { return sqlFormatter.format(query, Common._sqlFormatterConfig); diff --git a/src/www/qserv/js/QservCzarStatistics.js b/src/www/qserv/js/QservCzarStatistics.js new file mode 100644 index 0000000000..f61022946c --- /dev/null +++ b/src/www/qserv/js/QservCzarStatistics.js @@ -0,0 +1,407 @@ +define([ + 'webfwk/CSSLoader', + 'webfwk/Fwk', + 'webfwk/FwkApplication', + 'qserv/Common', + 'underscore'], + +function(CSSLoader, + Fwk, + FwkApplication, + Common, + _) { + + CSSLoader.load('qserv/css/QservCzarStatistics.css'); + + class QservCzarStatistics extends FwkApplication { + + constructor(name) { + super(name); + // The previous snapshot of the stats. It's used for reporting "deltas" + // in the relevant counters. 
+ this._prev = undefined; + } + fwk_app_on_show() { + console.log('show: ' + this.fwk_app_name); + this.fwk_app_on_update(); + } + fwk_app_on_hide() { + console.log('hide: ' + this.fwk_app_name); + } + fwk_app_on_update() { + if (this.fwk_app_visible) { + this._init(); + if (this._prev_update_sec === undefined) { + this._prev_update_sec = 0; + } + let now_sec = Fwk.now().sec; + if (now_sec - this._prev_update_sec > this._update_interval_sec()) { + this._prev_update_sec = now_sec; + this._init(); + this._load(); + } + } + } + static _counters = [ + 'queryRespConcurrentSetupCount', + 'queryRespConcurrentWaitCount', + 'queryRespConcurrentProcessingCount', + 'numQueries', + 'numJobs', + 'numResultFiles', + 'numResultMerges' + ]; + static _totals = [ + 'totalQueries', + 'totalJobs', + 'totalResultFiles', + 'totalResultMerges', + 'totalBytesRecv', + 'totalRowsRecv' + ]; + static _totals_data_rate = new Set(['totalBytesRecv']); + static _qdisppool_columns = ['priority', 'running', 'size']; + + _init() { + if (this._initialized === undefined) this._initialized = false; + if (this._initialized) return; + this._initialized = true; + let html = ` +
+
+
+
+ ${Common.html_update_ival('update-interval', 2)} +
+
+ + +
+
+
+
+
+
+
+ Loading... +
Submitted
` + query.queryBegin + `
-
-
- ${progress}% -
-
-
${scheduler}${elapsed}${left}${trend}
${query.completedChunks}/${query.totalChunks}
${performance}
${query.queryId}
- - - -
` + this._query2text(query.queryId, expanded) + `
` + this._query2text(query.queryId, expanded) + `
+ +
Loading...
+
+
+
+
+

Integrated Totals

+ + + + + + + + + + + + + + + + + + + + + + ` + _.reduce(QservCzarStatistics._totals, function(html, counter) { return html + ` + + + + + + + + + `; }, '') + ` + +
 s-1 Δs-1 
runTime
Loading...
     
${counter}
Loading...
+
+
+

Running Counters

+ + + + + + + + + + ` + _.reduce(QservCzarStatistics._counters, function(html, counter) { return html + ` + + + + + + `; }, '') + ` + +
 currentΔs-1
${counter}
Loading...
+
+
+

QdispPool

+ + + ` + _.reduce(QservCzarStatistics._qdisppool_columns, function(html, column) { return html + ` + `; }, '') + ` + + + +
${column}
+
+
+
+
+

Timing Histograms

+
+
+
+
+
+

Data Rates Histograms

+
+
+
+`; + let cont = this.fwk_app_container.html(html); + cont.find(".form-control-selector").change(() => { + this._load(); + }); + cont.find("button#reset-form").click(() => { + this._set_update_interval_sec(2); + this._load(); + }); + } + _form_control(elem_type, id) { + if (this._form_control_obj === undefined) this._form_control_obj = {}; + if (!_.has(this._form_control_obj, id)) { + this._form_control_obj[id] = this.fwk_app_container.find(elem_type + '#' + id); + } + return this._form_control_obj[id]; + } + _update_interval_sec() { return this._form_control('select', 'update-interval').val(); } + _set_update_interval_sec(val) { this._form_control('select', 'update-interval').val(val); } + _table(name) { + if (_.isUndefined(this._table_obj)) this._table_obj = {}; + if (!_.has(this._table_obj, name)) { + this._table_obj[name] = this.fwk_app_container.find('table#fwk-qserv-czar-stats-' + name); + } + return this._table_obj[name]; + } + _status() { + if (_.isUndefined(this._status_obj)) { + this._status_obj = this._table('status').children('caption'); + } + return this._status_obj; + } + _set_counter(table, counter, val) { + this._set(table, counter, Number(val).toLocaleString()); + } + _set_counter_delta(table, counter, val) { + this._set(table, counter + "_delta", val == 0 ? '' : Number(val).toLocaleString()); + } + _set_counter_perf(table, counter, valAndUnit, suffix='') { + console.log(table, counter, valAndUnit); + const val_unit = valAndUnit.split(' '); + const val = val_unit[0]; + const unit = val_unit.length > 1 ? 
val_unit[1] : ''; + if (val == 0) { + this._set(table, counter + "_perf" + suffix, ''); + this._set(table, counter + "_unit" + suffix, ''); + } else { + this._set(table, counter + "_perf" + suffix, Number(val).toLocaleString()); + this._set(table, counter + "_unit" + suffix, unit); + } + } + _set(table, counter_id, val) { + if (_.isUndefined(this._counters_obj)) this._counters_obj = {}; + if (_.isUndefined(this._counters_obj[table])) this._counters_obj[table] = {}; + if (!_.has(this._counters_obj[table], counter_id)) { + this._counters_obj[table][counter_id] = this._table(table).children('tbody').find('#' + counter_id); + } + this._counters_obj[table][counter_id].text(val); + } + _load() { + if (this._loading === undefined) this._loading = false; + if (this._loading) return; + this._loading = true; + this._status().addClass('updating'); + Fwk.web_service_GET( + "/replication/qserv/master/status", + {version: Common.RestAPIVersion}, + (data) => { + if (data.success) { + this._display(data.status); + Fwk.setLastUpdate(this._status()); + } else { + console.log('request failed', this.fwk_app_name, data.error); + this._status().html('' + data.error + ''); + } + this._status().removeClass('updating'); + this._loading = false; + }, + (msg) => { + console.log('request failed', this.fwk_app_name, msg); + this._status().html('No Response'); + this._status().removeClass('updating'); + this._loading = false; + } + ); + } + _display(data) { + let tbody = this._table('qdisppool').children('tbody'); + if (_.isEmpty(data) || _.isEmpty(data.qdisp_stats) || _.isEmpty(data.qdisp_stats.QdispPool)) { + tbody.html(''); + return; + } + let that = this; + const runTimeSec = Math.round((data.qdisp_stats.snapshotTimeMs - data.qdisp_stats.startTimeMs) / 1000); + this._set('totals', 'runTime', QservCzarStatistics._elapsed(runTimeSec)); + _.each(QservCzarStatistics._totals, function(counter) { + that._set_counter('totals', counter, data.qdisp_stats[counter]); + if (runTimeSec > 0) { + const 
perf = data.qdisp_stats[counter] / runTimeSec; + if (QservCzarStatistics._totals_data_rate.has(counter)) { + that._set_counter_perf('totals', counter, QservCzarStatistics._format_data_rate(perf), '_sum'); + } else { + that._set_counter_perf('totals', counter, perf.toFixed(0), '_sum'); + } + } + if (!_.isUndefined(that._prev)) { + const deltaVal = data.qdisp_stats[counter] - that._prev.qdisp_stats[counter]; + that._set_counter_delta('totals', counter, deltaVal); + const deltaT = (data.qdisp_stats.snapshotTimeMs - that._prev.qdisp_stats.snapshotTimeMs) / 1000; + if (deltaT > 0) { + const perf = deltaVal / deltaT; + if (QservCzarStatistics._totals_data_rate.has(counter)) { + that._set_counter_perf('totals', counter, QservCzarStatistics._format_data_rate(perf)); + } else { + that._set_counter_perf('totals', counter, perf.toFixed(0)); + } + } + } + }); + _.each(QservCzarStatistics._counters, function(counter) { + that._set_counter('counters', counter, data.qdisp_stats[counter]); + if (!_.isUndefined(that._prev)) { + const deltaVal = data.qdisp_stats[counter] - that._prev.qdisp_stats[counter]; + that._set_counter_delta('counters', counter, deltaVal); + const deltaT = (data.qdisp_stats.snapshotTimeMs - that._prev.qdisp_stats.snapshotTimeMs) / 1000; + if (deltaT > 0) { + that._set_counter_perf('counters', counter, (deltaVal / deltaT).toFixed(0)); + } + } + }); + let html = ''; + _.each(data.qdisp_stats.QdispPool, function (row) { + html += ` +` + _.reduce(QservCzarStatistics._qdisppool_columns, function (html, column) { return html + ` +
${row[column]}
`; }, '') + ` +`; + }); + this._table('qdisppool').children('tbody').html(html); + + // Locate and display histograms nested in the top-level objects + this._table('timing').html(this._htmlgen_histograms( + _.reduce(data.qdisp_stats, function (histograms, e) { + if (_.isObject(e) && _.has(e, 'HistogramId')) histograms.push(e); + return histograms; + }, []) + )); + this._table('data').html(this._htmlgen_histograms( + _.reduce(data.transmit_stats, function (histograms, e) { + if (_.isObject(e) && _.has(e, 'HistogramId')) histograms.push(e); + return histograms; + }, []), + true + )); + this._prev = data; + } + _htmlgen_histograms(histograms, data_rate = false) { + return _.reduce(histograms, function (html, histogram) { + if (html == '') { + let idx = 0; + html = ` + + + id`; + if (data_rate) { + html += ` + avg`; + } else { + html += ` + total + totalCount + avg`; + } + html += _.reduce(histogram.buckets, function (html, bucket) { return html + ` + ${(idx++) == 0 ? "≤ " : ""}${QservCzarStatistics._format_bucket_limit(bucket.maxVal, data_rate)}`; }, '') + ` + + +`; + } + html += ` + + ${histogram.HistogramId}`; + if (data_rate) { + html += ` +
${Math.round(histogram.avg).toLocaleString()}
`; + } else { + html += ` +
${histogram.total.toFixed(3)}
+
${histogram.totalCount}
+
${histogram.avg.toFixed(3)}
`; + } + html += _.reduce(histogram.buckets, function (html, bucket) { return html + ` +
${bucket.count}
`; }, '') + ` + `; + return html; + }, '') + ` +`; + } + static _KB = 1000; + static _MB = 1000 * 1000; + static _GB = 1000 * 1000 * 1000; + static _format_bucket_limit(v, data_rate=false) { + if (isNaN(v)) return v; + if (data_rate) { + if (v < QservCzarStatistics._KB) return v + " B/s"; + else if (v < QservCzarStatistics._MB) return (v / QservCzarStatistics._KB).toFixed(0) + " KB/s"; + else if (v < QservCzarStatistics._GB) return (v / QservCzarStatistics._MB).toFixed(0) + " MB/s"; + return (v / QservCzarStatistics._GB).toFixed(0) + " GB/s"; + } + return v.toLocaleString(); + } + static _format_data_rate(v) { + if (v == 0) return v + ""; // as string + else if (v < QservCzarStatistics._KB * 10) return v.toFixed(0); + else if (v < QservCzarStatistics._MB * 10) return (v / QservCzarStatistics._KB).toFixed(0) + " KB"; + else if (v < QservCzarStatistics._GB * 10) return (v / QservCzarStatistics._MB).toFixed(0) + " MB"; + else return (v / QservCzarStatistics._GB).toFixed(0) + " GB"; + } + + /** + * @param {Number} seconds + * @returns {String} the amount of time elapsed by a query, formatted as: 'hh:mm:ss' + */ + static _elapsed(totalSeconds) { + let hours = Math.floor(totalSeconds / 3600); + let minutes = Math.floor((totalSeconds - 3600 * hours) / 60); + let seconds = (totalSeconds - 3600 * hours - 60 * minutes) % 60; + let displayHours = hours !== 0; + let displayMinutes = displayHours || minutes !== 0; + let displaySeconds = true; + return (displayHours ? (hours < 10 ? '0' : '') + hours + ':' : '') + + (displayMinutes ? (minutes < 10 ? '0' : '') + minutes + ':' : '') + + (displaySeconds ? (seconds < 10 ? 
'0' : '') + seconds : ''); + } + } + return QservCzarStatistics; +}); diff --git a/src/www/qserv/js/QservMonitoringDashboard.js b/src/www/qserv/js/QservMonitoringDashboard.js index 2995c3f47c..e0fd22fa6b 100644 --- a/src/www/qserv/js/QservMonitoringDashboard.js +++ b/src/www/qserv/js/QservMonitoringDashboard.js @@ -43,6 +43,7 @@ require([ 'qserv/StatusReplicationLevel', 'qserv/StatusWorkers', 'qserv/QservCzarMySQLQueries', + 'qserv/QservCzarStatistics', 'qserv/QservCss', 'qserv/QservMySQLConnections', 'qserv/QservWorkerMySQLQueries', @@ -84,6 +85,7 @@ function(CSSLoader, StatusReplicationLevel, StatusWorkers, QservCzarMySQLQueries, + QservCzarStatistics, QservCss, QservMySQLConnections, QservWorkerMySQLQueries, @@ -150,6 +152,7 @@ function(CSSLoader, { name: 'Czar', apps: [ new QservCzarMySQLQueries('MySQL Queries'), + new QservCzarStatistics('Statistics'), new QservCss('CSS') ] }, From dc3b3b3adaa3ec411ec5ef9a71fab47e185cef2c Mon Sep 17 00:00:00 2001 From: Igor Gaponenko Date: Fri, 1 Sep 2023 16:22:21 -0700 Subject: [PATCH 15/15] Web Dashboard: added a page for displaying query progress at Czar Added a link (an additional column on the active queries table) to the newely added Query progression plot. Fixed minor layout bugs on the active and past query tables. Loading the Highcharts.js library on demand. 
--- src/www/dashboard.html | 2 + src/www/qserv/css/QservCzarQueryProgress.css | 60 ++++ src/www/qserv/js/QservCzarQueryProgress.js | 301 +++++++++++++++++++ src/www/qserv/js/QservMonitoringDashboard.js | 16 +- src/www/qserv/js/StatusActiveQueries.js | 14 +- src/www/qserv/js/StatusPastQueries.js | 4 +- 6 files changed, 388 insertions(+), 9 deletions(-) create mode 100644 src/www/qserv/css/QservCzarQueryProgress.css create mode 100644 src/www/qserv/js/QservCzarQueryProgress.js diff --git a/src/www/dashboard.html b/src/www/dashboard.html index 154f4288d7..437f2690d9 100644 --- a/src/www/dashboard.html +++ b/src/www/dashboard.html @@ -4,7 +4,9 @@ Qserv monitoring dashboard + + diff --git a/src/www/qserv/css/QservCzarQueryProgress.css b/src/www/qserv/css/QservCzarQueryProgress.css new file mode 100644 index 0000000000..f182539c67 --- /dev/null +++ b/src/www/qserv/css/QservCzarQueryProgress.css @@ -0,0 +1,60 @@ +#fwk-qserv-czar-query-prog-controls label { + font-weight: bold; +} +table#fwk-qserv-czar-query-prog-status { + margin:0; +} +table#fwk-qserv-czar-query-prog-status caption { + caption-side: top; + text-align: right; + padding-top: 0; +} +table#fwk-qserv-czar-query-prog-status caption.updating { + background-color: #ffeeba; +} + +#fwk-qserv-czar-query-prog-status-queries { + height: 712px; +} +.highcharts-figure, +.highcharts-data-table table { + min-width: 310px; + max-width: 800px; + margin: 1em auto; +} + +.highcharts-data-table table { + font-family: Verdana, sans-serif; + border-collapse: collapse; + border: 1px solid #ebebeb; + margin: 10px auto; + text-align: center; + width: 100%; + max-width: 500px; +} + +.highcharts-data-table caption { + padding: 1em 0; + font-size: 1.2em; + color: #555; +} + +.highcharts-data-table th { + font-weight: 600; + padding: 0.5em; +} + +.highcharts-data-table td, +.highcharts-data-table th, +.highcharts-data-table caption { + padding: 0.5em; +} + +.highcharts-data-table thead tr, +.highcharts-data-table tr:nth-child(even) { 
+ background: #f8f8f8; +} + +.highcharts-data-table tr:hover { + background: #f1f7ff; +} diff --git a/src/www/qserv/js/QservCzarQueryProgress.js b/src/www/qserv/js/QservCzarQueryProgress.js new file mode 100644 index 0000000000..032bdbd41b --- /dev/null +++ b/src/www/qserv/js/QservCzarQueryProgress.js @@ -0,0 +1,301 @@ +define([ + 'webfwk/CSSLoader', + 'webfwk/Fwk', + 'webfwk/FwkApplication', + 'qserv/Common', + 'underscore', + 'highcharts', + 'highcharts/modules/exporting', + 'highcharts/modules/accessibility'], + +function(CSSLoader, + Fwk, + FwkApplication, + Common, + _, + Highcharts) { + + CSSLoader.load('qserv/css/QservCzarQueryProgress.css'); + + class QservCzarQueryProgress extends FwkApplication { + + constructor(name) { + super(name); + this._data = undefined; + this._queries_chart = undefined; + } + fwk_app_on_show() { + console.log('show: ' + this.fwk_app_name); + this.fwk_app_on_update(); + } + fwk_app_on_hide() { + console.log('hide: ' + this.fwk_app_name); + } + fwk_app_on_update() { + if (this.fwk_app_visible) { + this._init(); + if (this._prev_update_sec === undefined) { + this._prev_update_sec = 0; + } + let now_sec = Fwk.now().sec; + if (now_sec - this._prev_update_sec > this._update_interval_sec()) { + this._prev_update_sec = now_sec; + this._init(); + this._load(); + } + } + } + /// Set the identifier and begin loading the query info in the background. + set_query_id(query_id) { + this._init(); + this._set_query_ids([query_id]); // To get the minimally-polulated selector + this._set_query_id(query_id); + this._set_last_seconds(24 * 3600); // Track the known history (if any) of the query + this._load(); + } + _init() { + if (this._initialized === undefined) this._initialized = false; + if (this._initialized) return; + this._initialized = true; + const lastMinutes = [1, 3, 5, 15, 30, 45]; + const lastHours = [1, 2, 4, 8, 12, 16, 20, 24]; + let html = ` +
+
+
+
+ + +
+
+ + +
+
+ + +
+
+ + +
+
+ ${Common.html_update_ival('update-interval', 10)} +
+
+ + +
+
+
+
+
+
+ + +
Loading...
+
+
+
+
+
+
+
+`; + let cont = this.fwk_app_container.html(html); + cont.find('[data-toggle="tooltip"]').tooltip(); + this._set_last_seconds(900); + cont.find(".form-control-selector").change(() => { + this._load(); + }); + cont.find(".form-control-viewer").change(() => { + if (_.isUndefined(this._data)) this._load(); + else this._display(this._data.queries); + }); + cont.find("button#reset-form").click(() => { + this._set_update_interval_sec(10); + this._set_query_id(0); + this._set_last_seconds(15 * 60); + this._set_vertical_scale('logarithmic'); + this._set_horizontal_scale(''); + this._load(); + }); + } + _form_control(elem_type, id) { + if (this._form_control_obj === undefined) this._form_control_obj = {}; + if (!_.has(this._form_control_obj, id)) { + this._form_control_obj[id] = this.fwk_app_container.find(elem_type + '#' + id); + } + return this._form_control_obj[id]; + } + _update_interval_sec() { return this._form_control('select', 'update-interval').val(); } + _set_update_interval_sec(val) { this._form_control('select', 'update-interval').val(val); } + _query_id() { return this._form_control('select', 'query-id').val(); } + _set_query_id(val) { + this._form_control('select', 'query-id').val(val); + } + _set_query_ids(queries) { + const prev_query = this._query_id(); + let html = ''; + for (let i in queries) { + const query = queries[i]; + const selected = (_.isEmpty(prev_query) && (i === 0)) || + (!_.isEmpty(prev_query) && (prev_query === query)); + html += ` +`; + } + this._form_control('select', 'query-id').html(html); + } + _vertical_scale() { return this._form_control('select', 'vertical-scale').val(); } + _set_vertical_scale(val) { this._form_control('select', 'vertical-scale').val(val); } + _horizontal_scale() { return this._form_control('select', 'horizontal-scale').val(); } + _set_horizontal_scale(val) { this._form_control('select', 'horizontal-scale').val(val); } + _last_seconds() { return this._form_control('select', 'last-seconds').val(); } + 
_set_last_seconds(val) { this._form_control('select', 'last-seconds').val(val); } + _table(name) { + if (_.isUndefined(this._table_obj)) this._table_obj = {}; + if (!_.has(this._table_obj, name)) { + this._table_obj[name] = this.fwk_app_container.find('table#fwk-qserv-czar-query-prog-' + name); + } + return this._table_obj[name]; + } + _status() { + if (_.isUndefined(this._status_obj)) { + this._status_obj = this._table('status').children('caption'); + } + return this._status_obj; + } + _queries() { + if (_.isUndefined(this._queries_obj)) { + this._queries_obj = this.fwk_app_container.find('canvas#queries'); + } + return this._queries_obj; + } + _load() { + if (this._loading === undefined) this._loading = false; + if (this._loading) return; + this._loading = true; + this._status().addClass('updating'); + Fwk.web_service_GET( + "replication/qserv/master/queries/active/progress", + { version: Common.RestAPIVersion, + query_id: this._query_id(), + last_seconds: this._last_seconds() + }, + (data) => { + if (data.success) { + this._data = data; + this._display(data.queries); + Fwk.setLastUpdate(this._status()); + } else { + console.log('request failed', this.fwk_app_name, data.error); + this._status().html('' + data.error + ''); + } + this._status().removeClass('updating'); + this._loading = false; + }, + (msg) => { + console.log('request failed', this.fwk_app_name, msg); + this._status().html('No Response'); + this._status().removeClass('updating'); + this._loading = false; + } + ); + } + _display(queries) { + const query_ids = _.keys(queries); + query_ids.sort(); + query_ids.reverse(); + this._set_query_ids(query_ids); // Update a collection of queries in the selector. + // Add a small delta to the points to allow seeing zeroes on the log scale, + // in case the one was requested. + const valueDeltaForLogScale = this._vertical_scale() === 'linear' ? 
0 : 0.1; + let series = []; + for (let qid in queries) { + let points = []; + let query_data = queries[qid]; + for (let i in query_data) { + const point = query_data[i]; + // +1 hr is needed for correcting timestamp mismatch between UNIX and JS timing + const timestampSec = point[0] / 1000 + 3600; + let x = new Date(0); + x.setSeconds(timestampSec); + points.push([x.getTime(), point[1] + valueDeltaForLogScale]); + } + series.push({ + name: qid, + data: points, + animation: { + enabled: false + } + }); + } + if (!_.isUndefined(this._queries_chart)) { + this._queries_chart.destroy(); + } + this._queries_chart = Highcharts.chart('fwk-qserv-czar-query-prog-status-queries', { + chart: { + type: 'line' + }, + title: { + text: '# Unfinished Jobs' + }, + subtitle: { + text: '< 24 hours' + }, + xAxis: { + type: 'datetime', + title: { + text: 'Time' + }, + // If auto-zoom is not enabled the plot will go all the way through + // the (viewer's) current time on the right. + max: this._horizontal_scale() === 'auto-zoom-in' ? undefined : new Date().setSeconds(0) + }, + yAxis: { + type: this._vertical_scale(), + title: { + text: '# jobs' + } + }, + tooltip: { + headerFormat: '{series.name}
', + pointFormat: '{point.x:%e. %b}: {point.y:.2f} jobs' + }, + time: { + // To ensure the time stamps are displaye din the (viewer's) local timezone. + timezoneOffset: new Date().getTimezoneOffset() + }, + plotOptions: { + series: { + marker: { + fillColor: '#dddddd', + lineWidth: 2, + lineColor: null + } + } + }, + colors: ['#6CF', '#39F', '#06C', '#036', '#000'], + series: series + }); + } + } + return QservCzarQueryProgress; +}); diff --git a/src/www/qserv/js/QservMonitoringDashboard.js b/src/www/qserv/js/QservMonitoringDashboard.js index e0fd22fa6b..22318a3874 100644 --- a/src/www/qserv/js/QservMonitoringDashboard.js +++ b/src/www/qserv/js/QservMonitoringDashboard.js @@ -5,10 +5,17 @@ require.config({ waitSeconds: 15, urlArgs: "bust="+new Date().getTime(), + packages: [{ + name: 'highcharts', + main: 'highcharts' + }], + paths: { 'jquery': 'https://code.jquery.com/jquery-3.3.1', 'bootstrap': 'https://stackpath.bootstrapcdn.com/bootstrap/4.1.3/js/bootstrap.bundle', 'underscore': 'https://underscorejs.org/underscore-umd-min', + 'chartjs': 'https://cdnjs.cloudflare.com/ajax/libs/Chart.js/4.4.0/chart.umd.min', + 'highcharts': 'https://code.highcharts.com', 'webfwk': 'webfwk/js', 'qserv': 'qserv/js', 'modules': 'modules/js' @@ -19,13 +26,7 @@ require.config({ }, 'bootstrap': { 'deps': ['jquery','underscore'] - },/* - 'webfwk/*': { - 'deps': ['underscore'] }, - 'qserv/*': { - 'deps': ['underscore'] - },*/ 'underscore': { 'exports': '_' } @@ -44,6 +45,7 @@ require([ 'qserv/StatusWorkers', 'qserv/QservCzarMySQLQueries', 'qserv/QservCzarStatistics', + 'qserv/QservCzarQueryProgress', 'qserv/QservCss', 'qserv/QservMySQLConnections', 'qserv/QservWorkerMySQLQueries', @@ -86,6 +88,7 @@ function(CSSLoader, StatusWorkers, QservCzarMySQLQueries, QservCzarStatistics, + QservCzarQueryProgress, QservCss, QservMySQLConnections, QservWorkerMySQLQueries, @@ -153,6 +156,7 @@ function(CSSLoader, apps: [ new QservCzarMySQLQueries('MySQL Queries'), new QservCzarStatistics('Statistics'), 
+ new QservCzarQueryProgress('Query Progress'), new QservCss('CSS') ] }, diff --git a/src/www/qserv/js/StatusActiveQueries.js b/src/www/qserv/js/StatusActiveQueries.js index 0f4abaa6c7..7978d1be43 100644 --- a/src/www/qserv/js/StatusActiveQueries.js +++ b/src/www/qserv/js/StatusActiveQueries.js @@ -108,6 +108,7 @@ function(CSSLoader, QID + Query @@ -185,6 +186,7 @@ function(CSSLoader, const queryToggleTitle = "Click to toggle query formatting."; const queryCopyTitle = "Click to copy the query text to the clipboard."; const queryInspectTitle = "Click to see detailed info (progress, messages, etc.) on the query."; + const queryProgressTitle = "Click to see query progression plot."; const queryStyle = "color:#4d4dff;"; let html = ''; for (let i in data.queries) { @@ -227,9 +229,12 @@ function(CSSLoader, - + + + +
` + this._query2text(query.queryId, expanded) + `
`; } @@ -257,10 +262,17 @@ function(CSSLoader, Fwk.find("Status", "Query Inspector").set_query_id(queryId); Fwk.show("Status", "Query Inspector"); }; + let displayQueryProgress = function(e) { + let button = $(e.currentTarget); + let queryId = button.parent().parent().attr("id"); + Fwk.find("Czar", "Query Progress").set_query_id(queryId); + Fwk.show("Czar", "Query Progress"); + }; let tbodyQueries = this._table().children('tbody').html(html); tbodyQueries.find("td.query_toggler").click(toggleQueryDisplay); tbodyQueries.find("button.copy-query").click(copyQueryToClipboard); tbodyQueries.find("button.inspect-query").click(displayQuery); + tbodyQueries.find("button.query-progress").click(displayQueryProgress); } /** diff --git a/src/www/qserv/js/StatusPastQueries.js b/src/www/qserv/js/StatusPastQueries.js index 85a937cabf..0326a1ddbe 100644 --- a/src/www/qserv/js/StatusPastQueries.js +++ b/src/www/qserv/js/StatusPastQueries.js @@ -314,10 +314,10 @@ function(CSSLoader,
${query.collectedRows}
${query.finalRows}
${query.queryId}
- + - +
` + this._query2text(query.queryId, expanded) + `