forked from yandex/go-hasql
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathnode_checker.go
189 lines (159 loc) · 5.2 KB
/
node_checker.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
/*
Copyright 2020 YANDEX LLC
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package hasql
import (
"context"
"math"
"time"
)
// NodeRole describes the role a node plays in an SQL cluster
// (usually primary or standby).
type NodeRole uint8

// The numeric values are spelled out explicitly because PostgreSQLChecker scans
// the result of "pg_is_in_recovery()::int + 1" straight into a NodeRole,
// so they must not shift.
const (
	// NodeRoleUnknown is reported for a node with an unconventional role in the cluster.
	// It is also the zero value of NodeRole.
	NodeRoleUnknown NodeRole = 0
	// NodeRolePrimary is reported for a node with the primary role in the cluster.
	NodeRolePrimary NodeRole = 1
	// NodeRoleStandby is reported for a node with the standby role in the cluster.
	NodeRoleStandby NodeRole = 2
)
// NodeInfoProvider provides information about a single cluster node.
// It is the result type returned by a NodeChecker.
type NodeInfoProvider interface {
// Role reports the role of the node in the cluster.
// For SQL servers it is usually either primary or standby.
Role() NodeRole
}
// Compile-time assertion that NodeInfo implements NodeInfoProvider.
var _ NodeInfoProvider = NodeInfo{}
// NodeInfo contains various information about a single cluster node.
// It implements NodeInfoProvider and additionally exposes latency and
// replication lag through the Latency and ReplicationLag methods.
type NodeInfo struct {
// ClusterRole contains the node's determined role in the cluster.
ClusterRole NodeRole
// NetworkLatency stores the time that has been spent to send the check
// request and receive the response from the server.
NetworkLatency time.Duration
// ReplicaLag represents how far behind the data on a standby is
// in comparison to the primary. As determination of the real replication
// lag is a tricky task and the value type varies from one DBMS to another
// (e.g. bytes count lag, time delta lag etc.), this field contains
// an abstract value for sorting purposes only.
ReplicaLag int
}
// Role reports the determined role of the node in the cluster.
// It returns the ClusterRole field and makes NodeInfo satisfy NodeInfoProvider.
func (n NodeInfo) Role() NodeRole {
return n.ClusterRole
}
// Latency reports the time spent on query execution from the client's point of view.
// It returns the NetworkLatency field.
// It can be used in LatencyNodePicker to determine the node with the fastest response time.
func (n NodeInfo) Latency() time.Duration {
return n.NetworkLatency
}
// ReplicationLag reports the data replication delta on a standby.
// It returns the ReplicaLag field, an abstract value meant for ordering nodes only.
// It can be used in ReplicationNodePicker to determine the node with the most up-to-date data.
func (n NodeInfo) ReplicationLag() int {
return n.ReplicaLag
}
// NodeChecker is a function that performs a request to an SQL node through the
// given Querier and returns information about that node, or an error.
// PostgreSQLChecker, MySQLChecker and MSSQLChecker all have this signature.
type NodeChecker func(context.Context, Querier) (NodeInfoProvider, error)
// PostgreSQLChecker inspects a PostgreSQL node and reports its role, the time
// the check query took and its replication lag.
// It reports appropriate information for PostgreSQL nodes version 10 and higher.
//
// Any error from running or scanning the query is returned as is, together
// with a nil NodeInfoProvider.
func PostgreSQLChecker(ctx context.Context, db Querier) (NodeInfoProvider, error) {
	// The first column yields 1 on a primary and 2 on a node in recovery,
	// matching NodeRolePrimary and NodeRoleStandby respectively.
	const roleAndLagQuery = `
SELECT
((pg_is_in_recovery())::int + 1) AS role,
pg_last_wal_receive_lsn() - pg_last_wal_replay_lsn() AS replication_lag
;
`

	var (
		nodeRole NodeRole
		walDelta *int // stays nil when the server reports NULL
	)

	startedAt := time.Now()
	row := db.QueryRowContext(ctx, roleAndLagQuery)
	if err := row.Scan(&nodeRole, &walDelta); err != nil {
		return nil, err
	}
	elapsed := time.Since(startedAt)

	// Pick the replication lag value.
	// see: https://www.postgresql.org/docs/current/functions-admin.html#FUNCTIONS-RECOVERY-CONTROL
	var replicaLag int
	switch {
	case nodeRole == NodeRolePrimary:
		// primary node has no replication lag, whatever the query reported
		replicaLag = 0
	case walDelta != nil:
		// use reported non-null replication lag
		replicaLag = *walDelta
	default:
		// NULL lag: assume replication is not started - hence maximum int value
		replicaLag = math.MaxInt
	}

	return NodeInfo{
		ClusterRole:    nodeRole,
		NetworkLatency: elapsed,
		ReplicaLag:     replicaLag,
	}, nil
}
// MySQLChecker inspects a MySQL node and reports its role and the time the
// check query took. A node that returns at least one row for
// `SHOW SLAVE STATUS` is reported as standby, otherwise as primary.
// ATTENTION: database user must have REPLICATION CLIENT privilege to perform underlying query.
//
// Real replication lag is not measured yet: a standby always gets the maximum
// int value and a primary gets 0.
func MySQLChecker(ctx context.Context, db Querier) (NodeInfoProvider, error) {
	begin := time.Now()

	result, err := db.QueryContext(ctx, "SHOW SLAVE STATUS")
	if err != nil {
		return nil, err
	}
	// Close error is deliberately ignored: nothing useful can be done with it here.
	defer func() { _ = result.Close() }()

	// Latency covers issuing the query only, not iterating over its result.
	info := NodeInfo{NetworkLatency: time.Since(begin)}

	// only standby MySQL server will return rows for `SHOW SLAVE STATUS` query
	hasRow := result.Next()
	// TODO: check SECONDS_BEHIND_MASTER row for "replication lag"

	// Next returns false both on "no rows" and on failure, so consult Err
	// before trusting hasRow.
	if err := result.Err(); err != nil {
		return nil, err
	}

	if hasRow {
		info.ClusterRole = NodeRoleStandby
		info.ReplicaLag = math.MaxInt
	} else {
		info.ClusterRole = NodeRolePrimary
		info.ReplicaLag = 0
	}

	return info, nil
}
// MSSQLChecker checks state of MSSQL node.
//
// The node is reported as primary when sys.database_recovery_status contains
// no row with a NULL database_guid, and as standby otherwise.
// Real replication lag is not measured yet: a standby always gets the maximum
// int value and a primary gets 0.
//
// Any error from running or scanning the query is returned as is, together
// with a nil NodeInfoProvider.
func MSSQLChecker(ctx context.Context, db Querier) (NodeInfoProvider, error) {
	start := time.Now()

	var isPrimary bool
	// count(*) is required here: COUNT(column) skips NULL values, so
	// count(database_guid) under "WHERE database_guid IS NULL" is always 0
	// and every node would be reported as primary.
	err := db.
		QueryRowContext(ctx, `
SELECT
IIF(count(*) = 0, 'TRUE', 'FALSE') AS STATUS
FROM sys.database_recovery_status
WHERE database_guid IS NULL
`).
		Scan(&isPrimary)
	if err != nil {
		return nil, err
	}

	latency := time.Since(start)

	role := NodeRoleStandby
	// TODO: proper replication lag calculation
	lag := math.MaxInt
	if isPrimary {
		// primary node has no replication lag
		role = NodeRolePrimary
		lag = 0
	}

	return NodeInfo{
		ClusterRole:    role,
		NetworkLatency: latency,
		ReplicaLag:     lag,
	}, nil
}