diff --git a/pkg/vitess/api_client.go b/pkg/vitess/api_client.go
index f1b252e0..87cc66c8 100644
--- a/pkg/vitess/api_client.go
+++ b/pkg/vitess/api_client.go
@@ -15,11 +15,25 @@ import (
 
 const defaultTimeout = time.Duration(5) * time.Second
 
+// TabletRealtimeStats represents realtime stats from a running instance of vttablet.
+type TabletRealtimeStats struct {
+	HealthError string `json:"health_error,omitempty"`
+}
+
+// TabletStats represents stats from a running instance of vttablet.
+type TabletStats struct {
+	LastError string               `json:"last_error,omitempty"`
+	Realtime  *TabletRealtimeStats `json:"realtime,omitempty"`
+	Serving   bool                 `json:"serving,omitempty"`
+	Up        bool                 `json:"up,omitempty"`
+}
+
 // Tablet represents information about a running instance of vttablet.
 type Tablet struct {
 	Alias         *topodata.TabletAlias `json:"alias,omitempty"`
 	MysqlHostname string                `json:"mysql_hostname,omitempty"`
 	MysqlPort     int32                 `json:"mysql_port,omitempty"`
+	Stats         *TabletStats          `json:"stats,omitempty"`
 	Type          topodata.TabletType   `json:"type,omitempty"`
 }
 
@@ -36,9 +50,24 @@ func (t Tablet) HasValidCell(validCells []string) bool {
 	return false
 }
 
+// IsServeable returns a bool reflecting if a tablet is eligible to serve traffic based on tablet stats. For
+// backwards-compatibility tablets are assumed to be healthy if realtime stats is disabled (Stats is nil);
+// otherwise the tablet must report an empty LastError and carry a non-nil Realtime. This method aims to mimic
+// the logic used by vtgate to select tablets for read queries without considering 'serving', minimum
+// tablet count (not important to freno) and replication lag (freno polls its own replication lag).
+func (t Tablet) IsServeable() bool {
+	if t.Stats != nil {
+		return t.Stats.LastError == "" && t.Stats.Realtime != nil
+	}
+	return true
+}
+
-// IsValidReplica returns a bool reflecting if a tablet type is REPLICA
+// IsValidReplica returns a bool reflecting if a tablet is of type REPLICA and is serveable per IsServeable.
 func (t Tablet) IsValidReplica() bool {
-	return t.Type == topodata.TabletType_REPLICA
+	if t.Type != topodata.TabletType_REPLICA {
+		return false
+	}
+	return t.IsServeable()
 }
 
 var httpClient = http.Client{
diff --git a/pkg/vitess/api_client_test.go b/pkg/vitess/api_client_test.go
index 31682317..97a753d2 100644
--- a/pkg/vitess/api_client_test.go
+++ b/pkg/vitess/api_client_test.go
@@ -2,7 +2,6 @@ package vitess
 
 import (
 	"encoding/json"
-	"fmt"
 	"net/http"
 	"net/http/httptest"
 	"testing"
@@ -16,44 +15,69 @@ func TestParseTablets(t *testing.T) {
 	vitessApi := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
 		switch r.URL.String() {
 		case "/api/keyspace/test/tablets/00", "/api/keyspace/test/tablets/00?cells=cell2":
-			data, _ := json.Marshal([]Tablet{
+			json.NewEncoder(w).Encode([]Tablet{
 				{
-					Alias:         &topodata.TabletAlias{Cell: "cell1"},
-					MysqlHostname: "master",
-					Type:          topodata.TabletType_MASTER,
+					// primary (should be ignored)
+					Alias: &topodata.TabletAlias{Cell: "cell1"},
+					Type:  topodata.TabletType_MASTER,
 				},
 				{
+					// replica without realtime tablet stats enabled (assumed to be healthy)
 					Alias:         &topodata.TabletAlias{Cell: "cell2"},
 					MysqlHostname: "replica1",
 					Type:          topodata.TabletType_REPLICA,
 				},
 				{
+					// replica with healthy realtime tablet stats
 					Alias:         &topodata.TabletAlias{Cell: "cell3"},
 					MysqlHostname: "replica2",
-					Type:          topodata.TabletType_REPLICA,
+					Stats: &TabletStats{
+						Realtime: &TabletRealtimeStats{},
+					},
+					Type: topodata.TabletType_REPLICA,
+				},
+				{
+					// replica with nil realtime stats (should be ignored)
+					Alias:         &topodata.TabletAlias{Cell: "cell1"},
+					MysqlHostname: "replica3",
+					Stats: &TabletStats{
+						Realtime: nil,
+					},
+					Type: topodata.TabletType_REPLICA,
 				},
 				{
+					// replica with realtime tablet stats and 'replication not running' error (should be ignored)
 					Alias:         &topodata.TabletAlias{Cell: "cell2"},
-					MysqlHostname: "spare",
-					Type:          topodata.TabletType_SPARE,
+					MysqlHostname: "replica4",
+					Stats: &TabletStats{
+						LastError: "vttablet error: replication is not running",
+						Realtime: &TabletRealtimeStats{
+							HealthError: "replication is not running",
+						},
+					},
+					Type: topodata.TabletType_REPLICA,
 				},
 				{
-					Alias:         &topodata.TabletAlias{Cell: "cell3"},
-					MysqlHostname: "batch",
-					Type:          topodata.TabletType_BATCH,
+					// spare tablet (should be ignored)
+					Alias: &topodata.TabletAlias{Cell: "cell2"},
+					Type:  topodata.TabletType_SPARE,
 				},
 				{
-					Alias:         &topodata.TabletAlias{Cell: "cell2"},
-					MysqlHostname: "backup",
-					Type:          topodata.TabletType_BACKUP,
+					// batch tablet (should be ignored)
+					Alias: &topodata.TabletAlias{Cell: "cell3"},
+					Type:  topodata.TabletType_BATCH,
 				},
 				{
-					Alias:         &topodata.TabletAlias{Cell: "cell1"},
-					MysqlHostname: "restore",
-					Type:          topodata.TabletType_RESTORE,
+					// backup tablet (should be ignored)
+					Alias: &topodata.TabletAlias{Cell: "cell2"},
+					Type:  topodata.TabletType_BACKUP,
+				},
+				{
+					// restore tablet (should be ignored)
+					Alias: &topodata.TabletAlias{Cell: "cell1"},
+					Type:  topodata.TabletType_RESTORE,
 				},
 			})
-			fmt.Fprint(w, string(data))
 		default:
 			w.WriteHeader(http.StatusNotFound)
 		}