Skip to content

Commit

Permalink
dragonboat: added Prometheus health metrics
Browse files Browse the repository at this point in the history
  • Loading branch information
lni committed Jun 24, 2019
1 parent d207acf commit 2d884ea
Show file tree
Hide file tree
Showing 12 changed files with 391 additions and 59 deletions.
5 changes: 5 additions & 0 deletions config/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -230,6 +230,11 @@ type NodeHostConfig struct {
// instance for exchanging Raft messages between NodeHost instances. The default
// zero value causes the built-in TCP based RPC module to be used.
RaftRPCFactory RaftRPCFactoryFunc
// EnableMetrics determines whether health metrics in Prometheus format should
// be enabled.
EnableMetrics bool
// RaftEventListener is the listener to get notified for certain Raft events.
RaftEventListener raftio.IRaftEventListener
}

// Validate validates the NodeHostConfig instance and returns an error when
Expand Down
139 changes: 139 additions & 0 deletions event.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,139 @@
// Copyright 2017-2019 Lei Ni ([email protected]) and other Dragonboat authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package dragonboat

import (
"fmt"
"io"
"sync/atomic"

"github.com/VictoriaMetrics/metrics"
"github.com/lni/dragonboat/v3/internal/server"
"github.com/lni/dragonboat/v3/raftio"
)

// WriteHealthMetrics dumps the registered health metrics to w in the
// Prometheus text format. It is meant to be invoked from the HTTP handler
// that serves the metrics endpoint.
func WriteHealthMetrics(w io.Writer) {
	// Process-level metrics are not requested; only the metrics registered
	// with the metrics package are written.
	const exposeProcessMetrics = false
	metrics.WritePrometheus(w, exposeProcessMetrics)
}

// raftEventListener handles Raft events described by the internal server
// package types. It keeps the shared leader ID up to date, optionally
// maintains Prometheus style health metrics, and relays leader changes to an
// optional user supplied listener.
type raftEventListener struct {
	// clusterID and nodeID identify the node this listener belongs to; they
	// are also used as metric labels.
	clusterID, nodeID uint64
	// leaderID points to the current leader ID. It is written with
	// atomic.StoreUint64 in LeaderUpdated and read atomically by the
	// is_leader gauge callback.
	leaderID *uint64
	// metrics tells whether the gauge and counters below were created and
	// should be updated.
	metrics bool
	// isLeader reports 1 when the stored leader ID equals nodeID, 0 otherwise.
	isLeader *metrics.Gauge
	// Counters for election campaigns that were launched or skipped.
	campaignLaunched, campaignSkipped *metrics.Counter
	// Counters for rejected snapshots and rejected replication.
	snapshotRejected, replicationRejected *metrics.Counter
	// Counters for dropped proposals (counted per entry) and dropped
	// read index requests.
	proposalDropped, readIndexDropped *metrics.Counter
	// userListener, when not nil, is notified of leader updates.
	userListener raftio.IRaftEventListener
}

// newRaftEventListener builds the event listener for the node identified by
// clusterID and nodeID.
//
// leaderID is the shared location holding the current leader ID; it is read
// atomically by the is_leader gauge. When useMetrics is true, the counters and
// the gauge are obtained from the metrics package using names labelled with
// the cluster and node IDs; otherwise no metric is created. userListener may
// be nil.
func newRaftEventListener(clusterID uint64, nodeID uint64,
	leaderID *uint64, useMetrics bool,
	userListener raftio.IRaftEventListener) *raftEventListener {
	listener := &raftEventListener{
		clusterID:    clusterID,
		nodeID:       nodeID,
		leaderID:     leaderID,
		metrics:      useMetrics,
		userListener: userListener,
	}
	if !useMetrics {
		return listener
	}

	// Every metric name carries the same label set.
	labels := fmt.Sprintf(`{clusterid="%d",nodeid="%d"}`, clusterID, nodeID)
	counterFor := func(format string) *metrics.Counter {
		return metrics.GetOrCreateCounter(fmt.Sprintf(format, labels))
	}

	listener.campaignLaunched = counterFor(`campaign_launched%s`)
	listener.campaignSkipped = counterFor(`campaign_skipped%s`)
	listener.snapshotRejected = counterFor(`snapshot_rejected%s`)
	listener.replicationRejected = counterFor(`replication_rejected%s`)
	listener.proposalDropped = counterFor(`proposal_dropped%s`)
	listener.readIndexDropped = counterFor(`read_index_dropped%s`)

	// NOTE(review): GetOrCreateGauge presumably hands back an already
	// registered gauge when the name exists, in which case a listener
	// re-created for the same cluster/node would keep the earlier callback
	// and its leaderID pointer - confirm against the metrics package docs.
	leading := func() float64 {
		if atomic.LoadUint64(leaderID) != nodeID {
			return 0.0
		}
		return 1.0
	}
	listener.isLeader = metrics.GetOrCreateGauge(
		fmt.Sprintf(`is_leader%s`, labels), leading)

	return listener
}

// LeaderUpdated records the newly reported leader ID and, when a user
// listener is configured, forwards the leader information to it.
func (e *raftEventListener) LeaderUpdated(info server.LeaderInfo) {
	// Publish the leader ID first; when metrics are enabled the is_leader
	// gauge callback reads this value.
	atomic.StoreUint64(e.leaderID, info.LeaderID)
	if e.userListener == nil {
		return
	}
	// The user callback runs on its own goroutine, presumably so that user
	// code cannot stall the caller.
	// NOTE(review): each notification gets a fresh goroutine, so nothing
	// here orders successive notifications or bounds their number - confirm
	// this is acceptable for user listeners.
	go e.userListener.LeaderUpdated(raftio.LeaderInfo{
		ClusterID: info.ClusterID,
		NodeID:    info.NodeID,
		Term:      info.Term,
		LeaderID:  info.LeaderID,
	})
}

// CampaignLaunched bumps the campaign_launched counter by one when metrics
// are enabled. The user listener is not invoked for this event.
func (e *raftEventListener) CampaignLaunched(info server.CampaignInfo) {
	if !e.metrics {
		// Counters are only created when metrics are enabled.
		return
	}
	e.campaignLaunched.Add(1)
}

// CampaignSkipped bumps the campaign_skipped counter by one when metrics are
// enabled. The user listener is not invoked for this event.
func (e *raftEventListener) CampaignSkipped(info server.CampaignInfo) {
	if !e.metrics {
		// Counters are only created when metrics are enabled.
		return
	}
	e.campaignSkipped.Add(1)
}

// SnapshotRejected bumps the snapshot_rejected counter by one when metrics
// are enabled. The user listener is not invoked for this event.
func (e *raftEventListener) SnapshotRejected(info server.SnapshotInfo) {
	if !e.metrics {
		// Counters are only created when metrics are enabled.
		return
	}
	e.snapshotRejected.Add(1)
}

// ReplicationRejected bumps the replication_rejected counter by one when
// metrics are enabled. The user listener is not invoked for this event.
func (e *raftEventListener) ReplicationRejected(info server.ReplicationInfo) {
	if !e.metrics {
		// Counters are only created when metrics are enabled.
		return
	}
	e.replicationRejected.Add(1)
}

// ProposalDropped adds the number of entries carried by info to the
// proposal_dropped counter when metrics are enabled. The user listener is not
// invoked for this event.
func (e *raftEventListener) ProposalDropped(info server.ProposalInfo) {
	if !e.metrics {
		// Counters are only created when metrics are enabled.
		return
	}
	// The counter tracks dropped entries, not dropped batches.
	dropped := len(info.Entries)
	e.proposalDropped.Add(dropped)
}

// ReadIndexDropped bumps the read_index_dropped counter by one when metrics
// are enabled. The user listener is not invoked for this event.
func (e *raftEventListener) ReadIndexDropped(info server.ReadIndexInfo) {
	if !e.metrics {
		// Counters are only created when metrics are enabled.
		return
	}
	e.readIndexDropped.Add(1)
}
5 changes: 4 additions & 1 deletion go.mod
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
module github.com/lni/dragonboat/v3

require github.com/golang/protobuf v1.2.0
require (
github.com/VictoriaMetrics/metrics v1.5.0
github.com/golang/protobuf v1.2.0
)
6 changes: 6 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
@@ -1,2 +1,8 @@
github.com/VictoriaMetrics/metrics v1.5.0 h1:WvQqPn+z9pR1U7J58CgaGiWrN8phNGSpr2xUSxJnfpE=
github.com/VictoriaMetrics/metrics v1.5.0/go.mod h1:QZAL5yLaXvhSPeib0ahluGo9VK0HXDZHovKaKlpuWvs=
github.com/golang/protobuf v1.2.0 h1:P3YflyNX/ehuJFLhxviNdFxQPkGK5cDcApsge1SqnvM=
github.com/golang/protobuf v1.2.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U=
github.com/valyala/fastrand v1.0.0 h1:LUKT9aKer2dVQNUi3waewTbKV+7H17kvWFNKs2ObdkI=
github.com/valyala/fastrand v1.0.0/go.mod h1:HWqCzkrkg6QXT8V2EXWvXCoow7vLwOFN002oeRzjapQ=
github.com/valyala/histogram v1.0.1 h1:FzA7n2Tz/wKRMejgu3PV1vw3htAklTjjuoI6z3d4KDg=
github.com/valyala/histogram v1.0.1/go.mod h1:lQy0xA4wUz2+IUnf97SivorsJIp8FxsnRd6x25q7Mto=
17 changes: 4 additions & 13 deletions internal/raft/peer.go
Original file line number Diff line number Diff line change
Expand Up @@ -41,9 +41,9 @@ package raft

import (
"sort"
"sync/atomic"

"github.com/lni/dragonboat/v3/config"
"github.com/lni/dragonboat/v3/internal/server"
pb "github.com/lni/dragonboat/v3/raftpb"
)

Expand All @@ -56,18 +56,18 @@ type PeerAddress struct {
// Peer is the interface struct for interacting with the underlying Raft
// protocol implementation.
type Peer struct {
// leaderID is read and written through sync/atomic by GetLeaderID and
// recordLeader.
leaderID uint64
// raft is the underlying raft instance, set by Launch.
raft *raft
// NOTE(review): presumably the raft state captured at the previous update;
// its use is not visible here - confirm.
prevState pb.State
}

// Launch starts or restarts a Raft node.
func Launch(config *config.Config, logdb ILogDB,
func Launch(config *config.Config,
logdb ILogDB, events server.IRaftEventListener,
addresses []PeerAddress, initial bool, newNode bool) *Peer {
checkLaunchRequest(config, addresses, initial, newNode)
r := newRaft(config, logdb)
rc := &Peer{raft: r}
rc.raft.recordLeader = rc.recordLeader
rc.raft.events = events
_, lastIndex := logdb.GetRange()
if newNode && !config.IsObserver {
r.becomeFollower(1, NoLeader)
Expand Down Expand Up @@ -266,11 +266,6 @@ func (rc *Peer) DumpRaftInfoToLog(addrMap map[uint64]string) {
rc.raft.dumpRaftInfoToLog(addrMap)
}

// GetLeaderID reports the leader ID most recently stored on this peer. The
// value is loaded atomically.
func (rc *Peer) GetLeaderID() uint64 {
	id := atomic.LoadUint64(&rc.leaderID)
	return id
}

// NotifyRaftLastApplied passes on the lastApplied index confirmed by the RSM to
// the raft state machine.
func (rc *Peer) NotifyRaftLastApplied(lastApplied uint64) {
Expand All @@ -283,10 +278,6 @@ func (rc *Peer) HasEntryToApply() bool {
return rc.entryLog().hasEntriesToApply()
}

// recordLeader atomically stores leaderID as the leader ID kept on this peer.
func (rc *Peer) recordLeader(leaderID uint64) {
	slot := &rc.leaderID
	atomic.StoreUint64(slot, leaderID)
}

// entryLog returns the entry log held by the underlying raft instance.
func (rc *Peer) entryLog() *entryLog {
	r := rc.raft
	return r.log
}
Expand Down
Loading

0 comments on commit 2d884ea

Please sign in to comment.