Skip to content

Commit

Permalink
feat: prometheus (#201)
Browse files Browse the repository at this point in the history
  • Loading branch information
ToniRamirezM authored Feb 24, 2025
1 parent 061c6dd commit 47b4945
Show file tree
Hide file tree
Showing 12 changed files with 1,252 additions and 5 deletions.
16 changes: 15 additions & 1 deletion aggsender/aggsender.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ import (
zkevm "github.com/agglayer/aggkit"
"github.com/agglayer/aggkit/agglayer"
"github.com/agglayer/aggkit/aggsender/db"
"github.com/agglayer/aggkit/aggsender/metrics"
aggsenderrpc "github.com/agglayer/aggkit/aggsender/rpc"
"github.com/agglayer/aggkit/aggsender/types"
"github.com/agglayer/aggkit/bridgesync"
Expand Down Expand Up @@ -125,6 +126,7 @@ func (a *AggSender) GetRPCServices() []jRPC.Service {
// Start starts the AggSender
func (a *AggSender) Start(ctx context.Context) {
a.log.Info("AggSender started")
metrics.Register()
a.status.Start(time.Now().UTC())
a.checkInitialStatus(ctx)
a.sendCertificates(ctx, 0)
Expand Down Expand Up @@ -231,6 +233,7 @@ func (a *AggSender) sendCertificate(ctx context.Context) (*agglayer.SignedCertif
return nil, nil
}

start := time.Now()
lastL2BlockSynced, err := a.l2Syncer.GetLastProcessedBlock(ctx)
if err != nil {
return nil, fmt.Errorf("error getting last processed block from l2: %w", err)
Expand Down Expand Up @@ -301,6 +304,8 @@ func (a *AggSender) sendCertificate(ctx context.Context) (*agglayer.SignedCertif
return nil, fmt.Errorf("forbidden to send certificate due epoch percentage")
}

metrics.CertificateBuildTime(time.Since(start).Seconds())

a.saveCertificateToFile(signedCertificate)
a.log.Infof("certificate ready to be sent to AggLayer: %s", signedCertificate.Brief())
if a.cfg.DryRun {
Expand All @@ -312,7 +317,8 @@ func (a *AggSender) sendCertificate(ctx context.Context) (*agglayer.SignedCertif
return nil, fmt.Errorf("error sending certificate: %w", err)
}

a.log.Debugf("certificate sent: Height: %d cert: %s", signedCertificate.Height, signedCertificate.Brief())
metrics.CertificateSent()
a.log.Debugf("certificate send: Height: %d cert: %s", signedCertificate.Height, signedCertificate.Brief())

raw, err := json.Marshal(signedCertificate)
if err != nil {
Expand Down Expand Up @@ -787,6 +793,13 @@ func (a *AggSender) updateCertificateStatus(ctx context.Context,
localCert.ID(), localCert.Status, agglayerCert.Status, localCert.ElapsedTimeSinceCreation(),
agglayerCert.String())

switch agglayerCert.Status {
case agglayer.Settled:
metrics.Settled()
case agglayer.InError:
metrics.InError()
}

// That is a strange situation
if agglayerCert.Status.IsOpen() && localCert.Status.IsClosed() {
a.log.Warnf("certificate %s is reopened! from [%s] to [%s]",
Expand Down Expand Up @@ -919,6 +932,7 @@ func getLastSentBlockAndRetryCount(lastSentCertificateInfo *types.CertificateInf
}

retryCount = lastSentCertificateInfo.RetryCount + 1
metrics.SendingRetry()
}

return lastSentBlock, retryCount
Expand Down
79 changes: 79 additions & 0 deletions aggsender/metrics/metrics.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
package metrics

import (
"github.com/agglayer/aggkit/log"
"github.com/agglayer/aggkit/prometheus"
prometheusClient "github.com/prometheus/client_golang/prometheus"
)

// Names of the Prometheus metrics exposed by the aggsender package. Every name
// is built from the shared "aggsender_" prefix so the metrics group together.
const (
	// prefix is prepended to every aggsender metric name.
	prefix = "aggsender_"

	// Certificate life-cycle metrics. The names follow the same
	// "number_of_certificates_<state>" / "number_of_<event>" pattern so that the
	// exported name matches the Help text registered for it.
	numberOfCertificatesSent    = prefix + "number_of_certificates_sent"
	numberOfCertificatesInError = prefix + "number_of_certificates_in_error"
	numberOfSendingRetries      = prefix + "number_of_sending_retries"
	numberOfCertificatesSettled = prefix + "number_of_certificates_settled"

	// Timing metrics.
	certificateBuildTime = prefix + "certificate_build_time"
	proverTime           = prefix + "prover_time"
)

// Register the metrics for the aggsender package
func Register() {
gauges := []prometheusClient.GaugeOpts{
{
Name: numberOfCertificatesSent,
Help: "[AGGSENDER] number of certificates sent",
},
{
Name: numberOfCertificatesInError,
Help: "[AGGSENDER] number of certificates in error",
},
{
Name: numberOfSendingRetries,
Help: "[AGGSENDER] number of sending retries",
},
{
Name: numberOfCertificatesSettled,
Help: "[AGGSENDER] number of certificates settled",
},
{
Name: certificateBuildTime,
Help: "[AGGSENDER] certificate build time",
},
{
Name: proverTime,
Help: "[AGGSENDER] prover time",
},
}
prometheus.RegisterGauges(gauges...)
log.Info("Registered prometheus aggsender metrics")
}

// CertificateSent increments the gauge registered under
// numberOfCertificatesSent by calling prometheus.GaugeInc. The aggsender calls
// it after a certificate has been sent to the AggLayer without error.
func CertificateSent() {
prometheus.GaugeInc(numberOfCertificatesSent)
}

// InError increments the gauge registered under numberOfCertificatesInError by
// calling prometheus.GaugeInc. The aggsender calls it when the AggLayer reports
// a certificate with status InError.
func InError() {
prometheus.GaugeInc(numberOfCertificatesInError)
}

// SendingRetry increments the gauge registered under numberOfSendingRetries by
// calling prometheus.GaugeInc. The aggsender calls it where it bumps the retry
// count of the last sent certificate.
func SendingRetry() {
prometheus.GaugeInc(numberOfSendingRetries)
}

// Settled increments the gauge registered under numberOfCertificatesSettled by
// calling prometheus.GaugeInc. The aggsender calls it when the AggLayer reports
// a certificate with status Settled.
func Settled() {
prometheus.GaugeInc(numberOfCertificatesSettled)
}

// CertificateBuildTime sets the gauge registered under certificateBuildTime to
// value via prometheus.GaugeSet. The value is passed through unchanged; the
// aggsender supplies the elapsed build time in seconds.
func CertificateBuildTime(value float64) {
prometheus.GaugeSet(certificateBuildTime, value)
}

// ProverTime sets the gauge registered under proverTime to value via
// prometheus.GaugeSet. The value is passed through unchanged.
// NOTE(review): the unit is not fixed here — presumably seconds, matching
// CertificateBuildTime; confirm against callers.
func ProverTime(value float64) {
prometheus.GaugeSet(proverTime, value)
}
43 changes: 43 additions & 0 deletions cmd/run.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,12 @@ package main
import (
"context"
"fmt"
"net"
"net/http"
"os"
"os/signal"
"runtime"
"time"

jRPC "github.com/0xPolygon/cdk-rpc/rpc"
"github.com/0xPolygon/zkevm-ethtx-manager/ethtxmanager"
Expand All @@ -24,9 +27,11 @@ import (
"github.com/agglayer/aggkit/l1infotreesync"
"github.com/agglayer/aggkit/lastgersync"
"github.com/agglayer/aggkit/log"
"github.com/agglayer/aggkit/prometheus"
"github.com/agglayer/aggkit/reorgdetector"
"github.com/agglayer/aggkit/rpc"
"github.com/ethereum/go-ethereum/ethclient"
"github.com/prometheus/client_golang/prometheus/promhttp"
"github.com/urfave/cli/v2"
)

Expand All @@ -45,6 +50,10 @@ func start(cliCtx *cli.Context) error {
logVersion()
}

if cfg.Prometheus.Enabled {
prometheus.Init()
}

components := cliCtx.StringSlice(config.FlagComponents)
l1Client := runL1ClientIfNeeded(components, cfg.Etherman.URL)
l2Client := runL2ClientIfNeeded(components, getL2RPCUrl(cfg))
Expand Down Expand Up @@ -114,6 +123,13 @@ func start(cliCtx *cli.Context) error {
}
}()
}

if cfg.Prometheus.Enabled {
go startPrometheusHTTPServer(cfg.Prometheus)
} else {
log.Info("Prometheus metrics server is disabled")
}

waitSignal(nil)

return nil
Expand Down Expand Up @@ -588,3 +604,30 @@ func getL2RPCUrl(c *config.Config) string {

return c.AggOracle.EVMSender.URLRPCL2
}

// startPrometheusHTTPServer exposes the Prometheus metrics handler on
// prometheus.Endpoint, listening on the host and port taken from c.
//
// The call blocks until the HTTP server stops, so it is meant to be run in its
// own goroutine. Errors are logged, not returned: a failure to bind the
// listener or an unexpected server error only disables the metrics endpoint.
func startPrometheusHTTPServer(c prometheus.Config) {
	const readTimeout = 10 * time.Second

	// Build the listen address with net.JoinHostPort so an IPv6 literal host
	// ("::1") becomes "[::1]:9091"; a plain "%s:%d" format would produce an
	// address net.Listen cannot parse. A host that is already bracketed is
	// unwrapped first so that both spellings keep working.
	host := c.Host
	if len(host) > 1 && host[0] == '[' && host[len(host)-1] == ']' {
		host = host[1 : len(host)-1]
	}
	address := net.JoinHostPort(host, fmt.Sprint(c.Port))

	lis, err := net.Listen("tcp", address)
	if err != nil {
		log.Errorf("failed to create tcp listener for metrics: %v", err)
		return
	}

	mux := http.NewServeMux()
	mux.Handle(prometheus.Endpoint, promhttp.Handler())

	metricsServer := &http.Server{
		Handler:           mux,
		ReadHeaderTimeout: readTimeout,
		ReadTimeout:       readTimeout,
	}
	log.Infof("prometheus server listening on port %d", c.Port)

	// Serve only returns once the server has stopped; http.ErrServerClosed is
	// the expected result of a shutdown, anything else is a real failure.
	if err := metricsServer.Serve(lis); err != nil {
		if err == http.ErrServerClosed {
			log.Warnf("prometheus http server stopped")
			return
		}
		log.Errorf("closed http connection for prometheus server: %v", err)
	}
}
4 changes: 4 additions & 0 deletions config/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ import (
"github.com/agglayer/aggkit/l1infotreesync"
"github.com/agglayer/aggkit/lastgersync"
"github.com/agglayer/aggkit/log"
"github.com/agglayer/aggkit/prometheus"
"github.com/agglayer/aggkit/reorgdetector"
"github.com/mitchellh/mapstructure"
"github.com/pelletier/go-toml/v2"
Expand Down Expand Up @@ -130,6 +131,9 @@ type Config struct {

// AggSender is the configuration of the agg sender service
AggSender aggsender.Config

// Prometheus is the configuration of the prometheus service
Prometheus prometheus.Config
}

// Load loads the configuration
Expand Down
5 changes: 4 additions & 1 deletion config/default.go
Original file line number Diff line number Diff line change
Expand Up @@ -228,5 +228,8 @@ MaxEpochPercentageAllowedToSendCertificate=80
[AggSender.MaxSubmitCertificateRate]
NumRequests = 20
Interval = "1h"
[Prometheus]
Enabled = true
Host = "localhost"
Port = 9091
`
24 changes: 24 additions & 0 deletions docs/aggsender.md
Original file line number Diff line number Diff line change
Expand Up @@ -155,6 +155,30 @@ This paragraph explains different use cases with outcomes.
6. Navigate to the `test/bats/pp` folder (`cd test/bats/pp`).
7. Run a test in `bridge-e2e.bats` file: `bats -f "Native gas token deposit to WETH" bridge-e2e.bats`. This will build a new certificate after it is done, and you can debug the whole process.

## Prometheus Endpoint

If enabled in the configuration, Aggsender exposes the following Prometheus metrics:

- Total number of certificates sent
- Number of certificates in error
- Number of sending retries
- Number of certificates settled
- Certificate build time
- Prover execution time

### Configuration Example

To enable Prometheus metrics, configure Aggsender as follows:

```ini
[Prometheus]
Enabled = true
Host = "localhost"
Port = 9091
```

With this configuration, the metrics will be available at:
http://localhost:9091/metrics

## Additional Documentation
[1] https://potential-couscous-4gw6qyo.pages.github.io/protocol/workflow_centralized.html
[2] https://agglayer.github.io/agglayer/pessimistic_proof/index.html
5 changes: 3 additions & 2 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@ require (
github.com/mattn/go-sqlite3 v1.14.24
github.com/mitchellh/mapstructure v1.5.0
github.com/pelletier/go-toml/v2 v2.2.3
github.com/prometheus/client_golang v1.20.4
github.com/prometheus/client_model v0.6.1
github.com/rubenv/sql-migrate v1.7.1
github.com/russross/meddler v1.0.1
github.com/spf13/viper v1.19.0
Expand Down Expand Up @@ -91,6 +93,7 @@ require (
github.com/knadh/koanf/maps v0.1.1 // indirect
github.com/kr/pretty v0.3.1 // indirect
github.com/kr/text v0.2.0 // indirect
github.com/kylelemons/godebug v1.1.0 // indirect
github.com/logrusorgru/aurora v2.0.3+incompatible // indirect
github.com/magiconair/properties v1.8.7 // indirect
github.com/mailru/easyjson v0.7.7 // indirect
Expand All @@ -108,8 +111,6 @@ require (
github.com/pelletier/go-toml v1.9.5 // indirect
github.com/pkg/errors v0.9.1 // indirect
github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 // indirect
github.com/prometheus/client_golang v1.20.4 // indirect
github.com/prometheus/client_model v0.6.1 // indirect
github.com/prometheus/common v0.55.0 // indirect
github.com/prometheus/procfs v0.15.1 // indirect
github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec // indirect
Expand Down
16 changes: 16 additions & 0 deletions prometheus/api.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
package prometheus

// HTTP paths used by the metrics and profiling endpoints.
const (
// Endpoint is the path on which the Prometheus metrics are exposed.
Endpoint = "/metrics"
// ProfilingIndexEndpoint is the path of the profiling (pprof) index.
ProfilingIndexEndpoint = "/debug/pprof/"
// ProfileEndpoint is the path of the pprof profile.
ProfileEndpoint = "/debug/pprof/profile"
// ProfilingCmdEndpoint is the path of the pprof command-line endpoint.
ProfilingCmdEndpoint = "/debug/pprof/cmdline"
// ProfilingSymbolEndpoint is the path of the pprof symbol endpoint.
ProfilingSymbolEndpoint = "/debug/pprof/symbol"
// ProfilingTraceEndpoint is the path of the pprof trace endpoint.
ProfilingTraceEndpoint = "/debug/pprof/trace"
)
11 changes: 11 additions & 0 deletions prometheus/config.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
package prometheus

// Config holds the settings of the Prometheus metrics HTTP server. It is
// loaded from the [Prometheus] section of the configuration.
type Config struct {
// Enabled turns the metrics server on or off.
Enabled bool `mapstructure:"Enabled"`
// Host is the host name or IP address the metrics server binds to.
Host string `mapstructure:"Host"`
// Port is the TCP port the metrics server binds to.
Port int `mapstructure:"Port"`
}
Loading

0 comments on commit 47b4945

Please sign in to comment.