Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: prometheus #201

Merged
merged 8 commits into from
Feb 24, 2025
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 13 additions & 0 deletions aggsender/aggsender.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ import (
zkevm "github.com/agglayer/aggkit"
"github.com/agglayer/aggkit/agglayer"
"github.com/agglayer/aggkit/aggsender/db"
"github.com/agglayer/aggkit/aggsender/metrics"
aggsenderrpc "github.com/agglayer/aggkit/aggsender/rpc"
"github.com/agglayer/aggkit/aggsender/types"
"github.com/agglayer/aggkit/bridgesync"
Expand Down Expand Up @@ -125,6 +126,7 @@ func (a *AggSender) GetRPCServices() []jRPC.Service {
// Start starts the AggSender
func (a *AggSender) Start(ctx context.Context) {
a.log.Info("AggSender started")
metrics.Register()
a.status.Start(time.Now().UTC())
a.checkInitialStatus(ctx)
a.sendCertificates(ctx, 0)
Expand Down Expand Up @@ -281,10 +283,12 @@ func (a *AggSender) sendCertificate(ctx context.Context) (*agglayer.SignedCertif
a.log.Infof("building certificate for %s estimatedSize=%d",
certificateParams.String(), certificateParams.EstimatedSize())

start := time.Now()
certificate, err := a.buildCertificate(ctx, certificateParams, lastSentCertificateInfo)
if err != nil {
return nil, fmt.Errorf("error building certificate: %w", err)
}
metrics.CertificateBuildTime(time.Since(start).Seconds())

signedCertificate, err := a.signCertificate(certificate)
if err != nil {
Expand Down Expand Up @@ -312,6 +316,7 @@ func (a *AggSender) sendCertificate(ctx context.Context) (*agglayer.SignedCertif
return nil, fmt.Errorf("error sending certificate: %w", err)
}

metrics.CertificateSent()
a.log.Debugf("certificate send: Height: %d cert: %s", signedCertificate.Height, signedCertificate.Brief())

raw, err := json.Marshal(signedCertificate)
Expand Down Expand Up @@ -787,6 +792,13 @@ func (a *AggSender) updateCertificateStatus(ctx context.Context,
localCert.ID(), localCert.Status, agglayerCert.Status, localCert.ElapsedTimeSinceCreation(),
agglayerCert.String())

switch agglayerCert.Status {
case agglayer.Settled:
metrics.SendingSuccess()
case agglayer.InError:
metrics.SendingError()
}

// That is a strange situation
if agglayerCert.Status.IsOpen() && localCert.Status.IsClosed() {
a.log.Warnf("certificate %s is reopened! from [%s] to [%s]",
Expand Down Expand Up @@ -919,6 +931,7 @@ func getLastSentBlockAndRetryCount(lastSentCertificateInfo *types.CertificateInf
}

retryCount = lastSentCertificateInfo.RetryCount + 1
metrics.SendingRetry()
}

return lastSentBlock, retryCount
Expand Down
79 changes: 79 additions & 0 deletions aggsender/metrics/metrics.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
package metrics

import (
"github.com/agglayer/aggkit/log"
"github.com/agglayer/aggkit/prometheus"
prometheusClient "github.com/prometheus/client_golang/prometheus"
)

// Names of the Prometheus metrics registered by the aggsender package.
const (
// prefix is prepended to every metric name declared in this block.
prefix = "aggsender_"
// Metrics that are only ever incremented by the helpers in this file.
numberOfCertificatesSent = prefix + "number_of_certificates_sent"
numberOfSendingErrors = prefix + "number_of_sending_errors"
numberOfSendingRetries = prefix + "number_of_sending_retries"
numberOfSendingSuccesses = prefix + "number_of_sending_successes"
// Metrics that are overwritten with the latest observed value.
certificateBuildTime = prefix + "certificate_build_time"
proverTime = prefix + "prover_time"
)

// Register the metrics for the aggsender package
func Register() {
gauges := []prometheusClient.GaugeOpts{
{
Name: numberOfCertificatesSent,
Help: "[AGGSENDER] number of certificates sent",
},
{
Name: numberOfSendingErrors,
Help: "[AGGSENDER] number of sending errors",
},
{
Name: numberOfSendingRetries,
Help: "[AGGSENDER] number of sending retries",
},
{
Name: numberOfSendingSuccesses,
Help: "[AGGSENDER] number of sending successes",
},
{
Name: certificateBuildTime,
Help: "[AGGSENDER] certificate build time",
},
{
Name: proverTime,
Help: "[AGGSENDER] prover time",
},
}
prometheus.RegisterGauges(gauges...)
log.Info("Registered prometheus aggsender metrics")
}

// CertificateSent increments the gauge that tracks the number of
// certificates sent (metric name numberOfCertificatesSent).
func CertificateSent() {
prometheus.GaugeInc(numberOfCertificatesSent)
}

// SendingError increments the gauge that tracks the number of sending
// errors (metric name numberOfSendingErrors).
func SendingError() {
prometheus.GaugeInc(numberOfSendingErrors)
}

// SendingRetry increments the gauge that tracks the number of sending
// retries (metric name numberOfSendingRetries).
func SendingRetry() {
prometheus.GaugeInc(numberOfSendingRetries)
}

// SendingSuccess increments the gauge that tracks the number of sending
// successes (metric name numberOfSendingSuccesses).
func SendingSuccess() {
prometheus.GaugeInc(numberOfSendingSuccesses)
}

// CertificateBuildTime sets the certificate build time gauge to value,
// replacing whatever was stored before.
// NOTE(review): the aggsender call site passes an elapsed time in seconds
// (time.Since(start).Seconds()); confirm any other caller uses the same unit.
func CertificateBuildTime(value float64) {
prometheus.GaugeSet(certificateBuildTime, value)
}

// ProverTime sets the prover time gauge to value, replacing whatever was
// stored before.
// NOTE(review): the unit is not established in this file; presumably
// seconds, to match CertificateBuildTime — confirm against the caller.
func ProverTime(value float64) {
prometheus.GaugeSet(proverTime, value)
}
43 changes: 43 additions & 0 deletions cmd/run.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,12 @@ package main
import (
"context"
"fmt"
"net"
"net/http"
"os"
"os/signal"
"runtime"
"time"

jRPC "github.com/0xPolygon/cdk-rpc/rpc"
"github.com/0xPolygon/zkevm-ethtx-manager/ethtxmanager"
Expand All @@ -24,9 +27,11 @@ import (
"github.com/agglayer/aggkit/l1infotreesync"
"github.com/agglayer/aggkit/lastgersync"
"github.com/agglayer/aggkit/log"
"github.com/agglayer/aggkit/prometheus"
"github.com/agglayer/aggkit/reorgdetector"
"github.com/agglayer/aggkit/rpc"
"github.com/ethereum/go-ethereum/ethclient"
"github.com/prometheus/client_golang/prometheus/promhttp"
"github.com/urfave/cli/v2"
)

Expand All @@ -45,6 +50,10 @@ func start(cliCtx *cli.Context) error {
logVersion()
}

if cfg.Prometheus.Enabled {
prometheus.Init()
}

components := cliCtx.StringSlice(config.FlagComponents)
l1Client := runL1ClientIfNeeded(components, cfg.Etherman.URL)
l2Client := runL2ClientIfNeeded(components, getL2RPCUrl(cfg))
Expand Down Expand Up @@ -114,6 +123,13 @@ func start(cliCtx *cli.Context) error {
}
}()
}

if cfg.Prometheus.Enabled {
go startPrometheusHTTPServer(cfg.Prometheus)
} else {
log.Info("Prometheus metrics server is disabled")
}

waitSignal(nil)

return nil
Expand Down Expand Up @@ -588,3 +604,30 @@ func getL2RPCUrl(c *config.Config) string {

return c.AggOracle.EVMSender.URLRPCL2
}

// startPrometheusHTTPServer binds a TCP listener on c.Host:c.Port and serves
// the Prometheus handler on prometheus.Endpoint. It blocks until the server
// stops, so callers run it in its own goroutine. Failures are logged rather
// than returned.
func startPrometheusHTTPServer(c prometheus.Config) {
	const timeout = 10 * time.Second

	// net.JoinHostPort wraps IPv6 literals in brackets, which a plain
	// "%s:%d" format does not: "::1" would yield the unparseable "::1:9091".
	// An already-bracketed host is unwrapped first so that configurations
	// written for the old format keep working.
	host := c.Host
	if n := len(host); n >= 2 && host[0] == '[' && host[n-1] == ']' {
		host = host[1 : n-1]
	}
	address := net.JoinHostPort(host, fmt.Sprint(c.Port))

	lis, err := net.Listen("tcp", address)
	if err != nil {
		log.Errorf("failed to create tcp listener for metrics: %v", err)
		return
	}

	mux := http.NewServeMux()
	mux.Handle(prometheus.Endpoint, promhttp.Handler())

	metricsServer := &http.Server{
		Handler:           mux,
		ReadHeaderTimeout: timeout,
		ReadTimeout:       timeout,
	}
	log.Infof("prometheus server listening on port %d", c.Port)

	// Serve always returns a non-nil error; ErrServerClosed signals an
	// orderly shutdown and is therefore only reported as a warning.
	if err := metricsServer.Serve(lis); err != nil {
		if err == http.ErrServerClosed {
			log.Warnf("prometheus http server stopped")
			return
		}
		log.Errorf("closed http connection for prometheus server: %v", err)
		return
	}
}
4 changes: 4 additions & 0 deletions config/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ import (
"github.com/agglayer/aggkit/l1infotreesync"
"github.com/agglayer/aggkit/lastgersync"
"github.com/agglayer/aggkit/log"
"github.com/agglayer/aggkit/prometheus"
"github.com/agglayer/aggkit/reorgdetector"
"github.com/mitchellh/mapstructure"
"github.com/pelletier/go-toml/v2"
Expand Down Expand Up @@ -130,6 +131,9 @@ type Config struct {

// AggSender is the configuration of the agg sender service
AggSender aggsender.Config

// Prometheus is the configuration of the prometheus service
Prometheus prometheus.Config
}

// Load loads the configuration
Expand Down
5 changes: 4 additions & 1 deletion config/default.go
Original file line number Diff line number Diff line change
Expand Up @@ -228,5 +228,8 @@ MaxEpochPercentageAllowedToSendCertificate=80
[AggSender.MaxSubmitCertificateRate]
NumRequests = 20
Interval = "1h"

[Prometheus]
Enabled = true
Host = "localhost"
Port = 9091
`
24 changes: 24 additions & 0 deletions docs/aggsender.md
Original file line number Diff line number Diff line change
Expand Up @@ -155,6 +155,30 @@ This paragraph explains different use cases with outcomes.
6. Navigate to the `test/bats/pp` folder (`cd test/bats/pp`).
7. Run a test in `bridge-e2e.bats` file: `bats -f "Native gas token deposit to WETH" bridge-e2e.bats`. This will build a new certificate after it is done, and you can debug the whole process.

## Prometheus Endpoint

If enabled in the configuration, Aggsender exposes the following Prometheus metrics:

- Total number of certificates sent
- Number of sending errors
- Number of sending retries
- Number of successful sends
- Certificate build time
- Prover execution time

### Configuration Example

To enable Prometheus metrics, configure Aggsender as follows:

```ini
[Prometheus]
Enabled = true
Host = "localhost"
Port = 9091
```

With this configuration, the metrics will be available at:
http://localhost:9091/metrics

## Additional Documentation
[1] https://potential-couscous-4gw6qyo.pages.github.io/protocol/workflow_centralized.html
[2] https://agglayer.github.io/agglayer/pessimistic_proof/index.html
5 changes: 3 additions & 2 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@ require (
github.com/mattn/go-sqlite3 v1.14.24
github.com/mitchellh/mapstructure v1.5.0
github.com/pelletier/go-toml/v2 v2.2.3
github.com/prometheus/client_golang v1.20.4
github.com/prometheus/client_model v0.6.1
github.com/rubenv/sql-migrate v1.7.1
github.com/russross/meddler v1.0.1
github.com/spf13/viper v1.19.0
Expand Down Expand Up @@ -91,6 +93,7 @@ require (
github.com/knadh/koanf/maps v0.1.1 // indirect
github.com/kr/pretty v0.3.1 // indirect
github.com/kr/text v0.2.0 // indirect
github.com/kylelemons/godebug v1.1.0 // indirect
github.com/logrusorgru/aurora v2.0.3+incompatible // indirect
github.com/magiconair/properties v1.8.7 // indirect
github.com/mailru/easyjson v0.7.7 // indirect
Expand All @@ -108,8 +111,6 @@ require (
github.com/pelletier/go-toml v1.9.5 // indirect
github.com/pkg/errors v0.9.1 // indirect
github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 // indirect
github.com/prometheus/client_golang v1.20.4 // indirect
github.com/prometheus/client_model v0.6.1 // indirect
github.com/prometheus/common v0.55.0 // indirect
github.com/prometheus/procfs v0.15.1 // indirect
github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec // indirect
Expand Down
16 changes: 16 additions & 0 deletions prometheus/api.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
package prometheus

// HTTP paths used by the metrics server.
// NOTE(review): only Endpoint has a handler registered in the code visible in
// this change; confirm where (or whether) the profiling paths are wired up.
const (
// Endpoint is the path on which the Prometheus metrics are exposed.
Endpoint = "/metrics"
// ProfilingIndexEndpoint is the path of the profiling index.
ProfilingIndexEndpoint = "/debug/pprof/"
// ProfileEndpoint is the path that exposes the profile.
ProfileEndpoint = "/debug/pprof/profile"
// ProfilingCmdEndpoint is the path that exposes the process command line.
ProfilingCmdEndpoint = "/debug/pprof/cmdline"
// ProfilingSymbolEndpoint is the path that exposes symbol information.
ProfilingSymbolEndpoint = "/debug/pprof/symbol"
// ProfilingTraceEndpoint is the path that exposes the execution trace.
ProfilingTraceEndpoint = "/debug/pprof/trace"
)
11 changes: 11 additions & 0 deletions prometheus/config.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
package prometheus

// Config holds the settings of the Prometheus metrics server. It is decoded
// from the [Prometheus] section of the configuration via mapstructure tags.
type Config struct {
// Enabled turns the metrics server on or off.
Enabled bool `mapstructure:"Enabled"`
// Host is the host name or IP address the metrics server binds to.
Host string `mapstructure:"Host"`
// Port is the TCP port the metrics server binds to.
Port int `mapstructure:"Port"`
}
Loading
Loading