diff --git a/.golangci.yml b/.golangci.yml index d9728cb4..f50f1e66 100644 --- a/.golangci.yml +++ b/.golangci.yml @@ -13,6 +13,9 @@ linters-settings: linters: enable-all: true + disable: + - scopelint # too many false positives + - gochecknoglobals # mostly useless issues: exclude-use-default: false diff --git a/README.md b/README.md index d62f3033..b9cd7a1c 100644 --- a/README.md +++ b/README.md @@ -52,20 +52,46 @@ scrape_configs: - job_name: rds-basic scrape_interval: 60s scrape_timeout: 55s - metrics_path: /basic honor_labels: true static_configs: - targets: - 127.0.0.1:9042 + params: + collect[]: + - basic - job_name: rds-enhanced scrape_interval: 10s scrape_timeout: 9s - metrics_path: /enhanced honor_labels: true static_configs: - targets: - 127.0.0.1:9042 + params: + collect[]: + - enhanced ``` `honor_labels: true` is important because exporter returns metrics with `instance` label set. + +## Collectors + +### Enabled by default + +Name | Description +---------|------------- +basic | Basic metrics from https://docs.aws.amazon.com/AmazonRDS/latest/UserGuide/MonitoringOverview.html#monitoring-cloudwatch. +enhanced | Enhanced metrics from https://docs.aws.amazon.com/AmazonRDS/latest/UserGuide/USER_Monitoring.OS.html. + +### Filtering enabled collectors + +The `rds_exporter` will expose all metrics from enabled collectors by default. + +For advanced use the `rds_exporter` can be passed an optional list of collectors to filter metrics. The `collect[]` parameter may be used multiple times. In Prometheus configuration you can use this syntax under the [scrape config](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#scrape_config). 
+ +``` + params: + collect[]: + - basic + - enhanced +``` diff --git a/basic/basic.go b/basic/basic.go index 4ef0dcd4..4b8e9f9e 100644 --- a/basic/basic.go +++ b/basic/basic.go @@ -11,8 +11,6 @@ import ( "github.com/percona/rds_exporter/sessions" ) -//go:generate go run generate/main.go generate/utils.go - var ( scrapeTimeDesc = prometheus.NewDesc( "rds_exporter_scrape_duration_seconds", @@ -22,6 +20,16 @@ var ( ) ) +// OverlappingMetrics flag. +type OverlappingMetrics bool + +const ( + // EnableOverlapping flag for enabling overlapping metrics. + EnableOverlapping OverlappingMetrics = true + // DisableOverlapping flag for disabling overlapping metrics. + DisableOverlapping OverlappingMetrics = false +) + type Metric struct { Name string Desc *prometheus.Desc @@ -35,11 +43,19 @@ type Exporter struct { } // New creates a new instance of a Exporter. -func New(config *config.Config, sessions *sessions.Sessions) *Exporter { +// enableOverlapping is used for backward compatibility. +// See: https://jira.percona.com/browse/PMM-1901. +func New(config *config.Config, sessions *sessions.Sessions, enableOverlapping OverlappingMetrics) *Exporter { + var m []Metric + m = append(m, Metrics...) + if enableOverlapping { + m = append(m, MetricsOverlappingWithEnhancedCollector...) 
+ } + return &Exporter{ config: config, sessions: sessions, - metrics: Metrics, + metrics: m, l: log.With("component", "basic"), } } diff --git a/basic/basic_test.go b/basic/basic_test.go index 19c95597..e3596b74 100644 --- a/basic/basic_test.go +++ b/basic/basic_test.go @@ -13,7 +13,7 @@ import ( "github.com/percona/rds_exporter/sessions" ) -func getExporter(t *testing.T) *Exporter { +func getExporter(t *testing.T, enableMetrics OverlappingMetrics) *Exporter { t.Helper() cfg, err := config.Load("../config.yml") @@ -21,11 +21,28 @@ func getExporter(t *testing.T) *Exporter { client := client.New() sess, err := sessions.New(cfg.Instances, client.HTTP(), false) require.NoError(t, err) - return New(cfg, sess) + return New(cfg, sess, enableMetrics) } func TestCollector_Describe(t *testing.T) { - c := getExporter(t) + c := getExporter(t, DisableOverlapping) + ch := make(chan *prometheus.Desc) + go func() { + c.Describe(ch) + close(ch) + }() + + const expected = 47 + descs := make([]*prometheus.Desc, 0, expected) + for d := range ch { + descs = append(descs, d) + } + + assert.Equal(t, expected, len(descs), "%+v", descs) +} + +func TestCollector_Describe_WithOverlappingMetrics(t *testing.T) { + c := getExporter(t, EnableOverlapping) ch := make(chan *prometheus.Desc) go func() { c.Describe(ch) @@ -42,7 +59,24 @@ func TestCollector_Describe(t *testing.T) { } func TestCollector_Collect(t *testing.T) { - c := getExporter(t) + c := getExporter(t, DisableOverlapping) + ch := make(chan prometheus.Metric) + go func() { + c.Collect(ch) + close(ch) + }() + + const expected = 91 + metrics := make([]helpers.Metric, 0, expected) + for m := range ch { + metrics = append(metrics, *helpers.ReadMetric(m)) + } + + assert.Equal(t, expected, len(metrics), "%+v", metrics) +} + +func TestCollector_Collect_WithOverlappingMetrics(t *testing.T) { + c := getExporter(t, EnableOverlapping) ch := make(chan prometheus.Metric) go func() { c.Collect(ch) diff --git a/basic/generate/main.go 
b/basic/generate/main.go deleted file mode 100644 index 9a1510a7..00000000 --- a/basic/generate/main.go +++ /dev/null @@ -1,179 +0,0 @@ -// The following directive is necessary to make the package coherent: -// This program generates metrics.go. It can be invoked by running -// go generate -package main - -import ( - "log" - "os" - "sort" - "text/template" -) - -type Metric string - -func (m Metric) FqName() string { - switch m { - case "FreeStorageSpace": - return "node_filesystem_free" - case "FreeableMemory": - return "node_memory_Cached" - case "CPUUtilization": - return "node_cpu_average" - case "EngineUptime": - return "node_boot_time" - } - - return safeName("AWS/RDS_" + toSnakeCase(string(m)) + "_average") -} - -func (m Metric) Labels() []string { - return []string{ - "instance", - "region", - } -} - -func (m Metric) ConstLabels() map[string]string { - switch m { - case "CPUUtilization": - return map[string]string{ - "cpu": "All", - "mode": "total", - } - } - - return nil -} - -func (m Metric) Name() string { - return string(m) -} - -func (m Metric) Help() string { - if v, ok := doc[m.Name()]; ok { - return v - } - - return m.Name() -} - -var ( - metrics = []Metric{ - "ActiveTransactions", - "AuroraBinlogReplicaLag", - "AuroraReplicaLag", - "AuroraReplicaLagMaximum", - "AuroraReplicaLagMinimum", - "BinLogDiskUsage", - "BlockedTransactions", - "BufferCacheHitRatio", - "BurstBalance", - "CommitLatency", - "CommitThroughput", - "CPUCreditBalance", - "CPUCreditUsage", - "CPUUtilization", - "DatabaseConnections", - "DDLLatency", - "DDLThroughput", - "Deadlocks", - "DeleteLatency", - "DeleteThroughput", - "DiskQueueDepth", - "DMLLatency", - "DMLThroughput", - "EngineUptime", - "FreeableMemory", - "FreeLocalStorage", - "FreeStorageSpace", - "InsertLatency", - "InsertThroughput", - "LoginFailures", - "NetworkReceiveThroughput", - "NetworkThroughput", - "NetworkTransmitThroughput", - "Queries", - "ReadIOPS", - "ReadLatency", - "ReadThroughput", - 
"ResultSetCacheHitRatio", - "SelectLatency", - "SelectThroughput", - "SwapUsage", - "UpdateLatency", - "UpdateThroughput", - "VolumeBytesUsed", - "VolumeReadIOPs", - "VolumeWriteIOPs", - "WriteIOPS", - "WriteLatency", - "WriteThroughput", - } - - doc = map[string]string{ - "BinLogDiskUsage": "The amount of disk space occupied by binary logs on the master. Applies to MySQL read replicas. Units: Bytes", - "BurstBalance": "The percent of General Purpose SSD (gp2) burst-bucket I/O credits available. Units: Percent", - "CPUUtilization": "The percentage of CPU utilization. Units: Percent", - "CPUCreditUsage": "[T2 instances] The number of CPU credits consumed by the instance. One CPU credit equals one vCPU running at 100% utilization for one minute or an equivalent combination of vCPUs, utilization, and time (for example, one vCPU running at 50% utilization for two minutes or two vCPUs running at 25% utilization for two minutes). CPU credit metrics are available only at a 5 minute frequency. If you specify a period greater than five minutes, use the Sum statistic instead of the Average statistic. Units: Count", - "CPUCreditBalance": "[T2 instances] The number of CPU credits available for the instance to burst beyond its base CPU utilization. Credits are stored in the credit balance after they are earned and removed from the credit balance after they expire. Credits expire 24 hours after they are earned. CPU credit metrics are available only at a 5 minute frequency. Units: Count", - "DatabaseConnections": "The number of database connections in use. Units: Count", - "DiskQueueDepth": "The number of outstanding IOs (read/write requests) waiting to access the disk. Units: Count", - "FreeableMemory": "The amount of available random access memory. Units: Bytes", - "FreeStorageSpace": "The amount of available storage space. Units: Bytes", - "MaximumUsedTransactionIDs": "The maximum transaction ID that has been used. Applies to PostgreSQL. 
Units: Count", - "NetworkReceiveThroughput": "The incoming (Receive) network traffic on the DB instance, including both customer database traffic and Amazon RDS traffic used for monitoring and replication. Units: Bytes/second", - "NetworkTransmitThroughput": "The outgoing (Transmit) network traffic on the DB instance, including both customer database traffic and Amazon RDS traffic used for monitoring and replication. Units: Bytes/second", - "OldestReplicationSlotLag": "The lagging size of the replica lagging the most in terms of WAL data received. Applies to PostgreSQL. Units: Megabytes", - "ReadIOPS": "The average number of disk I/O operations per second. Units: Count/Second", - "ReadLatency": "The average amount of time taken per disk I/O operation. Units: Seconds", - "ReadThroughput": "The average number of bytes read from disk per second. Units: Bytes/Second", - "ReplicaLag": "The amount of time a Read Replica DB instance lags behind the source DB instance. Applies to MySQL, MariaDB, and PostgreSQL Read Replicas. Units: Seconds", - "ReplicationSlotDiskUsage": "The disk space used by replication slot files. Applies to PostgreSQL. Units: Megabytes", - "SwapUsage": "The amount of swap space used on the DB instance. Units: Bytes", - "TransactionLogsDiskUsage": "The disk space used by transaction logs. Applies to PostgreSQL. Units: Megabytes", - "TransactionLogsGeneration": "The size of transaction logs generated per second. Applies to PostgreSQL. Units: Megabytes/second", - "WriteIOPS": "The average number of disk I/O operations per second. Units: Count/Second", - "WriteLatency": "The average amount of time taken per disk I/O operation. Units: Seconds", - "WriteThroughput": "The average number of bytes written to disk per second. 
Units: Bytes/Second", - } -) - -func main() { - f, err := os.Create("metrics.go") - if err != nil { - log.Fatal(err) - } - defer f.Close() - - sort.SliceStable(metrics, func(i, j int) bool { - return metrics[i] < metrics[j] - }) - packageTemplate.Execute(f, struct { - Metrics []Metric - }{ - Metrics: metrics, - }) -} - -var packageTemplate = template.Must(template.New("").Parse(`// Code generated by go generate; DO NOT EDIT. -package basic - -import ( - "github.com/prometheus/client_golang/prometheus" -) - -var Metrics = []Metric{ -{{- range .Metrics }} - { - Name: "{{.}}", - Desc: prometheus.NewDesc( - "{{.FqName}}", - "{{.Help}}", - {{printf "%#v" .Labels}}, - {{printf "%#v" .ConstLabels}}, - ), - }, -{{- end }} -} -`)) diff --git a/basic/generate/utils.go b/basic/generate/utils.go deleted file mode 100644 index ec75bc5f..00000000 --- a/basic/generate/utils.go +++ /dev/null @@ -1,34 +0,0 @@ -package main - -import ( - "regexp" - "strings" - "unicode" -) - -var ( - sanitizeNameRegex, _ = regexp.Compile("[^a-zA-Z0-9:_]") - mergeUScoreRegex, _ = regexp.Compile("__+") -) - -func safeName(dirty string) string { - return mergeUScoreRegex.ReplaceAllString( - sanitizeNameRegex.ReplaceAllString( - strings.ToLower(dirty), "_"), - "_") -} - -func toSnakeCase(in string) string { - runes := []rune(in) - length := len(runes) - - var out []rune - for i := 0; i < length; i++ { - if i > 0 && unicode.IsUpper(runes[i]) && ((i+1 < length && unicode.IsLower(runes[i+1])) || unicode.IsLower(runes[i-1])) { - out = append(out, '_') - } - out = append(out, unicode.ToLower(runes[i])) - } - - return string(out) -} diff --git a/basic/metrics.go b/basic/metrics.go index d460f0e6..7615f9a6 100644 --- a/basic/metrics.go +++ b/basic/metrics.go @@ -1,4 +1,3 @@ -// Code generated by go generate; DO NOT EDIT. 
package basic import ( @@ -105,15 +104,6 @@ var Metrics = []Metric{ map[string]string(nil), ), }, - { - Name: "CPUUtilization", - Desc: prometheus.NewDesc( - "node_cpu_average", - "The percentage of CPU utilization. Units: Percent", - []string{"instance", "region"}, - map[string]string{"cpu": "All", "mode": "total"}, - ), - }, { Name: "CommitLatency", Desc: prometheus.NewDesc( @@ -231,24 +221,6 @@ var Metrics = []Metric{ map[string]string(nil), ), }, - { - Name: "FreeStorageSpace", - Desc: prometheus.NewDesc( - "node_filesystem_free", - "The amount of available storage space. Units: Bytes", - []string{"instance", "region"}, - map[string]string(nil), - ), - }, - { - Name: "FreeableMemory", - Desc: prometheus.NewDesc( - "node_memory_Cached", - "The amount of available random access memory. Units: Bytes", - []string{"instance", "region"}, - map[string]string(nil), - ), - }, { Name: "InsertLatency", Desc: prometheus.NewDesc( @@ -448,3 +420,35 @@ var Metrics = []Metric{ ), }, } + +// MetricsOverlappingWithEnhancedCollector lists the metrics that overlap with the enhanced collector. +// See: https://jira.percona.com/browse/PMM-1901. +var MetricsOverlappingWithEnhancedCollector = []Metric{ + { + Name: "CPUUtilization", + Desc: prometheus.NewDesc( + "node_cpu_average", + "The percentage of CPU utilization. Units: Percent", + []string{"instance", "region"}, + map[string]string{"cpu": "All", "mode": "total"}, + ), + }, + { + Name: "FreeStorageSpace", + Desc: prometheus.NewDesc( + "node_filesystem_free", + "The amount of available storage space. Units: Bytes", + []string{"instance", "region"}, + map[string]string(nil), + ), + }, + { + Name: "FreeableMemory", + Desc: prometheus.NewDesc( + "node_memory_Cached", + "The amount of available random access memory. 
Units: Bytes", + []string{"instance", "region"}, + map[string]string(nil), + ), + }, +} diff --git a/factory/factory.go b/factory/factory.go new file mode 100644 index 00000000..c7e1b994 --- /dev/null +++ b/factory/factory.go @@ -0,0 +1,63 @@ +package factory + +import ( + "github.com/prometheus/client_golang/prometheus" + + "github.com/percona/rds_exporter/basic" + "github.com/percona/rds_exporter/config" + "github.com/percona/rds_exporter/sessions" +) + +// Collectors is used for creating collectors on the fly. +type Collectors struct { + config *config.Config + sessions *sessions.Sessions + predefined map[string]prometheus.Collector +} + +// New creates collectors factory. +func New(cfg *config.Config, sess *sessions.Sessions, predefined map[string]prometheus.Collector) *Collectors { + return &Collectors{ + config: cfg, + sessions: sess, + predefined: predefined, + } +} + +// Create creates collectors map based on filters list. +func (f *Collectors) Create(filters []string) map[string]prometheus.Collector { + c := make(map[string]prometheus.Collector) + + // When we have no filters, all collectors will be enabled, so create "basic" one without overlapping metrics. + if len(filters) == 0 { + c["basic"] = basic.New(f.config, f.sessions, basic.DisableOverlapping) + } + // When we have only 1 filter and this is basic one, we need it with all metrics. + if len(filters) == 1 && filterIn(filters, "basic") { + c["basic"] = basic.New(f.config, f.sessions, basic.EnableOverlapping) + return c + } + // When we have more than 1 filters and have basic one... + if len(filters) > 1 && filterIn(filters, "basic") { + if filterIn(filters, "enhanced") { + c["basic"] = basic.New(f.config, f.sessions, basic.DisableOverlapping) + } else { + c["basic"] = basic.New(f.config, f.sessions, basic.EnableOverlapping) + } + } + // Just adding all predefined collectors in map. 
+ for name, collector := range f.predefined { + c[name] = collector + } + + return c +} + +func filterIn(slice []string, filter string) bool { + for _, v := range slice { + if v == filter { + return true + } + } + return false +} diff --git a/main.go b/main.go index fa06f9fa..ea43e005 100644 --- a/main.go +++ b/main.go @@ -1,7 +1,9 @@ package main import ( + "fmt" "net/http" + "os" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/client_golang/prometheus/promhttp" @@ -13,6 +15,7 @@ import ( "github.com/percona/rds_exporter/client" "github.com/percona/rds_exporter/config" "github.com/percona/rds_exporter/enhanced" + "github.com/percona/rds_exporter/factory" "github.com/percona/rds_exporter/sessions" ) @@ -43,26 +46,129 @@ func main() { } // basic metrics + client metrics + exporter own metrics (ProcessCollector and GoCollector) + // NOTE: This handler was retained for backward compatibility. See: https://jira.percona.com/browse/PMM-1901. { - prometheus.MustRegister(basic.New(cfg, sess)) - prometheus.MustRegister(client) - http.Handle(*basicMetricsPathF, promhttp.HandlerFor(prometheus.DefaultGatherer, promhttp.HandlerOpts{ + basicCollector := basic.New(cfg, sess, true) + + registry := prometheus.NewRegistry() + registry.MustRegister(prometheus.NewProcessCollector(os.Getpid(), "")) // from prometheus.DefaultGatherer + registry.MustRegister(prometheus.NewGoCollector()) // from prometheus.DefaultGatherer + + registry.MustRegister(basicCollector) + registry.MustRegister(client) + http.Handle(*basicMetricsPathF, promhttp.HandlerFor(registry, promhttp.HandlerOpts{ ErrorLog: log.NewErrorLogger(), ErrorHandling: promhttp.ContinueOnError, })) } + // This collector should be only one for both cases, because it creates goroutines which sends API requests to Amazon in background. + enhancedCollector := enhanced.NewCollector(sess) + // enhanced metrics + // NOTE: This handler was retained for backward compatibility. 
See: https://jira.percona.com/browse/PMM-1901. { registry := prometheus.NewRegistry() - registry.MustRegister(enhanced.NewCollector(sess)) + registry.MustRegister(enhancedCollector) http.Handle(*enhancedMetricsPathF, promhttp.HandlerFor(registry, promhttp.HandlerOpts{ ErrorLog: log.NewErrorLogger(), ErrorHandling: promhttp.ContinueOnError, })) } - log.Infof("Basic metrics : http://%s%s", *listenAddressF, *basicMetricsPathF) - log.Infof("Enhanced metrics: http://%s%s", *listenAddressF, *enhancedMetricsPathF) + // all metrics (with filtering) + { + psCollector := prometheus.NewProcessCollector(os.Getpid(), "") // from prometheus.DefaultGatherer + goCollector := prometheus.NewGoCollector() // from prometheus.DefaultGatherer + f := factory.New(cfg, sess, map[string]prometheus.Collector{ + "enhanced": enhancedCollector, + "client": client, + "standard.process": psCollector, + "standard.go": goCollector, + }) + handler := newHandler(f) + http.Handle("/metrics", handler) + } + + log.Infof("Metrics: http://%s%s", *listenAddressF, "/metrics") log.Fatal(http.ListenAndServe(*listenAddressF, nil)) } + +// handler wraps an unfiltered http.Handler but uses a filtered handler, +// created on the fly, if filtering is requested. Create instances with +// newHandler. It is used for collector filtering. +type handler struct { + unfilteredHandler http.Handler + factory *factory.Collectors +} + +func newHandler(factory *factory.Collectors) *handler { + h := &handler{factory: factory} + innerHandler, err := h.innerHandler() + if err != nil { + log.Fatalf("Couldn't create metrics handler: %s", err) + } + + h.unfilteredHandler = innerHandler + return h +} + +// ServeHTTP implements http.Handler. +func (h *handler) ServeHTTP(w http.ResponseWriter, r *http.Request) { + filters := r.URL.Query()["collect[]"] + log.Debugln("collect query:", filters) + + if len(filters) == 0 { + // No filters, use the prepared unfiltered handler. 
+ h.unfilteredHandler.ServeHTTP(w, r) + return + } + + filteredHandler, err := h.innerHandler(filters...) + if err != nil { + log.Warnln("Couldn't create filtered metrics handler:", err) + w.WriteHeader(http.StatusBadRequest) + _, err := w.Write([]byte(fmt.Sprintf("Couldn't create filtered metrics handler: %s", err))) + if err != nil { + log.Errorln(err) + } + return + } + filteredHandler.ServeHTTP(w, r) +} + +func (h *handler) innerHandler(filters ...string) (http.Handler, error) { + registry := prometheus.NewRegistry() + + collectors := h.factory.Create(filters) + + // register all collectors by default. + if len(filters) == 0 { + for name, c := range collectors { + if err := registry.Register(c); err != nil { + return nil, err + } + log.Infof("Collector '%s' was registered", name) + } + } + + // register only filtered collectors. + for _, name := range filters { + if c, ok := collectors[name]; ok { + if err := registry.Register(c); err != nil { + return nil, err + } + log.Infof("Collector '%s' was registered", name) + } + } + + handler := promhttp.HandlerFor( + registry, + promhttp.HandlerOpts{ + ErrorLog: log.NewErrorLogger(), + ErrorHandling: promhttp.ContinueOnError, + }, + ) + + return handler, nil +}