diff --git a/README.md b/README.md
index 1939732..53108cb 100644
--- a/README.md
+++ b/README.md
@@ -15,16 +15,16 @@ The goal is to expose stats about storage/disk usage (not transactions) per Azur
 
 ## Example metrics output
 
 ```text
-# HELP pdok_storage_lastRunDateMetric
-# TYPE pdok_storage_lastRunDateMetric gauge
-pdok_storage_lastRunDateMetric 1.716122623e+09
-# HELP pdok_storage_usage
-# TYPE pdok_storage_usage gauge
-pdok_storage_usage{container="blob-inventory",dataset="other",deleted="false",owner="other"} 1.4511800263e+10
-pdok_storage_usage{container="blob-inventory",dataset="other",deleted="true",owner="other"} 1.4697209865e+10
-pdok_storage_usage{container="deliveries",dataset="something",deleted="false",owner="someone"} 1.4624738e+07
-pdok_storage_usage{container="deliveries",dataset="something",deleted="true",owner="someone"} 2.0263731e+07
-pdok_storage_usage{container="deliveries",dataset="somethingelse",deleted="false",owner="someoneelse"} 1.8042443e+07
+# HELP azure_storage_last_run_date
+# TYPE azure_storage_last_run_date gauge
+azure_storage_last_run_date 1.716122623e+09
+# HELP azure_storage_usage
+# TYPE azure_storage_usage gauge
+azure_storage_usage{container="blob-inventory",dataset="other",deleted="false",owner="other",storage_account="devstoreaccount1"} 1.4511800263e+10
+azure_storage_usage{container="blob-inventory",dataset="other",deleted="true",owner="other",storage_account="devstoreaccount1"} 1.4697209865e+10
+azure_storage_usage{container="deliveries",dataset="something",deleted="false",owner="someone",storage_account="devstoreaccount1"} 1.4624738e+07
+azure_storage_usage{container="deliveries",dataset="something",deleted="true",owner="someone",storage_account="devstoreaccount1"} 2.0263731e+07
+azure_storage_usage{container="deliveries",dataset="somethingelse",deleted="false",owner="someoneelse",storage_account="devstoreaccount1"} 1.8042443e+07
 # .....
 ```
diff --git a/cmd/main_test.go b/cmd/main_test.go
index 4fc7072..aa29abf 100644
--- a/cmd/main_test.go
+++ b/cmd/main_test.go
@@ -14,7 +14,7 @@ import (
 func TestPerf(t *testing.T) {
 	t.Skip("local")
 	t.Run("perf", func(t *testing.T) {
-		configFile, err := os.ReadFile("example/pdok-config.yaml")
+		configFile, err := os.ReadFile("example/config.yaml")
 		require.Nil(t, err)
 		config := new(Config)
 		err = yaml.Unmarshal(configFile, config)
diff --git a/example/docker-compose.yaml b/example/docker-compose.yaml
index a4b3304..b7b9952 100644
--- a/example/docker-compose.yaml
+++ b/example/docker-compose.yaml
@@ -64,6 +64,10 @@ services:
       - -c
      - |
        set -e
-        echo "waiting a bit before checking the metrics (TODO think of something better)"
+        echo "waiting a bit before checking the metrics"
        sleep 7
-        wget -O - 'http://azure-storage-usage-exporter:8080/metrics' | grep 'pdok_storage_usage{deleted="false",tenant="ZDI2",type="ZTJmNTY2MTU2Y2"} 3.4038013e+07'
+        if ! wget -q -O - 'http://azure-storage-usage-exporter:8080/metrics' | grep 'azure_storage_usage{deleted="false",storage_account="devstoreaccount1",tenant="ZDI2",type="ZTJmNTY2MTU2Y2"} 3.4038013e+07' > /dev/null; then
+          echo "expected metric not found"; exit 1
+        else
+          echo "done"
+        fi
diff --git a/example/pdok-config.yaml b/example/pdok-config.yaml
deleted file mode 100644
index 679220f..0000000
--- a/example/pdok-config.yaml
+++ /dev/null
@@ -1,9 +0,0 @@
----
-azure: {}
-labels:
-  container: other
-  owner: other
-  dataset: other
-rules:
-  - pattern: ^(?P<container>blob-inventory|argo-artifacts|container-logs|mimir-blocks|elasticsearch-snapshots|inspire-reports)(/|$)
-  - pattern: ^(?P<container>[^/]+)/(?P<owner>[^/]+)/(?P<dataset>[^/]+)
diff --git a/internal/agg/aggregator.go b/internal/agg/aggregator.go
index 7d59377..991f61a 100644
--- a/internal/agg/aggregator.go
+++ b/internal/agg/aggregator.go
@@ -15,7 +15,8 @@ import (
 )
 
 const (
-	Deleted = "deleted"
+	Deleted        = "deleted"
+	StorageAccount = "storage_account"
 )
 
 type Labels = map[string]string
@@ -43,13 +44,23 @@ type Aggregator struct {
 	rules []AggregationRule
 }
 
-func NewAggregator(duReader du.Reader, labels Labels, rules []AggregationRule) (*Aggregator, error) {
-	if _, exists := labels[Deleted]; exists {
-		return nil, errors.New("cannot use deleted as a label")
+func NewAggregator(duReader du.Reader, labelsWithDefaults Labels, rules []AggregationRule) (*Aggregator, error) {
+	if _, exists := labelsWithDefaults[Deleted]; exists {
+		return nil, errors.New("cannot use custom label: " + Deleted)
+	}
+	if labelsWithDefaults == nil {
+		labelsWithDefaults = Labels{}
+	} else {
+		labelsWithDefaults = maps.Clone(labelsWithDefaults)
+	}
+	if given, exists := labelsWithDefaults[StorageAccount]; !exists {
+		labelsWithDefaults[StorageAccount] = duReader.GetStorageAccountName()
+	} else if given == "" {
+		delete(labelsWithDefaults, StorageAccount)
 	}
 	return &Aggregator{
 		duReader:           duReader,
-		labelsWithDefaults: labels,
+		labelsWithDefaults: labelsWithDefaults,
 		rules:              rules,
 	}, nil
 }
@@ -145,8 +156,9 @@ func (a *Aggregator) applyRulesToAggregate(row du.Row) AggregationGroup {
 		aggregationGroup.Deleted = nilBoolToBool(row.Deleted)
 		return aggregationGroup
 	}
+	// default if no rule matches
 	return AggregationGroup{
-		Labels:  a.labelsWithDefaults,
+		Labels:  maps.Clone(a.labelsWithDefaults),
 		Deleted: nilBoolToBool(row.Deleted),
 	}
 }
diff --git a/internal/agg/aggregator_test.go b/internal/agg/aggregator_test.go
index 30da789..d548522 100644
--- a/internal/agg/aggregator_test.go
+++ b/internal/agg/aggregator_test.go
@@ -52,10 +52,10 @@ func TestAggregator_Aggregate(t *testing.T) {
 				previousRunDate: someFixedTime.Add(-24 * time.Hour),
 			},
 			wantAggregationResults: []AggregationResult{
-				{AggregationGroup: AggregationGroup{Labels: Labels{"level1": "default1", "level2": "default2"}, Deleted: false}, StorageUsage: 666},
-				{AggregationGroup: AggregationGroup{Labels: Labels{"level1": "special", "level2": "sauce"}, Deleted: false}, StorageUsage: 321},
-				{AggregationGroup: AggregationGroup{Labels: Labels{"level1": "dir1", "level2": "dir2"}, Deleted: true}, StorageUsage: 200},
-				{AggregationGroup: AggregationGroup{Labels: Labels{"level1": "dir1", "level2": "dir2"}, Deleted: false}, StorageUsage: 100},
+				{AggregationGroup: AggregationGroup{Labels: Labels{"level1": "default1", "level2": "default2", StorageAccount: "faker"}, Deleted: false}, StorageUsage: 666},
+				{AggregationGroup: AggregationGroup{Labels: Labels{"level1": "special", "level2": "sauce", StorageAccount: "faker"}, Deleted: false}, StorageUsage: 321},
+				{AggregationGroup: AggregationGroup{Labels: Labels{"level1": "dir1", "level2": "dir2", StorageAccount: "faker"}, Deleted: true}, StorageUsage: 200},
+				{AggregationGroup: AggregationGroup{Labels: Labels{"level1": "dir1", "level2": "dir2", StorageAccount: "faker"}, Deleted: false}, StorageUsage: 100},
 			},
 			wantRunDate: someFixedTime,
 			wantErr:     false,
@@ -138,6 +138,10 @@ func (f *fakeDuReader) TestConnection() error {
 	return nil
 }
 
+func (f *fakeDuReader) GetStorageAccountName() string {
+	return "faker"
+}
+
 func boolPtr(b bool) *bool {
 	return &b
 }
diff --git a/internal/du/azure_blob_inventory.go b/internal/du/azure_blob_inventory.go
index 5d3fefb..79544cd 100644
--- a/internal/du/azure_blob_inventory.go
+++ b/internal/du/azure_blob_inventory.go
@@ -5,6 +5,8 @@ import (
 	"errors"
 	"fmt"
 	"log"
+	"net/url"
+	"regexp"
 	"slices"
 	"strings"
 	"time"
@@ -70,6 +72,22 @@ func (ar *AzureBlobInventoryReportDuReader) TestConnection() error {
 	return err
 }
 
+func (ar *AzureBlobInventoryReportDuReader) GetStorageAccountName() string {
+	// github.com/Azure/azure-sdk-for-go/sdk/storage/azblob/internal/shared/ParseConnectionString is unfortunately internal
+	if match := regexp.MustCompile(`AccountName=([^;]+)`).FindStringSubmatch(ar.config.AzureStorageConnectionString); len(match) == 2 {
+		return match[1]
+	}
+	if match := regexp.MustCompile(`BlobEndpoint=([^;]+)`).FindStringSubmatch(ar.config.AzureStorageConnectionString); len(match) == 2 {
+		if blobEndpoint, err := url.Parse(match[1]); blobEndpoint != nil && err == nil {
+			if path := strings.Trim(blobEndpoint.Path, "/"); path != "" {
+				return path
+			}
+			return regexp.MustCompile(`^[^.]+`).FindString(blobEndpoint.Host)
+		}
+	}
+	return "_unknown"
+}
+
 func (ar *AzureBlobInventoryReportDuReader) Read(previousRunDate time.Time) (time.Time, <-chan Row, <-chan error, error) {
 	log.Print("finding newest inventory run")
 	rulesRanByDate, err := ar.findRuns()
diff --git a/internal/du/reader.go b/internal/du/reader.go
index b8d26de..40df17d 100644
--- a/internal/du/reader.go
+++ b/internal/du/reader.go
@@ -21,4 +21,5 @@ type Row struct {
 type Reader interface {
 	Read(previousRunDate time.Time) (runDate time.Time, rows <-chan Row, errs <-chan error, err error)
 	TestConnection() error
+	GetStorageAccountName() string
 }
diff --git a/internal/metrics/metrics_updater.go b/internal/metrics/metrics_updater.go
index c2b906a..5bdf6ad 100644
--- a/internal/metrics/metrics_updater.go
+++ b/internal/metrics/metrics_updater.go
@@ -21,7 +21,7 @@ type Updater struct {
 }
 
 type Config struct {
-	MetricNamespace string `yaml:"metricNamespace" default:"pdok"`
+	MetricNamespace string `yaml:"metricNamespace" default:"azure"`
 	MetricSubsystem string `yaml:"metricSubsystem" default:"storage"`
 	Limit           int    `yaml:"limit" default:"1000"`
 }
@@ -53,7 +53,7 @@ func NewUpdater(aggregator *agg.Aggregator, config Config) *Updater {
 		lastRunDateMetric: promauto.NewGauge(prometheus.GaugeOpts{
 			Namespace: config.MetricNamespace,
 			Subsystem: config.MetricSubsystem,
-			Name:      "lastRunDateMetric",
+			Name:      "last_run_date",
 		}),
 	}
 }
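Note on the new `GetStorageAccountName` behaviour: the sketch below is a self-contained approximation of the parsing added in `internal/du/azure_blob_inventory.go`, not the exporter's actual code path. The helper name `storageAccountFromConnectionString`, the shortened Azurite connection string, and the endpoint-only example are illustrative assumptions. It shows why the docker-compose check can expect `storage_account="devstoreaccount1"`: Azurite's well-known connection string carries `AccountName=devstoreaccount1`, so the `AccountName=` branch wins before the `BlobEndpoint=` fallback is consulted.

```go
package main

import (
	"fmt"
	"net/url"
	"regexp"
	"strings"
)

// storageAccountFromConnectionString mirrors the parsing idea from the diff:
// prefer an explicit AccountName=..., otherwise derive the account from the
// BlobEndpoint URL (URL path for emulator-style endpoints, host prefix otherwise).
func storageAccountFromConnectionString(connStr string) string {
	if m := regexp.MustCompile(`AccountName=([^;]+)`).FindStringSubmatch(connStr); len(m) == 2 {
		return m[1]
	}
	if m := regexp.MustCompile(`BlobEndpoint=([^;]+)`).FindStringSubmatch(connStr); len(m) == 2 {
		if endpoint, err := url.Parse(m[1]); err == nil {
			if path := strings.Trim(endpoint.Path, "/"); path != "" {
				return path // e.g. http://azurite:10000/devstoreaccount1
			}
			return regexp.MustCompile(`^[^.]+`).FindString(endpoint.Host) // e.g. mystorageaccount.blob.core.windows.net
		}
	}
	return "_unknown"
}

func main() {
	// Azurite's well-known development connection string (account key elided).
	azurite := "DefaultEndpointsProtocol=http;AccountName=devstoreaccount1;AccountKey=...;BlobEndpoint=http://azurite:10000/devstoreaccount1;"
	fmt.Println(storageAccountFromConnectionString(azurite)) // devstoreaccount1

	// Hypothetical endpoint-only connection string without an AccountName part.
	endpointOnly := "BlobEndpoint=https://mystorageaccount.blob.core.windows.net;SharedAccessSignature=..."
	fmt.Println(storageAccountFromConnectionString(endpointOnly)) // mystorageaccount
}
```

Deployments that do not want the extra label already have an opt-out in the aggregator change: configuring `storage_account: ""` under `labels` removes the label instead of filling it with the reader's account name.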