From 17c43cf0aa25346e72aa7d5f4d3ef69fb748a948 Mon Sep 17 00:00:00 2001
From: Paul Lorenz
Date: Wed, 18 Sep 2024 12:20:33 -0400
Subject: [PATCH] Add anonymize db utility. Fixes #2413

---
 ziti/cmd/database/anonymize-db.go | 773 ++++++++++++++++++++++++++++++
 ziti/cmd/database/cmd.go          |   1 +
 2 files changed, 774 insertions(+)
 create mode 100644 ziti/cmd/database/anonymize-db.go

diff --git a/ziti/cmd/database/anonymize-db.go b/ziti/cmd/database/anonymize-db.go
new file mode 100644
index 000000000..d0d662c99
--- /dev/null
+++ b/ziti/cmd/database/anonymize-db.go
@@ -0,0 +1,773 @@
+package database
+
+import (
+	"bytes"
+	"fmt"
+	"github.com/openziti/storage/boltz"
+	"github.com/openziti/ziti/controller/command"
+	"github.com/openziti/ziti/controller/db"
+	"github.com/spf13/cobra"
+	"go.etcd.io/bbolt"
+	"os"
+	"sort"
+	"strings"
+)
+
+// attrCounter is the number of generic role attribute names handed out so far.
+// It is shared by mapAttr and mapRoles so generated names never repeat.
+var attrCounter = 0
+
+// NewAnonymizeAction builds the "anonymize" command. The command takes exactly
+// one argument, the path of the database file, and rewrites that file in place.
+func NewAnonymizeAction() *cobra.Command {
+	action := anonymizeDbAction{
+		mappings: map[string]map[string]string{
+			"attributes": {},
+		},
+	}
+
+	cmd := &cobra.Command{
+		Use: "anonymize <path-to-db>",
+		Short: "This utility attempts to remove any personal information from the db. It does the following\n" +
+			"1. Renames all identities, services, edge-routers, policies and configs with generic names \n" +
+			"2. Makes all role attributes generic \n" +
+			"3. Replaces host.v1, host.v2 and intercept.v1 configs with generic versions\n" +
+			"4. Deletes all authenticators and enrollments\n\n" +
+			"WARNINGS:\n" +
+			"* There may be personal information in other database fields, as this doesn't cover every field in every type\n" +
+			"* This works in place on the provided database. Only run this utility on a COPY of your database",
+		Args: cobra.ExactArgs(1),
+		Run:  action.run,
+	}
+
+	cmd.Flags().BoolVar(&action.preserveAuthenticators, "preserve-authenticators", false, "do not delete all authenticators")
+	cmd.Flags().BoolVar(&action.preserveEnrollments, "preserve-enrollments", false, "do not delete all enrollments")
+	cmd.Flags().StringVarP(&action.mappingOutput, "mapping-output", "m", "", "output mapping file. If not specified, mapping will not be emitted")
+	return cmd
+}
+
+// anonymizeDbAction holds the flags and working state of one anonymize run.
+type anonymizeDbAction struct {
+	preserveAuthenticators bool
+	preserveEnrollments    bool
+	zitiDb                 boltz.Db
+	stores                 *db.Stores
+
+	// mappings is keyed by category (an entity type, or "attributes"), then by
+	// original value, and yields the generic replacement value.
+	mappings map[string]map[string]string
+
+	// Related-entity counts per entity id, captured by initValidation before any
+	// change is made and checked again by validateEntityCounts.
+	identityDialSvcCounts map[string]int
+	identityBindSvcCounts map[string]int
+	identityErCounts      map[string]int
+
+	serviceDialIdsCounts map[string]int
+	serviceBindIdsCounts map[string]int
+	serviceErCounts      map[string]int
+
+	erServiceCounts  map[string]int
+	erIdentityCounts map[string]int
+
+	// mappingOutput is the file the mappings are written to; empty disables output.
+	mappingOutput string
+}
+
+// run opens the database file named by args[0], anonymizes it in place and then
+// writes the mapping file if one was requested. Errors panic; a failed
+// relationship check in validateEntityCounts exits the process with status 1.
+func (self *anonymizeDbAction) run(_ *cobra.Command, args []string) {
+	dbFile := args[0]
+
+	zitiDb, err := db.Open(dbFile)
+	if err != nil {
+		panic(err)
+	}
+
+	defer func() {
+		if err = zitiDb.Close(); err != nil {
+			panic(err)
+		}
+	}()
+
+	stores, err := db.InitStores(zitiDb, command.NoOpRateLimiter{}, nil)
+	if err != nil {
+		panic(err)
+	}
+
+	self.zitiDb = zitiDb
+	self.stores = stores
+
+	self.initValidation()
+
+	self.anonymizeIdentities()
+	self.anonymizeServices()
+	self.anonymizeEdgeRouters()
+	self.anonymizeEdgeRouterPolicies()
+	self.anonymizeServiceEdgeRouterPolicies()
+	self.anonymizeServicePolicies()
+	self.validateEntityCounts()
+
+	self.scrubConfigs()
+
+	if !self.preserveAuthenticators {
+		self.scrubAuthenticators()
+	}
+
+	if !self.preserveEnrollments {
+		self.scrubEnrollments()
+	}
+	self.outputMappings()
+}
+
+// outputMappings writes every recorded mapping to the mappingOutput file as
+// tab-separated "category, original, replacement" lines, sorted by category and
+// then by original value. It does nothing when no output file was requested.
+func (self *anonymizeDbAction) outputMappings() {
+	if self.mappingOutput == "" {
+		return
+	}
+
+	var types []string
+	for k := range self.mappings {
+		types = append(types, k)
+	}
+	sort.Strings(types)
+
+	output := bytes.Buffer{}
+
+	for _, t := range types {
+		m := self.mappings[t]
+
+		var keys []string
+		for k := range m {
+			keys = append(keys, k)
+		}
+		sort.Strings(keys)
+
+		for _, k := range keys {
+			v := m[k]
+			if _, err := fmt.Fprintf(&output, "%s\t%s\t%s\n", t, k, v); err != nil {
+				panic(err)
+			}
+		}
+	}
+
+	if err := os.WriteFile(self.mappingOutput, output.Bytes(), 0644); err != nil {
+		panic(err)
+	}
+
+	fmt.Printf("mappings written to %s\n", self.mappingOutput)
+}
+
+// initValidation records, for every identity, service and edge router, how many
+// related entities it currently has, so validateEntityCounts can later confirm
+// that anonymizing did not change those relationships.
+func (self *anonymizeDbAction) initValidation() {
+	self.identityDialSvcCounts = map[string]int{}
+	self.identityBindSvcCounts = map[string]int{}
+	self.identityErCounts = map[string]int{}
+
+	self.serviceDialIdsCounts = map[string]int{}
+	self.serviceBindIdsCounts = map[string]int{}
+	self.serviceErCounts = map[string]int{}
+
+	self.erServiceCounts = map[string]int{}
+	self.erIdentityCounts = map[string]int{}
+
+	// load entity relationship verification data
+	err := self.zitiDb.View(func(tx *bbolt.Tx) error {
+		ids, _, err := self.stores.Identity.QueryIds(tx, "true limit none")
+		if err != nil {
+			return err
+		}
+
+		entityCounter := 0
+
+		for _, id := range ids {
+			entityCounter++
+			self.identityDialSvcCounts[id] = getRelatedEntityCount(tx, self.stores.Identity, id, db.FieldIdentityDialServices)
+			self.identityBindSvcCounts[id] = getRelatedEntityCount(tx, self.stores.Identity, id, db.FieldIdentityBindServices)
+			self.identityErCounts[id] = getRelatedEntityCount(tx, self.stores.Identity, id, db.EntityTypeRouters)
+		}
+
+		fmt.Printf("scanned stats for %d identities\n", entityCounter)
+
+		ids, _, err = self.stores.EdgeService.QueryIds(tx, "true limit none")
+		if err != nil {
+			return err
+		}
+
+		entityCounter = 0
+
+		for _, id := range ids {
+			entityCounter++
+			self.serviceDialIdsCounts[id] = getRelatedEntityCount(tx, self.stores.EdgeService, id, db.FieldEdgeServiceDialIdentities)
+			self.serviceBindIdsCounts[id] = getRelatedEntityCount(tx, self.stores.EdgeService, id, db.FieldEdgeServiceBindIdentities)
+			self.serviceErCounts[id] = getRelatedEntityCount(tx, self.stores.EdgeService, id, db.FieldEdgeRouters)
+		}
+
+		fmt.Printf("scanned stats for %d services\n", entityCounter)
+
+		ids, _, err = self.stores.EdgeRouter.QueryIds(tx, "true limit none")
+		if err != nil {
+			return err
+		}
+
+		entityCounter = 0
+
+		for _, id := range ids {
+			entityCounter++
+			self.erServiceCounts[id] = getRelatedEntityCount(tx, self.stores.EdgeRouter, id, db.EntityTypeServices)
+			self.erIdentityCounts[id] = getRelatedEntityCount(tx, self.stores.EdgeRouter, id, db.EntityTypeIdentities)
+		}
+
+		fmt.Printf("scanned stats for %d edge-routers\n", entityCounter)
+
+		return nil
+	})
+
+	if err != nil {
+		panic(err)
+	}
+}
+
+// rename records the entity's current name, keyed under its entity type in
+// mappings, and returns the generic replacement: the store's singular entity
+// type followed by the zero-padded counter.
+func (self *anonymizeDbAction) rename(entity boltz.NamedExtEntity, counter int) string {
+	m, ok := self.mappings[entity.GetEntityType()]
+	if !ok {
+		m = make(map[string]string)
+		self.mappings[entity.GetEntityType()] = m
+	}
+	newName := fmt.Sprintf("%s-%04d", self.stores.GetStoreForEntity(entity).GetSingularEntityType(), counter)
+	m[entity.GetName()] = newName
+	return newName
+}
+
+// anonymizeIdentities gives every identity a generic name and generic role
+// attributes.
+func (self *anonymizeDbAction) anonymizeIdentities() {
+	// Update Identities
+	err := self.zitiDb.Update(nil, func(ctx boltz.MutateContext) error {
+		ids, _, err := self.stores.Identity.QueryIds(ctx.Tx(), "true limit none")
+		if err != nil {
+			return err
+		}
+
+		entityCounter := 0
+
+		for _, id := range ids {
+			entityCounter++
+
+			entity, err := self.stores.Identity.LoadById(ctx.Tx(), id)
+			if err != nil {
+				return err
+			}
+			// rename both records the mapping and returns the new name; using its result
+			// keeps the stored name identical to what the mapping file reports.
+			entity.Name = self.rename(entity, entityCounter)
+			entity.RoleAttributes = self.mapAttr(entity.RoleAttributes)
+			if err = self.stores.Identity.Update(ctx, entity, nil); err != nil {
+				return err
+			}
+			if entityCounter%100 == 0 {
+				fmt.Printf("processed identity %04d\n", entityCounter)
+			}
+		}
+		fmt.Printf("processed %04d identities\n", entityCounter)
+		return nil
+	})
+
+	if err != nil {
+		panic(err)
+	}
+}
+
+// anonymizeServices gives every service a generic name and generic role
+// attributes.
+func (self *anonymizeDbAction) anonymizeServices() {
+	// Update Services
+	err := self.zitiDb.Update(nil, func(ctx boltz.MutateContext) error {
+		ids, _, err := self.stores.EdgeService.QueryIds(ctx.Tx(), "true limit none")
+		if err != nil {
+			return err
+		}
+
+		entityCounter := 0
+
+		for _, id := range ids {
+			entity, err := self.stores.EdgeService.LoadById(ctx.Tx(), id)
+			if err != nil {
+				return err
+			}
+
+			entityCounter++
+			entity.Name = self.rename(entity, entityCounter)
+			entity.RoleAttributes = self.mapAttr(entity.RoleAttributes)
+			if err = self.stores.EdgeService.Update(ctx, entity, nil); err != nil {
+				return err
+			}
+			if entityCounter%100 == 0 {
+				fmt.Printf("processed service %04d\n", entityCounter)
+			}
+		}
+		fmt.Printf("processed %04d services\n", entityCounter)
+		return nil
+	})
+
+	if err != nil {
+		panic(err)
+	}
+}
+
+// anonymizeEdgeRouters gives every edge router a generic name and generic role
+// attributes.
+func (self *anonymizeDbAction) anonymizeEdgeRouters() {
+	// Update Edge Routers
+	err := self.zitiDb.Update(nil, func(ctx boltz.MutateContext) error {
+		ids, _, err := self.stores.EdgeRouter.QueryIds(ctx.Tx(), "true limit none")
+		if err != nil {
+			return err
+		}
+
+		entityCounter := 0
+
+		for _, id := range ids {
+			entity, err := self.stores.EdgeRouter.LoadById(ctx.Tx(), id)
+			if err != nil {
+				return err
+			}
+
+			entityCounter++
+			entity.Name = self.rename(entity, entityCounter)
+			entity.RoleAttributes = self.mapAttr(entity.RoleAttributes)
+			if err = self.stores.EdgeRouter.Update(ctx, entity, nil); err != nil {
+				return err
+			}
+			if entityCounter%100 == 0 {
+				fmt.Printf("processed edge-router %04d\n", entityCounter)
+			}
+		}
+
+		fmt.Printf("processed %04d edge-routers\n", entityCounter)
+
+		return nil
+	})
+
+	if err != nil {
+		panic(err)
+	}
+}
+
+// anonymizeEdgeRouterPolicies gives every edge router policy a generic name and
+// replaces its identity and edge router roles with their generic equivalents.
+// NOTE(review): only this pass switches to ctx.GetSystemContext(); presumably
+// that is needed to update system-owned policies - confirm.
+func (self *anonymizeDbAction) anonymizeEdgeRouterPolicies() {
+	// Update Edge Router Policies
+	err := self.zitiDb.Update(nil, func(ctx boltz.MutateContext) error {
+		ctx = ctx.GetSystemContext()
+		ids, _, err := self.stores.EdgeRouterPolicy.QueryIds(ctx.Tx(), "true limit none")
+		if err != nil {
+			return err
+		}
+
+		entityCounter := 0
+
+		for _, id := range ids {
+			policy, err := self.stores.EdgeRouterPolicy.LoadById(ctx.Tx(), id)
+			if err != nil {
+				return err
+			}
+
+			entityCounter++
+			policy.IdentityRoles = self.mapRoles(policy.IdentityRoles)
+			policy.EdgeRouterRoles = self.mapRoles(policy.EdgeRouterRoles)
+			policy.Name = self.rename(policy, entityCounter)
+
+			if err = self.stores.EdgeRouterPolicy.Update(ctx, policy, nil); err != nil {
+				return err
+			}
+			if entityCounter%100 == 0 {
+				fmt.Printf("processed edge-router-policy %04d\n", entityCounter)
+			}
+		}
+		fmt.Printf("processed %04d edge-router-policies\n", entityCounter)
+		return nil
+	})
+
+	if err != nil {
+		panic(err)
+	}
+}
+
+// anonymizeServiceEdgeRouterPolicies gives every service edge router policy a
+// generic name and replaces its service and edge router roles with generic ones.
+func (self *anonymizeDbAction) anonymizeServiceEdgeRouterPolicies() {
+	// Update Service Edge Router Policies
+	err := self.zitiDb.Update(nil, func(ctx boltz.MutateContext) error {
+		ids, _, err := self.stores.ServiceEdgeRouterPolicy.QueryIds(ctx.Tx(), "true limit none")
+		if err != nil {
+			return err
+		}
+
+		entityCounter := 0
+
+		for _, id := range ids {
+			policy, err := self.stores.ServiceEdgeRouterPolicy.LoadById(ctx.Tx(), id)
+			if err != nil {
+				return err
+			}
+
+			entityCounter++
+			policy.ServiceRoles = self.mapRoles(policy.ServiceRoles)
+			policy.EdgeRouterRoles = self.mapRoles(policy.EdgeRouterRoles)
+			policy.Name = self.rename(policy, entityCounter)
+
+			if err = self.stores.ServiceEdgeRouterPolicy.Update(ctx, policy, nil); err != nil {
+				return err
+			}
+			if entityCounter%100 == 0 {
+				fmt.Printf("processed service-edge-router-policy %04d\n", entityCounter)
+			}
+		}
+		// Report the total once, after the loop has finished.
+		fmt.Printf("processed %04d service-edge-router-policies\n", entityCounter)
+		return nil
+	})
+
+	if err != nil {
+		panic(err)
+	}
+}
+
+// anonymizeServicePolicies gives every service policy a generic name and
+// replaces its service and identity roles with their generic equivalents.
+func (self *anonymizeDbAction) anonymizeServicePolicies() {
+	// Update Service Policies
+	err := self.zitiDb.Update(nil, func(ctx boltz.MutateContext) error {
+		ids, _, err := self.stores.ServicePolicy.QueryIds(ctx.Tx(), "true limit none")
+		if err != nil {
+			return err
+		}
+
+		entityCounter := 0
+
+		for _, id := range ids {
+			policy, err := self.stores.ServicePolicy.LoadById(ctx.Tx(), id)
+			if err != nil {
+				return err
+			}
+
+			entityCounter++
+			policy.ServiceRoles = self.mapRoles(policy.ServiceRoles)
+			policy.IdentityRoles = self.mapRoles(policy.IdentityRoles)
+			policy.Name = self.rename(policy, entityCounter)
+
+			if err = self.stores.ServicePolicy.Update(ctx, policy, nil); err != nil {
+				return err
+			}
+			if entityCounter%100 == 0 {
+				fmt.Printf("processed service-policy %04d\n", entityCounter)
+			}
+		}
+
+		fmt.Printf("processed %04d service-policies\n", entityCounter)
+
+		return nil
+	})
+
+	if err != nil {
+		panic(err)
+	}
+}
+
+// validateEntityCounts re-counts the related entities of every identity, edge
+// router and service and checks them against the counts captured by
+// initValidation; validateRefCount exits the process on the first mismatch.
+func (self *anonymizeDbAction) validateEntityCounts() {
+	// Validate identity references
+	err := self.zitiDb.View(func(tx *bbolt.Tx) error {
+		ids, _, err := self.stores.Identity.QueryIds(tx, "true limit none")
+		if err != nil {
+			return err
+		}
+
+		entityCounter := 0
+		for _, id := range ids {
+			entityCounter++
+			validateRefCount(tx, self.stores.Identity, id, db.FieldIdentityDialServices, self.identityDialSvcCounts)
+			validateRefCount(tx, self.stores.Identity, id, db.FieldIdentityBindServices, self.identityBindSvcCounts)
+			validateRefCount(tx, self.stores.Identity, id, db.EntityTypeRouters, self.identityErCounts)
+		}
+		fmt.Printf("validated %04d identities\n", entityCounter)
+		return nil
+	})
+
+	if err != nil {
+		panic(err)
+	}
+
+	// Validate edge router references
+
+	err = self.zitiDb.View(func(tx *bbolt.Tx) error {
+		ids, _, err := self.stores.EdgeRouter.QueryIds(tx, "true limit none")
+		if err != nil {
+			return err
+		}
+
+		entityCounter := 0
+		for _, id := range ids {
+			entityCounter++
+			validateRefCount(tx, self.stores.EdgeRouter, id, db.EntityTypeServices, self.erServiceCounts)
+			validateRefCount(tx, self.stores.EdgeRouter, id, db.EntityTypeIdentities, self.erIdentityCounts)
+		}
+		fmt.Printf("validated %04d edge-routers\n", entityCounter)
+		return nil
+	})
+
+	if err != nil {
+		panic(err)
+	}
+
+	// Validate service references
+
+	err = self.zitiDb.View(func(tx *bbolt.Tx) error {
+		ids, _, err := self.stores.EdgeService.QueryIds(tx, "true limit none")
+		if err != nil {
+			return err
+		}
+
+		entityCounter := 0
+		for _, id := range ids {
+			entityCounter++
+			validateRefCount(tx, self.stores.EdgeService, id, db.FieldEdgeServiceDialIdentities, self.serviceDialIdsCounts)
+			validateRefCount(tx, self.stores.EdgeService, id, db.FieldEdgeServiceBindIdentities, self.serviceBindIdsCounts)
+			validateRefCount(tx, self.stores.EdgeService, id, db.FieldEdgeRouters, self.serviceErCounts)
+		}
+		fmt.Printf("validated %04d services\n", entityCounter)
+		return nil
+	})
+
+	if err != nil {
+		panic(err)
+	}
+}
+
+// scrubConfigs gives every config a generic name and replaces the data of
+// host.v1, host.v2 and intercept.v1 configs with fixed placeholder values. Any
+// other config type aborts the update with an "unexpected config type" error.
+func (self *anonymizeDbAction) scrubConfigs() {
+	hostV2 := map[string]interface{}{
+		"terminators": []interface{}{
+			map[string]interface{}{
+				"address":  "localhost",
+				"port":     8888,
+				"protocol": "tcp",
+			},
+		},
+	}
+
+	hostV1 := map[string]interface{}{
+		"address":  "localhost",
+		"port":     8888,
+		"protocol": "tcp",
+	}
+
+	interceptV1 := map[string]interface{}{
+		"addresses": []interface{}{"echo.ziti"},
+		"portRanges": []interface{}{
+			map[string]interface{}{
+				"low":  1234,
+				"high": 1234,
+			},
+		},
+		"protocols": []interface{}{
+			"tcp",
+			"udp",
+		},
+	}
+
+	err := self.zitiDb.Update(nil, func(ctx boltz.MutateContext) error {
+		ids, _, err := self.stores.Config.QueryIds(ctx.Tx(), "true limit none")
+		if err != nil {
+			return err
+		}
+
+		entityCounter := 0
+
+		for _, id := range ids {
+			entity, err := self.stores.Config.LoadById(ctx.Tx(), id)
+			if err != nil {
+				return err
+			}
+
+			entityCounter++
+			entity.Name = self.rename(entity, entityCounter)
+			switch entity.Type {
+			case "NH5p4FpGR": // host.v1
+				entity.Data = hostV1
+			case "host.v2":
+				entity.Data = hostV2
+			case "g7cIWbcGg": // intercept.v1
+				entity.Data = interceptV1
+			default:
+				return fmt.Errorf("unexpected config type: %s", entity.Type)
+			}
+
+			if err = self.stores.Config.Update(ctx, entity, nil); err != nil {
+				return err
+			}
+			if entityCounter%100 == 0 {
+				fmt.Printf("updated config %04d\n", entityCounter)
+			}
+		}
+
+		fmt.Printf("updated %04d configs\n", entityCounter)
+
+		return nil
+	})
+
+	if err != nil {
+		panic(err)
+	}
+}
+
+// scrubAuthenticators deletes every authenticator in the database.
+func (self *anonymizeDbAction) scrubAuthenticators() {
+	err := self.zitiDb.Update(nil, func(ctx boltz.MutateContext) error {
+		ids, _, err := self.stores.Authenticator.QueryIds(ctx.Tx(), "true limit none")
+		if err != nil {
+			return err
+		}
+
+		entityCounter := 0
+
+		for _, id := range ids {
+			if err = self.stores.Authenticator.DeleteById(ctx, id); err != nil {
+				return err
+			}
+
+			entityCounter++
+			if entityCounter%100 == 0 {
+				fmt.Printf("deleted authenticator %04d\n", entityCounter)
+			}
+		}
+
+		fmt.Printf("deleted %04d authenticators\n", entityCounter)
+
+		return nil
+	})
+
+	if err != nil {
+		panic(err)
+	}
+}
+
+// scrubEnrollments deletes every enrollment in the database.
+func (self *anonymizeDbAction) scrubEnrollments() {
+	err := self.zitiDb.Update(nil, func(ctx boltz.MutateContext) error {
+		ids, _, err := self.stores.Enrollment.QueryIds(ctx.Tx(), "true limit none")
+		if err != nil {
+			return err
+		}
+
+		entityCounter := 0
+
+		for _, id := range ids {
+			if err = self.stores.Enrollment.DeleteById(ctx, id); err != nil {
+				return err
+			}
+
+			entityCounter++
+			if entityCounter%100 == 0 {
+				fmt.Printf("deleted enrollment %04d\n", entityCounter)
+			}
+		}
+
+		fmt.Printf("deleted %04d enrollments\n", entityCounter)
+
+		return nil
+	})
+
+	if err != nil {
+		panic(err)
+	}
+}
+
+// getRelatedEntityCount walks the store's related-entities cursor for the given
+// entity id and field name and returns how many entries it visits.
+func getRelatedEntityCount(tx *bbolt.Tx, store boltz.Store, id string, name string) int {
+	count := 0
+	cursor := store.GetRelatedEntitiesCursor(tx, id, name, true)
+	for cursor.IsValid() {
+		count++
+		cursor.Next()
+	}
+	return count
+}
+
+// mapRoles returns roles with every "#attribute" role replaced by its generic
+// name. "#all" and "@id" references are kept unchanged. An attribute that has no
+// mapping yet gets a new generic name, recorded under the bare attribute name so
+// later policies referencing the same attribute map to the same value.
+func (self *anonymizeDbAction) mapRoles(roles []string) []string {
+	attrMap := self.mappings["attributes"]
+	var result []string
+	for _, attr := range roles {
+		if attr == "#all" || strings.HasPrefix(attr, "@") {
+			result = append(result, attr)
+			continue
+		}
+
+		// Mappings are keyed by the bare attribute name, the same key mapAttr uses.
+		key := strings.TrimPrefix(attr, "#")
+		newVal, found := attrMap[key]
+		if !found {
+			attrCounter++
+			newVal = fmt.Sprintf("attr%04d", attrCounter)
+			attrMap[key] = newVal
+		}
+		result = append(result, "#"+newVal)
+	}
+	return result
+}
+
+// mapAttr returns attrs with each role attribute replaced by its generic name,
+// allocating and recording a new generic name the first time one is seen.
+func (self *anonymizeDbAction) mapAttr(attrs []string) []string {
+	attrMap := self.mappings["attributes"]
+
+	var result []string
+
+	for _, attr := range attrs {
+		newVal, ok := attrMap[attr]
+		if !ok {
+			attrCounter++
+			newVal = fmt.Sprintf("attr%04d", attrCounter)
+			attrMap[attr] = newVal
+		}
+		result = append(result, newVal)
+	}
+	return result
+}
+
+// validateRefCount compares the current related-entity count for id and field
+// with the count recorded in m before anonymizing. On a missing or different
+// count it prints the discrepancy and exits the process with status 1.
+func validateRefCount(tx *bbolt.Tx, store boltz.Store, id string, field string, m map[string]int) {
+	count := getRelatedEntityCount(tx, store, id, field)
+
+	expected, ok := m[id]
+	if !ok {
+		fmt.Printf("%s %s, old %s: NOT FOUND, new: %v\n", store.GetEntityType(), id, field, count)
+		os.Exit(1)
+	}
+
+	if expected != count {
+		fmt.Printf("%s %s, old %s: %v, current: %v\n", store.GetEntityType(), id, field, expected, count)
+		os.Exit(1)
+	}
+}
diff --git a/ziti/cmd/database/cmd.go b/ziti/cmd/database/cmd.go
index 79bc89e2a..2aaf34659 100644
--- a/ziti/cmd/database/cmd.go
+++ b/ziti/cmd/database/cmd.go
@@ -26,6 +26,7 @@ func NewCmdDb(out io.Writer, errOut io.Writer) *cobra.Command {
 	cmd.AddCommand(NewCompactAction())
 	cmd.AddCommand(NewDiskUsageAction())
 	cmd.AddCommand(NewAddDebugAdminAction())
+	cmd.AddCommand(NewAnonymizeAction())
 
 	return cmd
 }