From 59671e1280de0d519b76532da6db20330aa01d60 Mon Sep 17 00:00:00 2001 From: Gavin Frazar Date: Fri, 31 Jan 2025 16:15:04 -0800 Subject: [PATCH] Fix database user auto-provisioning * Use a custom query to find user db privileges on tables to avoid the "grantor" filter condition in the information_schema.tables_privileges view. This fixes the cases where the grantor for a privilege is set to the table owner rather than the user (teleport) who issued the grant. Most notably, this happens when a superuser grants privileges on a table they do not own to a user. * Grant USAGE on schemas that contain tables where we intend to grant table privileges. This is necessary to use the table privileges we grant. * Wrap all remaining plpgsql procedure creation/calls with retries. * Add a db permissions e2e test for RDS * Expand e2e tests to test with and without a superuser db admin * Significantly speed up the RDS e2e tests by wrapping EventuallyWithT in a helper func that tries the condition func immediately rather than waiting for the first tick duration. --- e2e/aws/databases_test.go | 64 ++- e2e/aws/fixtures_test.go | 23 +- e2e/aws/main_test.go | 2 + e2e/aws/rds_test.go | 481 ++++++++++++------ e2e/aws/redshift_test.go | 12 +- lib/srv/db/common/permissions/calculation.go | 2 +- .../db/common/permissions/calculation_test.go | 4 +- .../db/postgres/sql/remove-permissions.sql | 55 +- .../db/postgres/sql/update-permissions.sql | 95 +++- lib/srv/db/postgres/users.go | 76 ++- 10 files changed, 568 insertions(+), 246 deletions(-) diff --git a/e2e/aws/databases_test.go b/e2e/aws/databases_test.go index fee21e7fe52f4..63843654e772c 100644 --- a/e2e/aws/databases_test.go +++ b/e2e/aws/databases_test.go @@ -129,12 +129,8 @@ const ( func postgresConnTest(t *testing.T, cluster *helpers.TeleInstance, user string, route tlsca.RouteToDatabase, query string) { t.Helper() var pgConn *pgconn.PgConn - // retry for a while, the database service might need time to give - // itself IAM rds:connect permissions. - require.EventuallyWithT(t, func(t *assert.CollectT) { + waitForDBConnection(t, func(ctx context.Context) error { var err error - ctx, cancel := context.WithTimeout(context.Background(), connRetryTick) - defer cancel() pgConn, err = postgres.MakeTestClient(ctx, common.TestClientConfig{ AuthClient: cluster.GetSiteAPI(cluster.Secrets.SiteName), AuthServer: cluster.Process.GetAuthServer(), @@ -143,10 +139,8 @@ func postgresConnTest(t *testing.T, cluster *helpers.TeleInstance, user string, Username: user, RouteToDatabase: route, }) - assert.NoError(t, err) - assert.NotNil(t, pgConn) - }, waitForConnTimeout, connRetryTick, "connecting to postgres") - + return err + }) execPGTestQuery(t, pgConn, query) } @@ -161,17 +155,11 @@ func postgresLocalProxyConnTest(t *testing.T, cluster *helpers.TeleInstance, use pgconnConfig.User = route.Username pgconnConfig.Database = route.Database var pgConn *pgconn.PgConn - // retry for a while, the database service might need time to give - // itself IAM rds:connect permissions. - require.EventuallyWithT(t, func(t *assert.CollectT) { + waitForDBConnection(t, func(ctx context.Context) error { var err error - ctx, cancel := context.WithTimeout(context.Background(), connRetryTick) - defer cancel() pgConn, err = pgconn.ConnectConfig(ctx, pgconnConfig) - assert.NoError(t, err) - assert.NotNil(t, pgConn) - }, waitForConnTimeout, connRetryTick, "connecting to postgres") - + return err + }) execPGTestQuery(t, pgConn, query) } @@ -204,13 +192,9 @@ func mysqlLocalProxyConnTest(t *testing.T, cluster *helpers.TeleInstance, user s lp := startLocalALPNProxy(t, user, cluster, route) var conn *mysqlclient.Conn - // retry for a while, the database service might need time to give - // itself IAM rds:connect permissions. - require.EventuallyWithT(t, func(t *assert.CollectT) { + waitForDBConnection(t, func(ctx context.Context) error { var err error var nd net.Dialer - ctx, cancel := context.WithTimeout(context.Background(), connRetryTick) - defer cancel() conn, err = mysqlclient.ConnectWithDialer(ctx, "tcp", lp.GetAddr(), route.Username, @@ -218,9 +202,8 @@ func mysqlLocalProxyConnTest(t *testing.T, cluster *helpers.TeleInstance, user s route.Database, nd.DialContext, ) - assert.NoError(t, err) - assert.NotNil(t, conn) - }, waitForConnTimeout, connRetryTick, "connecting to mysql") + return err + }) defer func() { // Disconnect. require.NoError(t, conn.Close()) @@ -413,6 +396,10 @@ func (c *pgConn) Exec(ctx context.Context, sql string, args ...interface{}) (pgc out, err = c.Conn.Exec(ctx, sql, args...) return trace.Wrap(err) }) + c.logger.InfoContext(ctx, "Executed sql statement", + "sql", sql, + "error", err, + ) return out, trace.Wrap(err) } @@ -473,3 +460,28 @@ func isRetryable(err error) bool { } return pgconn.SafeToRetry(err) } + +func waitForDBConnection(t *testing.T, connectFn func(context.Context) error) { + t.Helper() + // retry for a while, the database service might need time to give itself + // IAM permissions. + waitForSuccess(t, func() error { + ctx, cancel := context.WithTimeout(context.Background(), connRetryTick) + defer cancel() + return connectFn(ctx) + }, waitForConnTimeout, connRetryTick, "connecting to database") +} + +// waitForSuccess is a test helper that wraps require.EventuallyWithT but runs +// the given fn first to avoid waiting for the first timer tick. +func waitForSuccess(t *testing.T, fn func() error, waitDur, tick time.Duration, msgAndArgs ...any) { + t.Helper() + // EventuallyWithT waits for the first tick before it makes the first + // attempt, so to speed things up we check for fn success first. + if err := fn(); err == nil { + return + } + require.EventuallyWithT(t, func(t *assert.CollectT) { + assert.NoError(t, fn()) + }, waitDur, tick, msgAndArgs...) +} diff --git a/e2e/aws/fixtures_test.go b/e2e/aws/fixtures_test.go index 95373466c237e..568659df8b28c 100644 --- a/e2e/aws/fixtures_test.go +++ b/e2e/aws/fixtures_test.go @@ -159,7 +159,7 @@ func createTeleportCluster(t *testing.T, opts ...testOptionsFunc) *helpers.TeleI teleport.AddUserWithRole(name, roles...) } - tconf := newTeleportConfig(t) + tconf := newTeleportConfig() for _, optFn := range options.serviceConfigFuncs { optFn(tconf) } @@ -194,7 +194,7 @@ func newInstanceConfig(t *testing.T) helpers.InstanceConfig { } } -func newTeleportConfig(t *testing.T) *servicecfg.Config { +func newTeleportConfig() *servicecfg.Config { tconf := servicecfg.MakeDefaultConfig() // Replace the default auth and proxy listeners with the ones so we can // run multiple tests in parallel. @@ -207,13 +207,13 @@ func newTeleportConfig(t *testing.T) *servicecfg.Config { // withUserRole creates a new role that will be bootstraped and then granted to // the Teleport user under test. -func withUserRole(t *testing.T, user, name string, spec types.RoleSpecV6) testOptionsFunc { +func withUserRole(t *testing.T, userName, roleName string, spec types.RoleSpecV6) testOptionsFunc { t.Helper() // Create a new role with full access to all databases. - role, err := types.NewRole(name, spec) + role, err := types.NewRole(roleName, spec) require.NoError(t, err) return func(options *testOptions) { - options.userRoles[user] = append(options.userRoles[user], role) + options.userRoles[userName] = append(options.userRoles[userName], role) } } @@ -291,3 +291,16 @@ func makeAutoUserDropRoleSpec(roles ...string) types.RoleSpecV6 { spec.Options.CreateDatabaseUserMode = types.CreateDatabaseUserMode_DB_USER_MODE_BEST_EFFORT_DROP return spec } + +func makeAutoUserDBPermissions(dbPermissions ...types.DatabasePermission) types.RoleSpecV6 { + return types.RoleSpecV6{ + Allow: types.RoleConditions{ + DatabaseLabels: types.Labels{types.Wildcard: []string{types.Wildcard}}, + DatabaseNames: []string{types.Wildcard}, + DatabasePermissions: dbPermissions, + }, + Options: types.RoleOptions{ + CreateDatabaseUserMode: types.CreateDatabaseUserMode_DB_USER_MODE_KEEP, + }, + } +} diff --git a/e2e/aws/main_test.go b/e2e/aws/main_test.go index 3efa1f1878141..ee9df7dfa21f1 100644 --- a/e2e/aws/main_test.go +++ b/e2e/aws/main_test.go @@ -23,6 +23,7 @@ import ( "github.com/gravitational/teleport/integration/helpers" "github.com/gravitational/teleport/lib" + "github.com/gravitational/teleport/lib/utils" ) const ( @@ -140,6 +141,7 @@ const ( // TestMain will re-execute Teleport to run a command if "exec" is passed to // it as an argument. Otherwise, it will run tests as normal. func TestMain(m *testing.M) { + utils.InitLoggerForTests() // agents connect over a reverse tunnel to proxy, so we use insecure mode. lib.SetInsecureDevMode(true) helpers.TestMainImplementation(m) diff --git a/e2e/aws/rds_test.go b/e2e/aws/rds_test.go index d536d80a2d183..22519849ff8fd 100644 --- a/e2e/aws/rds_test.go +++ b/e2e/aws/rds_test.go @@ -30,6 +30,7 @@ import ( "github.com/aws/aws-sdk-go-v2/service/rds" mysqlclient "github.com/go-mysql-org/go-mysql/client" "github.com/go-mysql-org/go-mysql/mysql" + "github.com/gravitational/trace" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" @@ -79,14 +80,34 @@ func testRDS(t *testing.T) { // use random names so we can test auto provisioning these users with these // roles via Teleport, without tests colliding with eachother across // parallel test runs. - autoUserKeep := "auto_keep_" + randASCII(t, 6) - autoUserDrop := "auto_drop_" + randASCII(t, 6) - autoRole1 := "auto_role1_" + randASCII(t, 6) - autoRole2 := "auto_role2_" + randASCII(t, 6) + autoUserFineGrain := "auto_fine_grain_" + randASCII(t) + autoUserKeep := "auto_keep_" + randASCII(t) + autoUserDrop := "auto_drop_" + randASCII(t) + autoRole1 := "auto_granted_role1_" + randASCII(t) + autoRole2 := "auto_granted_role2_" + randASCII(t) + + testSchema := "test_" + randASCII(t) accessRole := mustGetEnv(t, rdsAccessRoleARNEnv) discoveryRole := mustGetEnv(t, rdsDiscoveryRoleARNEnv) opts := []testOptionsFunc{ + withUserRole(t, autoUserFineGrain, "db-auto-user-fine-grain", makeAutoUserDBPermissions( + types.DatabasePermission{ + Permissions: []string{"SELECT"}, + Match: types.Labels{ + "object_kind": {"table"}, + "schema": {"public", testSchema, "information_schema"}, + }, + }, + types.DatabasePermission{ + Permissions: []string{"SELECT"}, + Match: types.Labels{ + "object_kind": {"table"}, + "schema": {"pg_catalog"}, + "name": {"pg_range", "pg_proc"}, + }, + }, + )), withUserRole(t, autoUserKeep, "db-auto-user-keeper", makeAutoUserKeepRoleSpec(autoRole1, autoRole2)), withUserRole(t, autoUserDrop, "db-auto-user-dropper", makeAutoUserDropRoleSpec(autoRole1, autoRole2)), } @@ -100,105 +121,157 @@ func testRDS(t *testing.T) { waitForDatabases(t, cluster.Process, pgDBName) db, err := cluster.Process.GetAuthServer().GetDatabase(ctx, pgDBName) require.NoError(t, err) - adminUser := mustGetDBAdmin(t, db) - - conn := connectAsRDSPostgresAdmin(t, ctx, db.GetAWS().RDS.InstanceID) - provisionRDSPostgresAutoUsersAdmin(t, ctx, conn, adminUser.Name) + // make sure we have set the db admin from labels + _ = mustGetDBAdmin(t, db) - // create a new schema with tables that can only be accessed if the - // auto roles are granted by Teleport automatically. - testSchema := "test_" + randASCII(t, 4) - _, err = conn.Exec(ctx, fmt.Sprintf("CREATE SCHEMA %q", testSchema)) + // provision new databases with new db admin to have distinct admin names in concurrent test runs. + // db1 admin *will not* be a Postgres superuser + db1 := cloneDBWithNewAdmin(t, db, &types.DatabaseAdminUser{ + Name: "admin_" + randASCII(t), + }) + require.NoError(t, cluster.Process.GetAuthServer().CreateDatabase(ctx, db1)) + // db2 admin *will* be a Postgres superuser + db2 := cloneDBWithNewAdmin(t, db, &types.DatabaseAdminUser{ + Name: "su_admin_" + randASCII(t), + }) + require.NoError(t, cluster.Process.GetAuthServer().CreateDatabase(ctx, db2)) + waitForDatabases(t, cluster.Process, db1.GetName(), db2.GetName()) + db1, err = cluster.Process.GetAuthServer().GetDatabase(ctx, db1.GetName()) require.NoError(t, err) - testTable := "ctf" // capture the flag :) - _, err = conn.Exec(ctx, fmt.Sprintf("CREATE TABLE %q.%q ()", testSchema, testTable)) + db2, err = cluster.Process.GetAuthServer().GetDatabase(ctx, db2.GetName()) require.NoError(t, err) + conn := connectAsRDSPostgresAdmin(t, ctx, db.GetAWS().RDS.InstanceID) + + // these users will be auto-created by Teleport, so make sure we clean + // them up after the test. + cleanupDB(t, ctx, conn, fmt.Sprintf("DROP ROLE IF EXISTS %q", autoUserKeep)) + cleanupDB(t, ctx, conn, fmt.Sprintf("DROP ROLE IF EXISTS %q", autoUserDrop)) + cleanupDB(t, ctx, conn, fmt.Sprintf("DROP ROLE IF EXISTS %q", autoUserFineGrain)) + // create the roles that Teleport will auto assign. - // role 1 only allows usage of the test schema. - // role 2 only allows select of the test table in the test schema. - // a user needs to have both roles to select from the test table. - _, err = conn.Exec(ctx, fmt.Sprintf("CREATE ROLE %q", autoRole1)) - require.NoError(t, err) - _, err = conn.Exec(ctx, fmt.Sprintf("CREATE ROLE %q", autoRole2)) - require.NoError(t, err) - _, err = conn.Exec(ctx, fmt.Sprintf("GRANT USAGE ON SCHEMA %q TO %q", testSchema, autoRole1)) - require.NoError(t, err) - _, err = conn.Exec(ctx, fmt.Sprintf("GRANT SELECT ON %q.%q TO %q", testSchema, testTable, autoRole2)) - require.NoError(t, err) - autoRolesQuery := fmt.Sprintf("select 1 from %q.%q", testSchema, testTable) + for _, r := range [...]string{autoRole1, autoRole2} { + createPGTestRole(t, ctx, conn, r) + } - t.Cleanup(func() { - // users/roles can only be dropped after we drop the schema+table. - // So, rather than juggling the order of drops, just attempt to drop - // everything as part of test cleanup, regardless of what the test - // actually created successfully. - for _, stmt := range []string{ - fmt.Sprintf("DROP SCHEMA %q CASCADE", testSchema), - fmt.Sprintf("DROP ROLE IF EXISTS %q", autoRole1), - fmt.Sprintf("DROP ROLE IF EXISTS %q", autoRole2), - fmt.Sprintf("DROP USER IF EXISTS %q", autoUserKeep), - fmt.Sprintf("DROP USER IF EXISTS %q", autoUserDrop), - } { - _, err := conn.Exec(ctx, stmt) - assert.NoError(t, err, "test cleanup failed, stmt=%q", stmt) - } - }) + // create a new schema with tables that can only be accessed if the + // auto roles are granted by Teleport automatically. + createPGTestSchema(t, ctx, conn, testSchema) + testTable := "ctf" + randASCII(t) // capture the flag :) + createPGTestTable(t, ctx, conn, testSchema, testTable) + createPGTestTable(t, ctx, conn, "public", testTable) + + // provision db1 admin that is not a postgres superuser + createPGTestUser(t, ctx, conn, db1.GetAdminUser().Name) + pgMustExec(t, ctx, conn, fmt.Sprintf("ALTER USER %q WITH CREATEROLE", db1.GetAdminUser().Name)) + pgMustExec(t, ctx, conn, fmt.Sprintf("GRANT rds_iam TO %q WITH ADMIN OPTION", db1.GetAdminUser().Name)) + pgMustExec(t, ctx, conn, fmt.Sprintf("GRANT USAGE ON SCHEMA public, information_schema, %q TO %q WITH GRANT OPTION", testSchema, db1.GetAdminUser().Name)) + cleanupDB(t, ctx, conn, fmt.Sprintf("REVOKE USAGE ON SCHEMA public, information_schema, %q FROM %q", testSchema, db1.GetAdminUser().Name)) + pgMustExec(t, ctx, conn, fmt.Sprintf("GRANT ALL ON ALL TABLES IN SCHEMA public, information_schema, %q TO %q WITH GRANT OPTION", testSchema, db1.GetAdminUser().Name)) + cleanupDB(t, ctx, conn, fmt.Sprintf("REVOKE ALL ON ALL TABLES IN SCHEMA public, information_schema, %q FROM %q", testSchema, db1.GetAdminUser().Name)) + + // provision db2 admin that IS a postgres super user + createPGTestUser(t, ctx, conn, db2.GetAdminUser().Name) + // granting rds_superuser is as close as we can get to a superuser in RDS Postgres + pgMustExec(t, ctx, conn, fmt.Sprintf("GRANT rds_iam, rds_superuser TO %q", db2.GetAdminUser().Name)) + + // auto role 1 only allows usage of the test schema. + // auto role 2 only allows select of the test table in the test schema. + // a user needs to have both roles to select from the test table. + pgMustExec(t, ctx, conn, fmt.Sprintf("GRANT USAGE ON SCHEMA %q TO %q", testSchema, autoRole1)) + pgMustExec(t, ctx, conn, fmt.Sprintf("GRANT SELECT ON %q.%q TO %q", testSchema, testTable, autoRole2)) + autoRolesQuery := fmt.Sprintf("select 1 from %q.%q", testSchema, testTable) var pgxConnMu sync.Mutex - for name, test := range map[string]struct { - user string - dbUser string - query string - afterConnTestFn func(t *testing.T) + for _, test := range []struct { + name string + db types.Database }{ - "existing user": { - user: hostUser, - dbUser: adminUser.Name, // admin user already has RDS IAM auth - query: "select 1", - }, - "auto user keep": { - user: autoUserKeep, - dbUser: autoUserKeep, - query: autoRolesQuery, - afterConnTestFn: func(t *testing.T) { - pgxConnMu.Lock() - defer pgxConnMu.Unlock() - waitForPostgresAutoUserDeactivate(t, ctx, conn, autoUserKeep) - }, + { + name: "non superuser db admin", + db: db1, }, - "auto user drop": { - user: autoUserDrop, - dbUser: autoUserDrop, - query: autoRolesQuery, - afterConnTestFn: func(t *testing.T) { - pgxConnMu.Lock() - defer pgxConnMu.Unlock() - waitForPostgresAutoUserDrop(t, ctx, conn, autoUserDrop) - }, + { + name: "superuser db admin", + db: db2, }, } { - test := test - t.Run(name, func(t *testing.T) { - t.Parallel() - t.Run("connect", func(t *testing.T) { - route := tlsca.RouteToDatabase{ - ServiceName: db.GetName(), - Protocol: defaults.ProtocolPostgres, - Username: test.dbUser, - Database: "postgres", - } - t.Run("via proxy", func(t *testing.T) { + t.Run(test.name, func(t *testing.T) { + db := test.db + for name, test := range map[string]struct { + user string + dbUser string + query string + afterConnTestFn func(t *testing.T) + }{ + "existing user": { + user: hostUser, + dbUser: db.GetAdminUser().Name, // admin user already has RDS IAM auth + query: "select 1", + }, + "auto user keep": { + user: autoUserKeep, + dbUser: autoUserKeep, + query: autoRolesQuery, + afterConnTestFn: func(t *testing.T) { + pgxConnMu.Lock() + defer pgxConnMu.Unlock() + waitForPostgresAutoUserDeactivate(t, ctx, conn, autoUserKeep) + }, + }, + "auto user drop": { + user: autoUserDrop, + dbUser: autoUserDrop, + query: autoRolesQuery, + afterConnTestFn: func(t *testing.T) { + pgxConnMu.Lock() + defer pgxConnMu.Unlock() + waitForPostgresAutoUserDrop(t, ctx, conn, autoUserDrop) + }, + }, + "db permissions": { + user: autoUserFineGrain, + dbUser: autoUserFineGrain, + query: fmt.Sprintf(` + SELECT + 1 + FROM + pg_catalog.pg_range, + pg_catalog.pg_proc, + information_schema.sql_parts, + public.%q, + %q.%q + `, testTable, testSchema, testTable), + afterConnTestFn: func(t *testing.T) { + pgxConnMu.Lock() + defer pgxConnMu.Unlock() + waitForPostgresAutoUserPermissionsRemoved(t, ctx, conn, autoUserDrop) + }, + }, + } { + test := test + t.Run(name, func(t *testing.T) { t.Parallel() - postgresConnTest(t, cluster, test.user, route, test.query) + t.Run("connect", func(t *testing.T) { + route := tlsca.RouteToDatabase{ + ServiceName: db.GetName(), + Protocol: defaults.ProtocolPostgres, + Username: test.dbUser, + Database: "postgres", + } + t.Run("via proxy", func(t *testing.T) { + t.Parallel() + postgresConnTest(t, cluster, test.user, route, test.query) + }) + t.Run("via local proxy", func(t *testing.T) { + t.Parallel() + postgresLocalProxyConnTest(t, cluster, test.user, route, test.query) + }) + }) + if test.afterConnTestFn != nil { + test.afterConnTestFn(t) + } }) - t.Run("via local proxy", func(t *testing.T) { - t.Parallel() - postgresLocalProxyConnTest(t, cluster, test.user, route, test.query) - }) - }) - if test.afterConnTestFn != nil { - test.afterConnTestFn(t) } }) } @@ -218,10 +291,10 @@ func testRDS(t *testing.T) { provisionRDSMySQLAutoUsersAdmin(t, conn, adminUser.Name) // create a couple test tables to test role assignment with. - testTable1 := "teleport.test_" + randASCII(t, 4) + testTable1 := "teleport.test_" + randASCII(t) _, err = conn.Execute(fmt.Sprintf("CREATE TABLE %s (x int)", testTable1)) require.NoError(t, err) - testTable2 := "teleport.test_" + randASCII(t, 4) + testTable2 := "teleport.test_" + randASCII(t) _, err = conn.Execute(fmt.Sprintf("CREATE TABLE %s (x int)", testTable2)) require.NoError(t, err) @@ -327,13 +400,13 @@ func testRDS(t *testing.T) { provisionMariaDBAdminUser(t, conn, adminUser.Name) // create a couple test tables to test role assignment with. - testTable1 := "teleport.test_" + randASCII(t, 4) + testTable1 := "teleport.test_" + randASCII(t) _, err = conn.Execute(fmt.Sprintf("CREATE TABLE %s (x int)", testTable1)) require.NoError(t, err) t.Cleanup(func() { _, _ = conn.Execute(fmt.Sprintf("DROP TABLE %s", testTable1)) }) - testTable2 := "teleport.test_" + randASCII(t, 4) + testTable2 := "teleport.test_" + randASCII(t) _, err = conn.Execute(fmt.Sprintf("CREATE TABLE %s (x int)", testTable2)) require.NoError(t, err) t.Cleanup(func() { @@ -506,29 +579,6 @@ func getRDSAdminInfo(t *testing.T, ctx context.Context, instanceID string) dbUse } } -// provisionRDSPostgresAutoUsersAdmin provisions an admin user suitable for auto-user -// provisioning. -func provisionRDSPostgresAutoUsersAdmin(t *testing.T, ctx context.Context, conn *pgConn, adminUser string) { - t.Helper() - // Create the admin user and grant rds_iam so Teleport can auth - // with IAM as an existing user. - // Also needed so the auto-user admin can auto-provision others. - // If the admin already exists, ignore errors - there's only - // one admin because the admin has to own all the functions - // we provision and creating a different admin for each test - // is not necessary. - // Don't cleanup the db admin after, because test runs would interfere - // with each other. - _, err := conn.Exec(ctx, fmt.Sprintf("CREATE USER %q WITH login createrole", adminUser)) - if err != nil { - require.ErrorContains(t, err, "already exists") - } - _, err = conn.Exec(ctx, fmt.Sprintf("GRANT rds_iam TO %q WITH ADMIN OPTION", adminUser)) - if err != nil { - require.ErrorContains(t, err, "already a member") - } -} - // provisionRDSMySQLAutoUsersAdmin provisions an admin user suitable for auto-user // provisioning. func provisionRDSMySQLAutoUsersAdmin(t *testing.T, conn *mySQLConn, adminUser string) { @@ -584,9 +634,10 @@ func provisionMariaDBAdminUser(t *testing.T, conn *mySQLConn, adminUser string) } // randASCII is a helper func that returns a random string of ascii characters. -func randASCII(t *testing.T, length int) string { +func randASCII(t *testing.T) string { t.Helper() - out, err := utils.CryptoRandomHex(length / 2) + const charLen = 8 + out, err := utils.CryptoRandomHex(charLen / 2) require.NoError(t, err) return out } @@ -599,9 +650,47 @@ const ( autoUserWaitStep = 10 * time.Second ) +func waitForPostgresAutoUserPermissionsRemoved(t *testing.T, ctx context.Context, conn *pgConn, user string) { + t.Helper() + waitForSuccess(t, func() error { + rows, _ := conn.Query(ctx, ` +SELECT DISTINCT + pg_namespace.nspname AS table_schema, + obj.relname AS table_name, + acl.privilege_type AS privilege_type +FROM + pg_class as obj +INNER JOIN + pg_namespace ON obj.relnamespace = pg_namespace.oid +INNER JOIN LATERAL + aclexplode(COALESCE(obj.relacl, acldefault('r'::"char", obj.relowner))) AS acl ON true +INNER JOIN + pg_roles AS grantee ON acl.grantee = grantee.oid +WHERE + (obj.relkind = ANY (ARRAY['r', 'v', 'f', 'p'])) + AND (acl.privilege_type = ANY (ARRAY['DELETE'::text, 'INSERT'::text, 'REFERENCES'::text, 'SELECT'::text, 'TRUNCATE'::text, 'TRIGGER'::text, 'UPDATE'::text])) + AND grantee.rolname = $1 +`, user) + var privs []string + for rows.Next() { + for _, v := range rows.RawValues() { + privs = append(privs, string(v)) + } + } + rows.Close() + if err := rows.Err(); err != nil { + return trace.Wrap(err) + } + if len(privs) > 0 { + return trace.Errorf("user %q db permissions %s should have been revoked after disconnecting", user, privs) + } + return nil + }, autoUserWaitDur, autoUserWaitStep, "waiting for auto user %q permissions to be removed", user) +} + func waitForPostgresAutoUserDeactivate(t *testing.T, ctx context.Context, conn *pgConn, user string) { t.Helper() - require.EventuallyWithT(t, func(c *assert.CollectT) { + waitForSuccess(t, func() error { // `Query` documents that it is always safe to attempt to read from the // returned rows even if an error is returned. // It also documents that the same error will be in rows.Err() and @@ -611,38 +700,39 @@ func waitForPostgresAutoUserDeactivate(t *testing.T, ctx context.Context, conn * rows, _ := conn.Query(ctx, "SELECT 1 FROM pg_roles WHERE rolname=$1", user) gotRow := rows.Next() rows.Close() - if !assert.NoError(c, rows.Err()) { - return + if err := rows.Err(); err != nil { + return trace.Wrap(err) } - if !assert.True(c, gotRow, "user %q should not have been dropped after disconnecting", user) { - return + if !gotRow { + return trace.Errorf("user %q should not have been dropped after disconnecting", user) } rows, _ = conn.Query(ctx, "SELECT 1 FROM pg_roles WHERE rolname = $1 AND rolcanlogin = false", user) gotRow = rows.Next() rows.Close() - if !assert.NoError(c, rows.Err()) { - return + if err := rows.Err(); err != nil { + return trace.Wrap(err) } - if !assert.True(c, gotRow, "user %q should not be able to login after deactivating", user) { - return + if !gotRow { + return trace.Errorf("user %q should not be able to login after deactivating", user) } rows, _ = conn.Query(ctx, "SELECT 1 FROM pg_roles AS a WHERE pg_has_role($1, a.oid, 'member') AND a.rolname NOT IN ($1, 'teleport-auto-user')", user) gotRow = rows.Next() rows.Close() - if !assert.NoError(c, rows.Err()) { - return + if err := rows.Err(); err != nil { + return trace.Wrap(err) } - if !assert.False(c, gotRow, "user %q should have lost all additional roles after deactivating", user) { - return + if gotRow { + return trace.Errorf("user %q should have lost all additional roles after deactivating", user) } + return nil }, autoUserWaitDur, autoUserWaitStep, "waiting for auto user %q to be deactivated", user) } func waitForPostgresAutoUserDrop(t *testing.T, ctx context.Context, conn *pgConn, user string) { t.Helper() - require.EventuallyWithT(t, func(c *assert.CollectT) { + waitForSuccess(t, func() error { // `Query` documents that it is always safe to attempt to read from the // returned rows even if an error is returned. // It also documents that the same error will be in rows.Err() and @@ -652,92 +742,100 @@ func waitForPostgresAutoUserDrop(t *testing.T, ctx context.Context, conn *pgConn rows, _ := conn.Query(ctx, "SELECT 1 FROM pg_roles WHERE rolname=$1", user) gotRow := rows.Next() rows.Close() - if !assert.NoError(c, rows.Err()) { - return + if err := rows.Err(); err != nil { + return trace.Wrap(err) + } + if gotRow { + return trace.Errorf("user %q should have been dropped automatically after disconnecting", user) } - assert.False(c, gotRow, "user %q should have been dropped automatically after disconnecting", user) + return nil }, autoUserWaitDur, autoUserWaitStep, "waiting for auto user %q to be dropped", user) } func waitForMySQLAutoUserDeactivate(t *testing.T, conn *mySQLConn, user string) { t.Helper() - require.EventuallyWithT(t, func(c *assert.CollectT) { + waitForSuccess(t, func() error { result, err := conn.Execute("SELECT 1 FROM mysql.user AS u WHERE u.user = ?", user) - if !assert.NoError(c, err) { - return + if err != nil { + return trace.Wrap(err) } - if !assert.Equal(c, 1, result.RowNumber(), "user %q should not have been dropped after disconnecting", user) { + if result.RowNumber() != 1 { result.Close() - return + return trace.Errorf("user %q should not have been dropped after disconnecting", user) } result.Close() result, err = conn.Execute("SELECT 1 FROM mysql.user AS u WHERE u.user = ? AND u.account_locked = 'Y'", user) - if !assert.NoError(c, err) { - return + if err != nil { + return trace.Wrap(err) } - if !assert.Equal(c, 1, result.RowNumber(), "user %q should not be able to login after deactivating", user) { + if result.RowNumber() != 1 { result.Close() - return + return trace.Errorf("user %q should not be able to login after deactivating", user) } result.Close() result, err = conn.Execute("SELECT 1 FROM mysql.role_edges AS u WHERE u.to_user = ? AND u.from_user != 'teleport-auto-user'", user) - if !assert.NoError(c, err) { - return + if err != nil { + return trace.Wrap(err) } - if !assert.Equal(c, 0, result.RowNumber(), "user %q should have lost all additional roles after deactivating", user) { + if result.RowNumber() != 0 { result.Close() - return + return trace.Errorf("user %q should have lost all additional roles after deactivating", user) } result.Close() + return nil }, autoUserWaitDur, autoUserWaitStep, "waiting for auto user %q to be deactivated", user) } func waitForMySQLAutoUserDrop(t *testing.T, conn *mySQLConn, user string) { t.Helper() - require.EventuallyWithT(t, func(c *assert.CollectT) { + waitForSuccess(t, func() error { result, err := conn.Execute("SELECT 1 FROM mysql.user AS u WHERE u.user = ?", user) - if !assert.NoError(c, err) { - return + if err != nil { + return trace.Wrap(err) } - assert.Equal(c, 0, result.RowNumber(), "user %q should have been dropped automatically after disconnecting", user) - result.Close() + defer result.Close() + if result.RowNumber() != 0 { + return trace.Errorf("user %q should have been dropped automatically after disconnecting", user) + } + return nil }, autoUserWaitDur, autoUserWaitStep, "waiting for auto user %q to be dropped", user) } func waitForMariaDBAutoUserDeactivate(t *testing.T, conn *mySQLConn, user string) { t.Helper() - require.EventuallyWithT(t, func(c *assert.CollectT) { + waitForSuccess(t, func() error { result, err := conn.Execute("SELECT 1 FROM mysql.user AS u WHERE u.user = ?", user) - if !assert.NoError(c, err) { - return + if err != nil { + return trace.Wrap(err) } - if !assert.Equal(c, 1, result.RowNumber(), "user %q should not have been dropped after disconnecting", user) { + if result.RowNumber() != 1 { result.Close() - return + return trace.Errorf("user %q should not have been dropped after disconnecting", user) } result.Close() result, err = conn.Execute("SELECT 1 FROM mysql.global_priv AS u WHERE u.user = ? AND JSON_EXTRACT(u.priv, '$.account_locked') = true", user) - if !assert.NoError(c, err) { - return + if err != nil { + return trace.Wrap(err) } - if !assert.Equal(c, 1, result.RowNumber(), "user %q should not be able to login after deactivating", user) { + if result.RowNumber() != 1 { result.Close() - return + return trace.Errorf("user %q should not be able to login after deactivating", user) } result.Close() result, err = conn.Execute("SELECT 1 FROM mysql.roles_mapping AS u WHERE u.user = ? AND u.role != 'teleport-auto-user' AND u.ADMIN_OPTION='N'", user) - if !assert.NoError(c, err) { - return + if err != nil { + return trace.Wrap(err) } - if !assert.Equal(c, 0, result.RowNumber(), "user %q should have lost all additional roles after deactivating", user) { + if result.RowNumber() != 0 { result.Close() - return + return trace.Errorf("user %q should have lost all additional roles after deactivating", user) } result.Close() + return nil }, autoUserWaitDur, autoUserWaitStep, "waiting for auto user %q to be deactivated", user) } @@ -746,3 +844,54 @@ func waitForMariaDBAutoUserDrop(t *testing.T, conn *mySQLConn, user string) { // run the same tests as mysql to check if the user was dropped. waitForMySQLAutoUserDrop(t, conn, user) } + +func pgMustExec(t *testing.T, ctx context.Context, conn *pgConn, statement string) { + t.Helper() + _, err := conn.Exec(ctx, statement) + require.NoError(t, err) +} + +func createPGTestTable(t *testing.T, ctx context.Context, conn *pgConn, schemaName, tableName string) { + t.Helper() + pgMustExec(t, ctx, conn, fmt.Sprintf("CREATE TABLE %q.%q ()", schemaName, tableName)) + cleanupDB(t, ctx, conn, fmt.Sprintf("DROP TABLE IF EXISTS %q.%q", schemaName, tableName)) +} + +func createPGTestSchema(t *testing.T, ctx context.Context, conn *pgConn, schemaName string) { + t.Helper() + pgMustExec(t, ctx, conn, fmt.Sprintf("CREATE SCHEMA %q", schemaName)) + cleanupDB(t, ctx, conn, fmt.Sprintf("DROP SCHEMA IF EXISTS %q CASCADE", schemaName)) +} + +func createPGTestUser(t *testing.T, ctx context.Context, conn *pgConn, userName string) { + t.Helper() + pgMustExec(t, ctx, conn, fmt.Sprintf("CREATE USER %q", userName)) + cleanupDB(t, ctx, conn, fmt.Sprintf("DROP USER IF EXISTS %q", userName)) +} + +func createPGTestRole(t *testing.T, ctx context.Context, conn *pgConn, roleName string) { + t.Helper() + pgMustExec(t, ctx, conn, fmt.Sprintf("CREATE ROLE %q", roleName)) + cleanupDB(t, ctx, conn, fmt.Sprintf("DROP ROLE IF EXISTS %q", roleName)) +} + +func cleanupDB(t *testing.T, ctx context.Context, conn *pgConn, statement string) { + t.Helper() + t.Cleanup(func() { + _, err := conn.Exec(ctx, statement) + assert.NoError(t, err, "failed to cleanup test resource with %s", statement) + }) +} + +func cloneDBWithNewAdmin(t *testing.T, db types.Database, admin *types.DatabaseAdminUser) types.Database { + t.Helper() + clone := db.Copy() + clone.SetName("db-" + randASCII(t)) + clone.SetOrigin(types.OriginDynamic) + clone.Spec.AdminUser = admin + // sanity check + dbAdmin := mustGetDBAdmin(t, clone) + require.Equal(t, clone.Spec.AdminUser.Name, dbAdmin.Name) + require.Equal(t, clone.Spec.AdminUser.DefaultDatabase, dbAdmin.DefaultDatabase) + return clone +} diff --git a/e2e/aws/redshift_test.go b/e2e/aws/redshift_test.go index c8e9bbf418c20..443973bfee0ce 100644 --- a/e2e/aws/redshift_test.go +++ b/e2e/aws/redshift_test.go @@ -70,10 +70,10 @@ func testRedshiftCluster(t *testing.T) { ctx, cancel := context.WithTimeout(context.Background(), 3*time.Minute) t.Cleanup(cancel) - autoUserKeep := "auto_keep_" + randASCII(t, 6) - autoUserDrop := "auto_drop_" + randASCII(t, 6) - autoRole1 := "auto_role1_" + randASCII(t, 6) - autoRole2 := "auto_role2_" + randASCII(t, 6) + autoUserKeep := "auto_keep_" + randASCII(t) + autoUserDrop := "auto_drop_" + randASCII(t) + autoRole1 := "auto_role1_" + randASCII(t) + autoRole2 := "auto_role2_" + randASCII(t) opts := []testOptionsFunc{ withUserRole(t, autoUserKeep, "db-auto-user-keeper", makeAutoUserKeepRoleSpec(autoRole1, autoRole2)), withUserRole(t, autoUserDrop, "db-auto-user-dropper", makeAutoUserDropRoleSpec(autoRole1, autoRole2)), @@ -94,7 +94,7 @@ func testRedshiftCluster(t *testing.T) { // schema in its search_path to prevent tests from interfering with // eachother. labels := db.GetStaticLabels() - labels[types.DatabaseAdminLabel] = "test_admin_" + randASCII(t, 6) + labels[types.DatabaseAdminLabel] = "test_admin_" + randASCII(t) err = cluster.Process.GetAuthServer().UpdateDatabase(ctx, db) require.NoError(t, err) adminUser := mustGetDBAdmin(t, db) @@ -104,7 +104,7 @@ func testRedshiftCluster(t *testing.T) { // create a new schema with tables that can only be accessed if the // auto roles are granted by Teleport automatically. - testSchema := "test_" + randASCII(t, 8) + testSchema := "test_" + randASCII(t) _, err = conn.Exec(ctx, fmt.Sprintf("CREATE SCHEMA %q", testSchema)) require.NoError(t, err) // now the admin will install its procedures in the test schema. diff --git a/lib/srv/db/common/permissions/calculation.go b/lib/srv/db/common/permissions/calculation.go index 96746fa9ca13f..89fcbfaacd5f8 100644 --- a/lib/srv/db/common/permissions/calculation.go +++ b/lib/srv/db/common/permissions/calculation.go @@ -71,7 +71,7 @@ func SummarizePermissions(perms PermissionSet) (string, []events.DatabasePermiss for _, perm := range permNames { objects := perms[perm] countText, countMap := CountObjectKinds(objects) - fragments = append(fragments, fmt.Sprintf("%q: %v objects (%v)", perm, len(objects), countText)) + fragments = append(fragments, fmt.Sprintf("%s: %d objects (%v)", perm, len(objects), countText)) eventData[perm] = countMap } diff --git a/lib/srv/db/common/permissions/calculation_test.go b/lib/srv/db/common/permissions/calculation_test.go index fe88f6c8495a8..4ebe505fccef0 100644 --- a/lib/srv/db/common/permissions/calculation_test.go +++ b/lib/srv/db/common/permissions/calculation_test.go @@ -91,7 +91,7 @@ func TestCalculatePermissions(t *testing.T) { mkDatabaseObject("bar", map[string]string{"kind": "schema"}), }, }, - summary: "\"DELETE\": 1 objects (table:1), \"SELECT\": 1 objects (table:1)", + summary: "DELETE: 1 objects (table:1), SELECT: 1 objects (table:1)", details: []events.DatabasePermissionEntry{ { Permission: "DELETE", @@ -136,7 +136,7 @@ func TestCalculatePermissions(t *testing.T) { mkDatabaseObject("bar", map[string]string{"kind": "schema"}), }, }, - summary: "\"SELECT\": 1 objects (table:1)", + summary: "SELECT: 1 objects (table:1)", details: []events.DatabasePermissionEntry{ { Permission: "SELECT", diff --git a/lib/srv/db/postgres/sql/remove-permissions.sql b/lib/srv/db/postgres/sql/remove-permissions.sql index 9033839f8e7b9..08bdff88bda12 100644 --- a/lib/srv/db/postgres/sql/remove-permissions.sql +++ b/lib/srv/db/postgres/sql/remove-permissions.sql @@ -8,11 +8,58 @@ BEGIN IF EXISTS (SELECT usename FROM pg_stat_activity WHERE usename = username AND datname = current_database()) THEN RAISE NOTICE 'User has active connections to current database'; ELSE - -- Loop through table permissions - FOR row_data IN (SELECT DISTINCT table_schema, table_name FROM information_schema.table_privileges WHERE grantee = username) + CREATE TEMPORARY TABLE cur_perms ON COMMIT DROP AS ( + SELECT DISTINCT + pg_namespace.nspname::information_schema.sql_identifier AS table_schema, + obj.relname::information_schema.sql_identifier AS table_name + FROM + pg_class as obj + INNER JOIN + pg_namespace ON obj.relnamespace = pg_namespace.oid + INNER JOIN LATERAL + aclexplode(COALESCE(obj.relacl, acldefault('r'::"char", obj.relowner))) AS acl ON true + INNER JOIN + pg_roles AS grantee ON acl.grantee = grantee.oid + WHERE + -- only objects that are one of r=ordinary table, v=view, f=foreign table, p=partitioned table. + (obj.relkind = ANY (ARRAY['r', 'v', 'f', 'p'])) + -- only privileges we support provisioning. + AND (acl.privilege_type = ANY (ARRAY['DELETE'::text, 'INSERT'::text, 'REFERENCES'::text, 'SELECT'::text, 'TRUNCATE'::text, 'TRIGGER'::text, 'UPDATE'::text])) + -- only the user in we are revoking permissions from. + AND grantee.rolname = username + ); + + -- Loop through and revoke all table permissions + FOR row_data IN ( + SELECT + table_schema, + table_name + FROM + cur_perms + ) LOOP - EXECUTE 'REVOKE ALL PRIVILEGES ON ' || quote_ident(row_data.table_schema) || '.' || quote_ident(row_data.table_name) || ' FROM ' || quote_ident(username); + EXECUTE format('REVOKE ALL PRIVILEGES ON %I.%I FROM %I', + row_data.table_schema, + row_data.table_name, + username + ); + END LOOP; + + -- We implicitly grant USAGE on any schema for which we make table privilege grants. + -- Loop through and revoke all schema USAGE. + -- See: https://github.com/gravitational/teleport/issues/51851 + FOR row_data IN ( + SELECT DISTINCT + table_schema + FROM + cur_perms + ) + LOOP + EXECUTE format('REVOKE USAGE ON SCHEMA %I FROM %I', + row_data.table_schema, + username + ); END LOOP; END IF; END; -$$; \ No newline at end of file +$$; diff --git a/lib/srv/db/postgres/sql/update-permissions.sql b/lib/srv/db/postgres/sql/update-permissions.sql index 16a564146d13d..c9ac35ddfde4a 100644 --- a/lib/srv/db/postgres/sql/update-permissions.sql +++ b/lib/srv/db/postgres/sql/update-permissions.sql @@ -3,34 +3,93 @@ LANGUAGE plpgsql AS $$ DECLARE grant_data JSONB; - grant_item JSONB; - diff_count_1 INTEGER; - diff_count_2 INTEGER; + row_data RECORD; + diff_removed INTEGER; + diff_added INTEGER; BEGIN grant_data = COALESCE(NULLIF(permissions_->'tables', 'null'), '[]'::JSONB); + CREATE TEMPORARY TABLE new_perms ON COMMIT DROP AS ( + SELECT + item->>'schema' as table_schema, + item->>'table' as table_name, + item->>'privilege' as privilege_type + FROM + jsonb_array_elements(grant_data) as item + ); -- If the user has active connections to current database, verify that permissions haven't changed. IF EXISTS (SELECT usename FROM pg_stat_activity WHERE usename = username AND datname = current_database()) THEN - CREATE TEMPORARY TABLE cur_perms AS SELECT table_schema, table_name, privilege_type FROM information_schema.table_privileges WHERE grantee = username; - CREATE TEMPORARY TABLE new_perms AS SELECT item->>'schema' as table_schema, item->>'table' as table_name, item->>'privilege' as privilege_type FROM jsonb_array_elements(grant_data) as item; + WITH + cur_perms AS ( + SELECT DISTINCT + pg_namespace.nspname::information_schema.sql_identifier AS table_schema, + obj.relname::information_schema.sql_identifier AS table_name, + acl.privilege_type::information_schema.character_data AS privilege_type + FROM + pg_class as obj + INNER JOIN + pg_namespace ON obj.relnamespace = pg_namespace.oid + INNER JOIN LATERAL + aclexplode(COALESCE(obj.relacl, acldefault('r'::"char", obj.relowner))) AS acl ON true + INNER JOIN + pg_roles AS grantee ON acl.grantee = grantee.oid + WHERE + -- only objects that are one of r=ordinary table, v=view, f=foreign table, p=partitioned table. + (obj.relkind = ANY (ARRAY['r', 'v', 'f', 'p'])) + -- only privileges we support provisioning. + AND (acl.privilege_type = ANY (ARRAY['DELETE'::text, 'INSERT'::text, 'REFERENCES'::text, 'SELECT'::text, 'TRUNCATE'::text, 'TRIGGER'::text, 'UPDATE'::text])) + -- only the user we are checking permissions for. + AND grantee.rolname = username + ), + removed AS ( + SELECT * FROM cur_perms + EXCEPT + SELECT * FROM new_perms + ), + added AS ( + SELECT * FROM new_perms + EXCEPT + SELECT * FROM cur_perms + ) + SELECT + (SELECT COUNT(*) FROM removed), + (SELECT COUNT(*) FROM added) + INTO + diff_removed, + diff_added; - SELECT COUNT(*) INTO diff_count_1 FROM (SELECT * FROM cur_perms EXCEPT SELECT * FROM new_perms) q1; - SELECT COUNT(*) INTO diff_count_2 FROM (SELECT * FROM new_perms EXCEPT SELECT * FROM cur_perms) q2; - - IF (diff_count_1 > 0) OR (diff_count_2 > 0) THEN - RAISE WARNING 'Permission changes: removed=%, added=%', diff_count_1, diff_count_2; + IF (diff_removed > 0) OR (diff_added > 0) THEN + RAISE WARNING 'Permission changes: removed=%, added=%', diff_removed, diff_added; RAISE EXCEPTION SQLSTATE 'TP005' USING MESSAGE = 'TP005: User has active connections and permissions have changed'; END IF; ELSE CALL pg_temp.teleport_remove_permissions(username); - -- Assign all roles to the created/activated user if grants are provided. - IF grant_data <> 'null'::JSONB THEN - FOR grant_item IN SELECT * FROM jsonb_array_elements(grant_data) - LOOP - EXECUTE 'GRANT ' || text(grant_item->>'privilege') || ' ON TABLE ' || QUOTE_IDENT(grant_item->>'schema') || '.' || QUOTE_IDENT(grant_item->>'table') || ' TO ' || QUOTE_IDENT(username); - END LOOP; - END IF; + -- Assign all privileges to the created/activated user if grants are provided. + FOR row_data IN SELECT * FROM new_perms + LOOP + EXECUTE format('GRANT %s ON %I.%I TO %I', + row_data.privilege_type, + row_data.table_schema, + row_data.table_name, + username + ); + END LOOP; + + -- Grant USAGE on any schemas for which we made table privilege grants + -- See: https://github.com/gravitational/teleport/issues/51851 + FOR row_data IN ( + SELECT DISTINCT + table_schema + FROM + new_perms + ) + LOOP + EXECUTE format('GRANT USAGE ON SCHEMA %I TO %I', + row_data.table_schema, + username + ); + END LOOP; END IF; END; -$$; \ No newline at end of file +$$; diff --git a/lib/srv/db/postgres/users.go b/lib/srv/db/postgres/users.go index 1e49614bb7224..af16d09c8b248 100644 --- a/lib/srv/db/postgres/users.go +++ b/lib/srv/db/postgres/users.go @@ -126,13 +126,13 @@ type Permissions struct { } var pgTablePerms = map[string]struct{}{ - "SELECT": {}, - "INSERT": {}, - "UPDATE": {}, "DELETE": {}, - "TRUNCATE": {}, + "INSERT": {}, "REFERENCES": {}, + "SELECT": {}, "TRIGGER": {}, + "TRUNCATE": {}, + "UPDATE": {}, } func checkPgPermission(objKind, perm string) error { @@ -240,20 +240,32 @@ func (e *Engine) applyPermissions(ctx context.Context, sessionCtx *common.Sessio // teleport_remove_permissions and teleport_update_permissions are created in pg_temp table of the session database. // teleport_remove_permissions gets called by teleport_update_permissions as needed. - if err := e.createProcedures(ctx, sessionCtx, conn, []string{removePermissionsProcName, updatePermissionsProcName}); err != nil { + logger := e.Log.With("user", sessionCtx.DatabaseUser) + err = withRetry(ctx, logger, func() error { + err := e.createProcedures(ctx, sessionCtx, conn, []string{removePermissionsProcName, updatePermissionsProcName}) + return trace.Wrap(err) + }) + if err != nil { return trace.Wrap(err) } - if err := e.callProcedure(ctx, sessionCtx, conn, updatePermissionsProcName, sessionCtx.DatabaseUser, perms); err != nil { + err = withRetry(ctx, logger, func() error { + err := e.callProcedure(ctx, sessionCtx, conn, updatePermissionsProcName, sessionCtx.DatabaseUser, perms) + return trace.Wrap(err) + }) + if err != nil { var pgErr *pq.Error if errors.As(err, &pgErr) { if pgErr.Code == common.SQLStatePermissionsChanged { - e.Log.ErrorContext(ctx, "Permissions have changed, rejecting connection.", "user", sessionCtx.DatabaseUser, "error", err) + logger.ErrorContext(ctx, "User permissions have changed, rejecting connection", + "error", err, + ) } } else { - e.Log.ErrorContext(ctx, "Failed to update permissions.", "user", sessionCtx.DatabaseUser, "error", err) + logger.ErrorContext(ctx, "Failed to update user permissions", + "error", err, + ) } - return trace.Wrap(err) } return nil } @@ -273,13 +285,22 @@ func (e *Engine) removePermissions(ctx context.Context, sessionCtx *common.Sessi defer conn.Close(ctx) // teleport_remove_permissions is created in pg_temp table of the session database. - if err := e.createProcedures(ctx, sessionCtx, conn, []string{removePermissionsProcName}); err != nil { + err = withRetry(ctx, logger, func() error { + err := e.createProcedures(ctx, sessionCtx, conn, []string{removePermissionsProcName}) + return trace.Wrap(err) + }) + if err != nil { return trace.Wrap(err) } - if err := e.callProcedure(ctx, sessionCtx, conn, removePermissionsProcName, sessionCtx.DatabaseUser); err != nil { - logger.ErrorContext(ctx, "Removing permissions from user failed.", "error", err) + err = withRetry(ctx, logger, func() error { + err := e.callProcedure(ctx, sessionCtx, conn, removePermissionsProcName, sessionCtx.DatabaseUser) return trace.Wrap(err) + }) + if err != nil { + logger.ErrorContext(ctx, "Removing permissions from user failed", + "error", err, + ) } return nil } @@ -336,12 +357,15 @@ func (e *Engine) DeleteUser(ctx context.Context, sessionCtx *common.Session) err } defer conn.Close(ctx) - if err := e.createProcedures(ctx, sessionCtx, conn, []string{deleteProcName, deactivateProcName}); err != nil { - return trace.Wrap(err) - } - logger := e.Log.With("user", sessionCtx.DatabaseUser) logger.InfoContext(ctx, "Deleting PostgreSQL user.") + err = withRetry(ctx, logger, func() error { + err := e.createProcedures(ctx, sessionCtx, conn, []string{deleteProcName, deactivateProcName}) + return trace.Wrap(err) + }) + if err != nil { + return trace.Wrap(err) + } var state string err = withRetry(ctx, logger, func() error { @@ -679,13 +703,29 @@ func isRetryable(err error) bool { case pgerrcode.DeadlockDetected, pgerrcode.SerializationFailure, pgerrcode.UniqueViolation, pgerrcode.ExclusionViolation: return true + case pgerrcode.InternalError: + if isInternalErrorRetryable(pgErr) { + return true + } } } + return pgconn.SafeToRetry(err) +} + +// isInternalErrorRetryable returns true if an internal error (code XX000) +// should be retried. +func isInternalErrorRetryable(err error) bool { + errMsg := err.Error() // Redshift reports this with a vague SQLSTATE XX000, which is the internal // error code, but this is a serialization error that rolls back the // transaction, so it should be retried. - if strings.Contains(err.Error(), "conflict with concurrent transaction") { + if strings.Contains(errMsg, "conflict with concurrent transaction") { return true } - return pgconn.SafeToRetry(err) + // Postgres this can happen if transaction A tries to revoke or grant privileges + // concurrent with transaction B. + if strings.Contains(errMsg, "tuple concurrently updated") { + return true + } + return false }