From 5a48b157f9348dcd56c5697246487a079dc4597e Mon Sep 17 00:00:00 2001 From: Robin Appelman Date: Thu, 8 Aug 2024 16:16:10 +0200 Subject: [PATCH] feat: implement sharded auto-increment for primary keys and encode the initial shard in the primary key Signed-off-by: Robin Appelman --- lib/private/DB/Connection.php | 4 +++ .../Partitioned/PartitionedQueryBuilder.php | 17 ++++++----- .../Sharded/AutoIncrementHandler.php | 14 +++++++-- .../QueryBuilder/Sharded/ShardDefinition.php | 12 ++++++++ .../Sharded/ShardedQueryBuilder.php | 29 +++++-------------- .../PartitionedQueryBuilderTest.php | 5 +++- .../Sharded/SharedQueryBuilderTest.php | 6 ++-- 7 files changed, 53 insertions(+), 34 deletions(-) diff --git a/lib/private/DB/Connection.php b/lib/private/DB/Connection.php index b2f7cdb48476c..48842ce3f5cef 100644 --- a/lib/private/DB/Connection.php +++ b/lib/private/DB/Connection.php @@ -26,6 +26,7 @@ use OC\DB\QueryBuilder\Partitioned\PartitionedQueryBuilder; use OC\DB\QueryBuilder\Partitioned\PartitionSplit; use OC\DB\QueryBuilder\QueryBuilder; +use OC\DB\QueryBuilder\Sharded\AutoIncrementHandler; use OC\DB\QueryBuilder\Sharded\CrossShardMoveHelper; use OC\DB\QueryBuilder\Sharded\RoundRobinShardMapper; use OC\DB\QueryBuilder\Sharded\ShardConnectionManager; @@ -87,6 +88,7 @@ class Connection extends PrimaryReadReplicaConnection { /** @var ShardDefinition[] */ protected array $shards = []; protected ShardConnectionManager $shardConnectionManager; + protected AutoIncrementHandler $autoIncrementHandler; const SHARD_PRESETS = [ 'filecache' => [ @@ -128,6 +130,7 @@ public function __construct( $this->tablePrefix = $params['tablePrefix']; $this->shardConnectionManager = $this->params['shard_connection_manager'] ?? Server::get(ShardConnectionManager::class); + $this->autoIncrementHandler = Server::get(AutoIncrementHandler::class); $this->systemConfig = \OC::$server->getSystemConfig(); $this->clock = Server::get(ClockInterface::class); $this->logger = Server::get(LoggerInterface::class); @@ -248,6 +251,7 @@ public function getQueryBuilder(): IQueryBuilder { $builder, $this->shards, $this->shardConnectionManager, + $this->autoIncrementHandler, ); foreach ($this->partitions as $name => $tables) { $partition = new PartitionSplit($name, $tables); diff --git a/lib/private/DB/QueryBuilder/Partitioned/PartitionedQueryBuilder.php b/lib/private/DB/QueryBuilder/Partitioned/PartitionedQueryBuilder.php index d3f1537578d50..d7d8767606d3b 100644 --- a/lib/private/DB/QueryBuilder/Partitioned/PartitionedQueryBuilder.php +++ b/lib/private/DB/QueryBuilder/Partitioned/PartitionedQueryBuilder.php @@ -25,6 +25,7 @@ use OC\DB\QueryBuilder\CompositeExpression; use OC\DB\QueryBuilder\QuoteHelper; +use OC\DB\QueryBuilder\Sharded\AutoIncrementHandler; use OC\DB\QueryBuilder\Sharded\ShardConnectionManager; use OC\DB\QueryBuilder\Sharded\ShardedQueryBuilder; use OCP\DB\IResult; @@ -46,7 +47,7 @@ * * For example: * ``` - * $query->select("mount_point", "mimetype") + * $query->select("mount_point", "mimetype") * ->from("mounts", "m") * ->innerJoin("m", "filecache", "f", $query->expr()->eq("root_id", "fileid")); * ``` @@ -114,11 +115,12 @@ class PartitionedQueryBuilder extends ShardedQueryBuilder { private QuoteHelper $quoteHelper; public function __construct( - IQueryBuilder $builder, - private array $shardDefinitions, - private ShardConnectionManager $shardConnectionManager, + IQueryBuilder $builder, + array $shardDefinitions, + ShardConnectionManager $shardConnectionManager, + AutoIncrementHandler $autoIncrementHandler, ) { - parent::__construct($builder, $this->shardDefinitions, $this->shardConnectionManager); + parent::__construct($builder, $shardDefinitions, $shardConnectionManager, $autoIncrementHandler); $this->quoteHelper = new QuoteHelper(); } @@ -132,6 +134,7 @@ private function newQuery(): IQueryBuilder { $builder, $this->shardDefinitions, $this->shardConnectionManager, + $this->autoIncrementHandler, ); } @@ -177,8 +180,8 @@ private function applySelects(): void { foreach ($this->selects as $select) { foreach ($this->partitions as $partition) { if (is_string($select['select']) && ( - $select['select'] === '*' || - $partition->isColumnInPartition($select['select'])) + $select['select'] === '*' || + $partition->isColumnInPartition($select['select'])) ) { if (isset($this->splitQueries[$partition->name])) { if ($select['alias']) { diff --git a/lib/private/DB/QueryBuilder/Sharded/AutoIncrementHandler.php b/lib/private/DB/QueryBuilder/Sharded/AutoIncrementHandler.php index dd634fd776664..9f61ddcdd6eee 100644 --- a/lib/private/DB/QueryBuilder/Sharded/AutoIncrementHandler.php +++ b/lib/private/DB/QueryBuilder/Sharded/AutoIncrementHandler.php @@ -25,11 +25,15 @@ public function __construct( ICacheFactory $cacheFactory, private ShardConnectionManager $shardConnectionManager, ) { + if (PHP_INT_SIZE < 8) { + throw new \Exception("sharding is only supported with 64bit php"); + } + $cache = $cacheFactory->createDistributed("shared_autoincrement"); if ($cache instanceof IMemcache) { $this->cache = $cache; } else { - throw new \Exception('Distributed cache ' . get_class($cache) . ' does not suitable'); + throw new \Exception('Distributed cache ' . get_class($cache) . ' is not suitable'); } } @@ -38,6 +42,9 @@ public function getNextPrimaryKey(ShardDefinition $shardDefinition): int { while ($retries < 5) { $next = $this->getNextPrimaryKeyInner($shardDefinition); if ($next !== null) { + if ($next > ShardDefinition::MAX_PRIMARY_KEY) { + throw new \Exception("Max primary key of " . ShardDefinition::MAX_PRIMARY_KEY . " exceeded"); + } return $next; } else { $retries++; @@ -50,7 +57,7 @@ public function getNextPrimaryKey(ShardDefinition $shardDefinition): int { * @param ShardDefinition $shardDefinition * @return int|null either the next primary key or null if the call needs to be retried */ - private function getNextPrimaryKeyInner(ShardDefinition $shardDefinition): int|null { + private function getNextPrimaryKeyInner(ShardDefinition $shardDefinition): ?int { // because this function will likely be called concurrently from different requests // the implementation needs to ensure that the cached value can be cleared, invalidated or re-calculated at any point between our cache calls // care must be taken that the logic remains fully resilient against race conditions @@ -75,7 +82,8 @@ private function getNextPrimaryKeyInner(ShardDefinition $shardDefinition): int|n } } - $current = $this->getMaxFromDb($shardDefinition); + // discard the encoded initial shard + $current = $this->getMaxFromDb($shardDefinition) & ShardDefinition::PRIMARY_KEY_MASK; $next = max($current, self::MIN_VALID_KEY) + 1; if ($this->cache->cas($shardDefinition->table, "empty-placeholder", $next)) { return $next; diff --git a/lib/private/DB/QueryBuilder/Sharded/ShardDefinition.php b/lib/private/DB/QueryBuilder/Sharded/ShardDefinition.php index 60b00db8d2ef6..432f4de01cd5c 100644 --- a/lib/private/DB/QueryBuilder/Sharded/ShardDefinition.php +++ b/lib/private/DB/QueryBuilder/Sharded/ShardDefinition.php @@ -14,6 +14,15 @@ * Keeps the configuration for a shard setup */ class ShardDefinition { + // we reserve the top byte of the primary key for the initial shard + // but since php doesn't have unsigned integers, we loose the top bit to the sign + // so we can only encode 127 shards in the top byte + public const MAX_SHARDS = 127; + + const PRIMARY_KEY_MASK = 0x00_FF_FF_FF_FF_FF_FF_FF; + const PRIMARY_KEY_SHARD_MASK = 0x7F_00_00_00_00_00_00_00; + const MAX_PRIMARY_KEY = PHP_INT_MAX & self::PRIMARY_KEY_MASK; + /** * @param string $table * @param string $primaryKey @@ -32,6 +41,9 @@ public function __construct( public array $companionTables = [], public array $shards = [], ) { + if (count($this->shards) >= self::MAX_SHARDS) { + throw new \Exception("Only allowed maximum of " . self::MAX_SHARDS . " shards allowed"); + } } public function hasTable(string $table): bool { diff --git a/lib/private/DB/QueryBuilder/Sharded/ShardedQueryBuilder.php b/lib/private/DB/QueryBuilder/Sharded/ShardedQueryBuilder.php index 2e4566712a20d..7caf6c84b1c57 100644 --- a/lib/private/DB/QueryBuilder/Sharded/ShardedQueryBuilder.php +++ b/lib/private/DB/QueryBuilder/Sharded/ShardedQueryBuilder.php @@ -28,8 +28,9 @@ class ShardedQueryBuilder extends ExtendedQueryBuilder { public function __construct( IQueryBuilder $builder, - private array $shardDefinitions, - private ShardConnectionManager $shardConnectionManager, + protected array $shardDefinitions, + protected ShardConnectionManager $shardConnectionManager, + protected AutoIncrementHandler $autoIncrementHandler, ) { parent::__construct($builder); } @@ -309,25 +310,11 @@ public function executeStatement(?IDBConnection $connection = null): int { foreach ($shards as $shard) { $shardConnection = $this->shardConnectionManager->getConnection($this->shardDefinition, $shard); if (!$this->primaryKeys && $this->shardDefinition->table === $this->insertTable) { - // todo: is random primary key fine, or do we need to do shared-autoincrement - /** - * atomic autoincrement: - * - * $next = $cache->inc('..'); - * if (!$next) { - * $last = $this->getMaxValue(); - * $success = $cache->add('..', $last + 1); - * if ($success) { - * return $last + 1; - * } else { - * / somebody else set it - * return $cache->inc('..'); - * } - * } else { - * return $next - * } - */ - $id = random_int(0, PHP_INT_MAX); + $rawId = $this->autoIncrementHandler->getNextPrimaryKey($this->shardDefinition); + + // we encode the shard the primary key was originally inserted into to allow guessing the shard by primary key later on + $encodedShard = $shard << 56; + $id = $rawId | $encodedShard; parent::setValue($this->shardDefinition->primaryKey, $this->createParameter('__generated_primary_key')); $this->setParameter('__generated_primary_key', $id, self::PARAM_INT); $this->lastInsertId = $id; diff --git a/tests/lib/DB/QueryBuilder/Partitioned/PartitionedQueryBuilderTest.php b/tests/lib/DB/QueryBuilder/Partitioned/PartitionedQueryBuilderTest.php index 06128c68798f7..a320218b7e535 100644 --- a/tests/lib/DB/QueryBuilder/Partitioned/PartitionedQueryBuilderTest.php +++ b/tests/lib/DB/QueryBuilder/Partitioned/PartitionedQueryBuilderTest.php @@ -10,6 +10,7 @@ use OC\DB\QueryBuilder\Partitioned\PartitionSplit; use OC\DB\QueryBuilder\Partitioned\PartitionedQueryBuilder; +use OC\DB\QueryBuilder\Sharded\AutoIncrementHandler; use OC\DB\QueryBuilder\Sharded\ShardConnectionManager; use OCP\DB\QueryBuilder\IQueryBuilder; use OCP\IDBConnection; @@ -22,10 +23,12 @@ class PartitionedQueryBuilderTest extends TestCase { private IDBConnection $connection; private ShardConnectionManager $shardConnectionManager; + private AutoIncrementHandler $autoIncrementHandler; protected function setUp(): void { $this->connection = Server::get(IDBConnection::class); $this->shardConnectionManager = Server::get(ShardConnectionManager::class); + $this->autoIncrementHandler = Server::get(AutoIncrementHandler::class); $this->setupFileCache(); } @@ -41,7 +44,7 @@ private function getQueryBuilder(): PartitionedQueryBuilder { if ($builder instanceof PartitionedQueryBuilder) { return $builder; } else { - return new PartitionedQueryBuilder($builder, [], $this->shardConnectionManager); + return new PartitionedQueryBuilder($builder, [], $this->shardConnectionManager, $this->autoIncrementHandler); } } diff --git a/tests/lib/DB/QueryBuilder/Sharded/SharedQueryBuilderTest.php b/tests/lib/DB/QueryBuilder/Sharded/SharedQueryBuilderTest.php index 632481232a7e8..83261729e5e56 100644 --- a/tests/lib/DB/QueryBuilder/Sharded/SharedQueryBuilderTest.php +++ b/tests/lib/DB/QueryBuilder/Sharded/SharedQueryBuilderTest.php @@ -8,16 +8,15 @@ namespace Test\DB\QueryBuilder\Sharded; +use OC\DB\QueryBuilder\Sharded\AutoIncrementHandler; use OC\DB\QueryBuilder\Sharded\InvalidShardedQueryException; use OC\DB\QueryBuilder\Sharded\RoundRobinShardMapper; use OC\DB\QueryBuilder\Sharded\ShardConnectionManager; use OC\DB\QueryBuilder\Sharded\ShardDefinition; use OC\DB\QueryBuilder\Sharded\ShardedQueryBuilder; -use OC\SystemConfig; use OCP\DB\QueryBuilder\IQueryBuilder; use OCP\IDBConnection; use OCP\Server; -use Psr\Log\LoggerInterface; use Test\TestCase; /** @@ -25,9 +24,11 @@ */ class SharedQueryBuilderTest extends TestCase { private IDBConnection $connection; + private AutoIncrementHandler $autoIncrementHandler; protected function setUp(): void { $this->connection = Server::get(IDBConnection::class); + $this->autoIncrementHandler = Server::get(AutoIncrementHandler::class); } @@ -38,6 +39,7 @@ private function getQueryBuilder(string $table, string $shardColumn, string $pri new ShardDefinition($table, $primaryColumn, [], $shardColumn, new RoundRobinShardMapper(), $companionTables, []), ], $this->createMock(ShardConnectionManager::class), + $this->autoIncrementHandler, ); }