Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Include ON DELETE CASCADE associations in the delete order computation #10913

Merged
merged 6 commits into from
Jan 12, 2024
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
166 changes: 166 additions & 0 deletions lib/Doctrine/ORM/Internal/StronglyConnectedComponents.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,166 @@
<?php

declare(strict_types=1);

namespace Doctrine\ORM\Internal;

use InvalidArgumentException;

use function array_keys;
use function array_pop;
use function array_push;
use function min;
use function spl_object_id;

/**
 * StronglyConnectedComponents implements Tarjan's algorithm to find strongly connected
 * components (SCC) in a directed graph. This algorithm has a linear running time based on
 * nodes (V) and edges between the nodes (E), resulting in a computational complexity
 * of O(V + E).
 *
 * Intended call order:
 *  1. register every node with addNode(),
 *  2. connect registered nodes with addEdge(),
 *  3. run findStronglyConnectedComponents() once,
 *  4. query results with getNodeRepresentingStronglyConnectedComponent().
 *
 * See https://en.wikipedia.org/wiki/Tarjan%27s_strongly_connected_components_algorithm
 * for an explanation and the meaning of the DFS and lowlink numbers.
 *
 * @internal
 */
final class StronglyConnectedComponents
{
    private const NOT_VISITED = 1;
    private const IN_PROGRESS = 2;
    private const VISITED     = 3;

    /**
     * Array of all nodes, indexed by object ids.
     *
     * Keeping the objects referenced here also keeps their object ids stable
     * for the lifetime of this instance.
     *
     * @var array<int, object>
     */
    private $nodes = [];

    /**
     * DFS state for the different nodes, indexed by node object id and using one of
     * this class' constants as value.
     *
     * @var array<int, self::*>
     */
    private $states = [];

    /**
     * Edges between the nodes. The first-level key is the object id of the outgoing
     * node; the second array maps the destination node by object id as key.
     *
     * @var array<int, array<int, bool>>
     */
    private $edges = [];

    /**
     * DFS numbers, by object ID
     *
     * @var array<int, int>
     */
    private $dfs = [];

    /**
     * lowlink numbers, by object ID
     *
     * @var array<int, int>
     */
    private $lowlink = [];

    /**
     * Next DFS number to hand out; incremented each time a node is first visited.
     *
     * @var int
     */
    private $maxdfs = 0;

    /**
     * Nodes representing the SCC another node is in, indexed by lookup-node object ID
     *
     * @var array<int, object>
     */
    private $representingNodes = [];

    /**
     * Object ids of the nodes visited so far that have not yet been assigned to a component.
     *
     * @var array<int>
     */
    private $stack = [];

    /**
     * Registers a node in the graph.
     *
     * Registering a node that is already known resets its DFS state and discards
     * the outgoing edges recorded for it so far.
     *
     * @param object $node
     */
    public function addNode($node): void
    {
        $id = spl_object_id($node);

        $this->nodes[$id]  = $node;
        $this->states[$id] = self::NOT_VISITED;
        $this->edges[$id]  = [];
    }

    /**
     * Tells whether the given object has been registered through addNode().
     *
     * @param object $node
     */
    public function hasNode($node): bool
    {
        return isset($this->nodes[spl_object_id($node)]);
    }

    /**
     * Adds a new edge between two nodes to the graph
     *
     * Both nodes are expected to have been registered with addNode() beforehand:
     * the traversal looks up the DFS state of every edge target, and only addNode()
     * initializes that state. Adding the same edge twice has no additional effect.
     *
     * @param object $from
     * @param object $to
     */
    public function addEdge($from, $to): void
    {
        $fromId = spl_object_id($from);
        $toId   = spl_object_id($to);

        $this->edges[$fromId][$toId] = true;
    }

    /**
     * Runs the depth-first search from every node that has not been visited yet, so that
     * afterwards every registered node has a representing node assigned.
     */
    public function findStronglyConnectedComponents(): void
    {
        foreach (array_keys($this->nodes) as $oid) {
            if ($this->states[$oid] === self::NOT_VISITED) {
                $this->tarjan($oid);
            }
        }
    }

    /**
     * Recursive step of Tarjan's algorithm for the node with the given object id.
     */
    private function tarjan(int $oid): void
    {
        // Assign the next DFS number; initially a node can only reach itself.
        $this->dfs[$oid]    = $this->lowlink[$oid] = $this->maxdfs++;
        $this->states[$oid] = self::IN_PROGRESS;
        array_push($this->stack, $oid);

        foreach ($this->edges[$oid] as $adjacentId => $ignored) {
            if ($this->states[$adjacentId] === self::NOT_VISITED) {
                // Tree edge: descend, then inherit whatever the subtree can reach.
                $this->tarjan($adjacentId);
                $this->lowlink[$oid] = min($this->lowlink[$oid], $this->lowlink[$adjacentId]);
            } elseif ($this->states[$adjacentId] === self::IN_PROGRESS) {
                // Edge to a node that is still on the stack, i.e. part of the current component.
                $this->lowlink[$oid] = min($this->lowlink[$oid], $this->dfs[$adjacentId]);
            }

            // Nodes in state VISITED already belong to a finished component and are ignored.
        }

        if ($this->lowlink[$oid] !== $this->dfs[$oid]) {
            // Not the root of a component; an ancestor will unwind the stack.
            return;
        }

        // $oid is the root of a component: everything above it on the stack (and $oid itself)
        // forms that component. The first node popped becomes the representing node for all.
        $representingNode = null;
        do {
            $unwindOid = array_pop($this->stack);

            if ($representingNode === null) {
                $representingNode = $this->nodes[$unwindOid];
            }

            $this->representingNodes[$unwindOid] = $representingNode;
            $this->states[$unwindOid]            = self::VISITED;
        } while ($unwindOid !== $oid);
    }

    /**
     * Returns the node that represents the strongly connected component the given node
     * belongs to. All nodes of one component share the same representing node.
     *
     * @param object $node
     *
     * @return object
     *
     * @throws InvalidArgumentException When no representing node has been computed for $node,
     *                                  e.g. because it was never added or
     *                                  findStronglyConnectedComponents() has not been run.
     */
    public function getNodeRepresentingStronglyConnectedComponent($node)
    {
        $oid = spl_object_id($node);

        if (! isset($this->representingNodes[$oid])) {
            throw new InvalidArgumentException('unknown node');
        }

        return $this->representingNodes[$oid];
    }
}
82 changes: 72 additions & 10 deletions lib/Doctrine/ORM/UnitOfWork.php
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
use Doctrine\ORM\Exception\UnexpectedAssociationValue;
use Doctrine\ORM\Id\AssignedGenerator;
use Doctrine\ORM\Internal\HydrationCompleteHandler;
use Doctrine\ORM\Internal\StronglyConnectedComponents;
use Doctrine\ORM\Internal\TopologicalSort;
use Doctrine\ORM\Mapping\ClassMetadata;
use Doctrine\ORM\Mapping\MappingException;
Expand Down Expand Up @@ -1391,14 +1392,19 @@ private function computeInsertExecutionOrder(): array
/** @return list<object> */
private function computeDeleteExecutionOrder(): array
{
$sort = new TopologicalSort();
$stronglyConnectedComponents = new StronglyConnectedComponents();
$sort = new TopologicalSort();

// First make sure we have all the nodes
foreach ($this->entityDeletions as $entity) {
$stronglyConnectedComponents->addNode($entity);
$sort->addNode($entity);
}

// Now add edges
// First, consider only "on delete cascade" associations between entities
// and find strongly connected groups. Once we delete any one of the entities
// in such a group, _all_ of the other entities will be removed as well. So,
// we need to treat those groups like a single entity when performing delete
// order topological sorting.
foreach ($this->entityDeletions as $entity) {
$class = $this->em->getClassMetadata(get_class($entity));

Expand All @@ -1410,16 +1416,65 @@ private function computeDeleteExecutionOrder(): array
continue;
}

// For associations that implement a database-level cascade/set null operation,
assert(isset($assoc['joinColumns']));
$joinColumns = reset($assoc['joinColumns']);
if (! isset($joinColumns['onDelete'])) {
continue;
}

$onDeleteOption = strtolower($joinColumns['onDelete']);
if ($onDeleteOption !== 'cascade') {
continue;
}

$targetEntity = $class->getFieldValue($entity, $assoc['fieldName']);

// If the association does not refer to another entity or that entity
// is not to be deleted, there is no ordering problem and we can
// skip this particular association.
if ($targetEntity === null || ! $stronglyConnectedComponents->hasNode($targetEntity)) {
continue;
}

$stronglyConnectedComponents->addEdge($entity, $targetEntity);
}
}

$stronglyConnectedComponents->findStronglyConnectedComponents();

// Now do the actual topological sorting to find the delete order.
foreach ($this->entityDeletions as $entity) {
$class = $this->em->getClassMetadata(get_class($entity));

// Get the entities representing the SCC
$entityComponent = $stronglyConnectedComponents->getNodeRepresentingStronglyConnectedComponent($entity);

// When $entity is part of a non-trivial strongly connected component group
// (i.e. a group that contains more entities than just $entity itself), make sure
// we process it _after_ the entity representing the group.
// The dependency direction implies that "$entity depends on $entityComponent
// being deleted first". The topological sort will output the depended-upon nodes first.
if ($entityComponent !== $entity) {
$sort->addEdge($entity, $entityComponent, false);
}

foreach ($class->associationMappings as $assoc) {
// We only need to consider the owning sides of to-one associations,
// since many-to-many associations can always be (and have already been)
// deleted in a preceding step.
if (! ($assoc['isOwningSide'] && $assoc['type'] & ClassMetadata::TO_ONE)) {
continue;
}

// For associations that implement a database-level set null operation,
// we do not have to follow a particular order: If the referred-to entity is
// deleted first, the DBMS will either delete the current $entity right away
// (CASCADE) or temporarily set the foreign key to NULL (SET NULL).
// Either way, we can skip it in the computation.
// deleted first, the DBMS will temporarily set the foreign key to NULL (SET NULL).
// So, we can skip it in the computation.
assert(isset($assoc['joinColumns']));
$joinColumns = reset($assoc['joinColumns']);
if (isset($joinColumns['onDelete'])) {
$onDeleteOption = strtolower($joinColumns['onDelete']);
if ($onDeleteOption === 'cascade' || $onDeleteOption === 'set null') {
if ($onDeleteOption === 'set null') {
continue;
}
}
Expand All @@ -1433,10 +1488,17 @@ private function computeDeleteExecutionOrder(): array
continue;
}

// Add dependency. The dependency direction implies that "$targetEntity depends on $entity
// Get the entities representing the SCC
$targetEntityComponent = $stronglyConnectedComponents->getNodeRepresentingStronglyConnectedComponent($targetEntity);

// When we have a dependency between two different groups of strongly connected nodes,
// add it to the computation.
// The dependency direction implies that "$targetEntityComponent depends on $entityComponent
// being deleted first". The topological sort will output the depended-upon nodes first,
// so we can work through the result in the returned order.
$sort->addEdge($targetEntity, $entity, false);
if ($targetEntityComponent !== $entityComponent) {
$sort->addEdge($targetEntityComponent, $entityComponent, false);
}
}
}

Expand Down
Loading