Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Backport 2.x] PrimaryShardAllocator refactor to abstract out shard state and method calls #12762

Merged
merged 1 commit into from
Mar 19, 2024
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -81,27 +81,28 @@ public abstract class PrimaryShardAllocator extends BaseGatewayShardAllocator {
/**
* Is the allocator responsible for allocating the given {@link ShardRouting}?
*/
private static boolean isResponsibleFor(final ShardRouting shard) {
protected static boolean isResponsibleFor(final ShardRouting shard) {
return shard.primary() // must be primary
&& shard.unassigned() // must be unassigned
// only handle either an existing store or a snapshot recovery
&& (shard.recoverySource().getType() == RecoverySource.Type.EXISTING_STORE
|| shard.recoverySource().getType() == RecoverySource.Type.SNAPSHOT);
}

@Override
public AllocateUnassignedDecision makeAllocationDecision(
final ShardRouting unassignedShard,
final RoutingAllocation allocation,
final Logger logger
) {
/**
* Skip doing fetchData call for a shard if recovery mode is snapshot. Also do not take decision if allocator is
* not responsible for this particular shard.
*
* @param unassignedShard unassigned shard routing
* @param allocation routing allocation object
* @return allocation decision taken for this shard
*/
protected AllocateUnassignedDecision getInEligibleShardDecision(ShardRouting unassignedShard, RoutingAllocation allocation) {
if (isResponsibleFor(unassignedShard) == false) {
// this allocator is not responsible for allocating this shard
return AllocateUnassignedDecision.NOT_TAKEN;
}

final boolean explain = allocation.debugDecision();

if (unassignedShard.recoverySource().getType() == RecoverySource.Type.SNAPSHOT
&& allocation.snapshotShardSizeInfo().getShardSize(unassignedShard) == null) {
List<NodeAllocationResult> nodeDecisions = null;
Expand All @@ -110,17 +111,52 @@ public AllocateUnassignedDecision makeAllocationDecision(
}
return AllocateUnassignedDecision.no(UnassignedInfo.AllocationStatus.FETCHING_SHARD_DATA, nodeDecisions);
}
return null;
}

@Override
public AllocateUnassignedDecision makeAllocationDecision(
final ShardRouting unassignedShard,
final RoutingAllocation allocation,
final Logger logger
) {
AllocateUnassignedDecision decision = getInEligibleShardDecision(unassignedShard, allocation);
if (decision != null) {
return decision;
}
final FetchResult<NodeGatewayStartedShards> shardState = fetchData(unassignedShard, allocation);
if (shardState.hasData() == false) {
List<NodeGatewayStartedShards> nodeShardStates = adaptToNodeStartedShardList(shardState);
return getAllocationDecision(unassignedShard, allocation, nodeShardStates, logger);
}

/**
* Transforms {@link FetchResult} of {@link NodeGatewayStartedShards} to {@link List} of {@link NodeGatewayStartedShards}
* Returns null if {@link FetchResult} does not have any data.
*/
private static List<NodeGatewayStartedShards> adaptToNodeStartedShardList(FetchResult<NodeGatewayStartedShards> shardsState) {
if (!shardsState.hasData()) {
return null;
}
List<NodeGatewayStartedShards> nodeShardStates = new ArrayList<>();
shardsState.getData().forEach((node, nodeGatewayStartedShard) -> { nodeShardStates.add(nodeGatewayStartedShard); });
return nodeShardStates;
}

protected AllocateUnassignedDecision getAllocationDecision(
ShardRouting unassignedShard,
RoutingAllocation allocation,
List<NodeGatewayStartedShards> shardState,
Logger logger
) {
final boolean explain = allocation.debugDecision();
if (shardState == null) {
allocation.setHasPendingAsyncFetch();
List<NodeAllocationResult> nodeDecisions = null;
if (explain) {
nodeDecisions = buildDecisionsForAllNodes(unassignedShard, allocation);
}
return AllocateUnassignedDecision.no(AllocationStatus.FETCHING_SHARD_DATA, nodeDecisions);
}

// don't create a new IndexSetting object for every shard as this could cause a lot of garbage
// on cluster restart if we allocate a boat load of shards
final IndexMetadata indexMetadata = allocation.metadata().getIndexSafe(unassignedShard.index());
Expand Down Expand Up @@ -260,11 +296,11 @@ public AllocateUnassignedDecision makeAllocationDecision(
*/
private static List<NodeAllocationResult> buildNodeDecisions(
NodesToAllocate nodesToAllocate,
FetchResult<NodeGatewayStartedShards> fetchedShardData,
List<NodeGatewayStartedShards> fetchedShardData,
Set<String> inSyncAllocationIds
) {
List<NodeAllocationResult> nodeResults = new ArrayList<>();
Collection<NodeGatewayStartedShards> ineligibleShards;
Collection<NodeGatewayStartedShards> ineligibleShards = new ArrayList<>();
if (nodesToAllocate != null) {
final Set<DiscoveryNode> discoNodes = new HashSet<>();
nodeResults.addAll(
Expand All @@ -280,15 +316,13 @@ private static List<NodeAllocationResult> buildNodeDecisions(
})
.collect(Collectors.toList())
);
ineligibleShards = fetchedShardData.getData()
.values()
.stream()
ineligibleShards = fetchedShardData.stream()
.filter(shardData -> discoNodes.contains(shardData.getNode()) == false)
.collect(Collectors.toList());
} else {
// there were no shard copies that were eligible for being assigned the allocation,
// so all fetched shard data are ineligible shards
ineligibleShards = fetchedShardData.getData().values();
ineligibleShards = fetchedShardData;
}

nodeResults.addAll(
Expand Down Expand Up @@ -328,12 +362,12 @@ protected static NodeShardsResult buildNodeShardsResult(
boolean matchAnyShard,
Set<String> ignoreNodes,
Set<String> inSyncAllocationIds,
FetchResult<NodeGatewayStartedShards> shardState,
List<NodeGatewayStartedShards> shardState,
Logger logger
) {
List<NodeGatewayStartedShards> nodeShardStates = new ArrayList<>();
int numberOfAllocationsFound = 0;
for (NodeGatewayStartedShards nodeShardState : shardState.getData().values()) {
for (NodeGatewayStartedShards nodeShardState : shardState) {
DiscoveryNode node = nodeShardState.getNode();
String allocationId = nodeShardState.allocationId();

Expand Down Expand Up @@ -386,11 +420,27 @@ protected static NodeShardsResult buildNodeShardsResult(
}
}

/*
Orders the active shards copies based on below comparators
1. No store exception i.e. shard copy is readable
2. Prefer previous primary shard
3. Prefer shard copy with the highest replication checkpoint. It is NO-OP for doc rep enabled indices.
nodeShardStates.sort(createActiveShardComparator(matchAnyShard, inSyncAllocationIds));

if (logger.isTraceEnabled()) {
logger.trace(
"{} candidates for allocation: {}",
shard,
nodeShardStates.stream().map(s -> s.getNode().getName()).collect(Collectors.joining(", "))
);
}
return new NodeShardsResult(nodeShardStates, numberOfAllocationsFound);
}

private static Comparator<NodeGatewayStartedShards> createActiveShardComparator(
boolean matchAnyShard,
Set<String> inSyncAllocationIds
) {
/**
* Orders the active shards copies based on below comparators
* 1. No store exception i.e. shard copy is readable
* 2. Prefer previous primary shard
* 3. Prefer shard copy with the highest replication checkpoint. It is NO-OP for doc rep enabled indices.
*/
final Comparator<NodeGatewayStartedShards> comparator; // allocation preference
if (matchAnyShard) {
Expand All @@ -406,16 +456,7 @@ protected static NodeShardsResult buildNodeShardsResult(
.thenComparing(HIGHEST_REPLICATION_CHECKPOINT_FIRST_COMPARATOR);
}

nodeShardStates.sort(comparator);

if (logger.isTraceEnabled()) {
logger.trace(
"{} candidates for allocation: {}",
shard,
nodeShardStates.stream().map(s -> s.getNode().getName()).collect(Collectors.joining(", "))
);
}
return new NodeShardsResult(nodeShardStates, numberOfAllocationsFound);
return comparator;
}

/**
Expand Down Expand Up @@ -457,7 +498,10 @@ private static NodesToAllocate buildNodesToAllocate(

protected abstract FetchResult<NodeGatewayStartedShards> fetchData(ShardRouting shard, RoutingAllocation allocation);

private static class NodeShardsResult {
/**
* This class encapsulates the result of a call to {@link #buildNodeShardsResult}
*/
static class NodeShardsResult {
final List<NodeGatewayStartedShards> orderedAllocationCandidates;
final int allocationsFound;

Expand All @@ -467,7 +511,10 @@ private static class NodeShardsResult {
}
}

static class NodesToAllocate {
/**
* This class encapsulates the result of a call to {@link #buildNodesToAllocate}
*/
protected static class NodesToAllocate {
final List<DecidedNode> yesNodeShards;
final List<DecidedNode> throttleNodeShards;
final List<DecidedNode> noNodeShards;
Expand Down
Loading