Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[improve][misc] Sync commits from apache into 3.1_ds (13 Jan) #362

Merged
merged 5 commits into from
Jan 13, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -142,6 +142,7 @@ public void initiate() {
payloadProcessorHandle = ml.getManagedLedgerInterceptor()
.processPayloadBeforeLedgerWrite(this, duplicateBuffer);
} catch (Exception e) {
ml.pendingAddEntries.remove(this);
ReferenceCountUtil.safeRelease(duplicateBuffer);
log.error("[{}] Error processing payload before ledger write", ml.getName(), e);
this.failed(new ManagedLedgerException.ManagedLedgerInterceptException(e));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
package org.apache.pulsar.broker.service.persistent;

import static org.apache.pulsar.common.util.Runnables.catchingAndLoggingThrowables;
import com.google.common.annotations.VisibleForTesting;
import io.netty.buffer.ByteBuf;
import io.prometheus.client.Gauge;
import java.io.IOException;
Expand Down Expand Up @@ -206,6 +207,23 @@ private void receiveSubscriptionUpdated(ReplicatedSubscriptionsUpdate update) {
private void startNewSnapshot() {
cleanupTimedOutSnapshots();

if (lastCompletedSnapshotStartTime == 0 && !pendingSnapshots.isEmpty()) {
// 1. If the remote cluster has disabled subscription replication or there's an incorrect config,
// it will not respond to SNAPSHOT_REQUEST. Therefore, lastCompletedSnapshotStartTime will remain 0,
// making it unnecessary to resend the request.
// 2. This approach prevents sending additional SNAPSHOT_REQUEST to both local_topic and remote_topic.
// 3. Since it's uncertain when the remote cluster will enable subscription replication,
// the timeout mechanism of pendingSnapshots is used to ensure retries.
//
// In other words, when this case is hit, SNAPSHOT_REQUEST is resent at the interval
// given by `replicatedSubscriptionsSnapshotTimeoutSeconds`.
if (log.isDebugEnabled()) {
log.debug("[{}] PendingSnapshot exists but has never succeeded. "
+ "Skipping snapshot creation until pending snapshot timeout.", topic.getName());
}
return;
}

if (topic.getLastMaxReadPositionMovedForwardTimestamp() < lastCompletedSnapshotStartTime
|| topic.getLastMaxReadPositionMovedForwardTimestamp() == 0) {
// There was no message written since the last snapshot, we can skip creating a new snapshot
Expand Down Expand Up @@ -302,6 +320,11 @@ String localCluster() {
return localCluster;
}

/**
 * Test-only accessor for the snapshots that are currently pending.
 *
 * @return the {@code pendingSnapshots} map held by this instance (the field itself, not a copy)
 */
@VisibleForTesting
public ConcurrentMap<String, ReplicatedSubscriptionsSnapshotBuilder> pendingSnapshots() {
    return this.pendingSnapshots;
}

@Override
public boolean isMarkerMessage() {
// Everything published by this controller will be a marker message
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,14 +18,14 @@
*/
package org.apache.pulsar.broker.intercept;

import static org.testng.Assert.fail;

import io.netty.buffer.ByteBuf;
import io.netty.buffer.Unpooled;
import java.util.ArrayList;
import java.util.Collections;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.function.Predicate;
import lombok.Cleanup;
import org.apache.bookkeeper.mledger.AsyncCallbacks;
Expand Down Expand Up @@ -451,26 +451,31 @@ public Processor inputProcessor() {
return new Processor() {
@Override
public ByteBuf process(Object contextObj, ByteBuf inputPayload) {
throw new RuntimeException(failureMsg);
if (inputPayload.readBoolean()) {
throw new RuntimeException(failureMsg);
}
return inputPayload;
}

@Override
public void release(ByteBuf processedPayload) {
// no-op
fail("the release method can't be reached");
}
};
}
})));

var ledger = factory.open("testManagedLedgerPayloadProcessorFailure", config);
var countDownLatch = new CountDownLatch(1);
int count = 10;
var countDownLatch = new CountDownLatch(count);
var successCount = new AtomicInteger(0);
var expectedException = new ArrayList<Exception>();
ledger.asyncAddEntry("test".getBytes(), 1, 1, new AsyncCallbacks.AddEntryCallback() {

var addEntryCallback = new AsyncCallbacks.AddEntryCallback() {
@Override
public void addComplete(Position position, ByteBuf entryData, Object ctx) {
entryData.release();
countDownLatch.countDown();
successCount.incrementAndGet();
}

@Override
Expand All @@ -479,10 +484,23 @@ public void addFailed(ManagedLedgerException exception, Object ctx) {
expectedException.add(exception);
countDownLatch.countDown();
}
}, null);
};

for (int i = 0; i < count; i++) {
if (i % 2 == 0) {
ledger.asyncAddEntry(Unpooled.buffer().writeBoolean(true), addEntryCallback, null);
} else {
ledger.asyncAddEntry(Unpooled.buffer().writeBoolean(false), addEntryCallback, null);
}
}

countDownLatch.await();
assertEquals(expectedException.size(), 1);
assertEquals(expectedException.get(0).getCause().getMessage(), failureMsg);
assertEquals(expectedException.size(), count / 2);
assertEquals(successCount.get(), count / 2);
for (Exception e : expectedException) {
assertEquals(e.getCause().getMessage(), failureMsg);
}
ledger.close();
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,10 @@
import org.apache.pulsar.client.api.Schema;
import org.apache.pulsar.client.api.SubscriptionInitialPosition;
import org.apache.pulsar.client.api.SubscriptionType;
import org.apache.pulsar.client.api.TransactionIsolationLevel;
import org.apache.pulsar.client.impl.MessageImpl;
import org.apache.pulsar.common.api.proto.MarkerType;
import org.apache.pulsar.common.api.proto.MessageMetadata;
import org.apache.pulsar.common.policies.data.PartitionedTopicStats;
import org.apache.pulsar.common.policies.data.PersistentTopicInternalStats;
import org.apache.pulsar.common.policies.data.TenantInfoImpl;
Expand Down Expand Up @@ -987,6 +991,92 @@ public void testReplicatedSubscriptionWithCompaction() throws Exception {
Assert.assertEquals(result, List.of("V2"));
}

/**
 * Checks how often cluster1 sends snapshot requests when the peer cluster never answers them
 * (per this test's setup, cluster4 has replicated subscriptions disabled).
 *
 * <p>With the snapshot frequency set to 100 ms and the snapshot timeout set to 2 s, the test expects
 * exactly one pending snapshot and one SNAPSHOT_REQUEST marker on cluster4 shortly after publishing,
 * and exactly two markers once the pending-snapshot timeout has elapsed.
 *
 * <p>The broker config overrides are restored in a {@code finally} block so that a failing assertion
 * does not leak the shortened timings into other tests sharing {@code config1}.
 */
@Test
public void testReplicatedSubscriptionOneWay() throws Exception {
    final String namespace = BrokerTestUtil.newUniqueName("pulsar-r4/replicatedsubscription");
    final String topicName = "persistent://" + namespace + "/one-way";
    final int defaultSubscriptionsSnapshotFrequency = config1.getReplicatedSubscriptionsSnapshotFrequencyMillis();
    final int defaultSubscriptionsSnapshotTimeout = config1.getReplicatedSubscriptionsSnapshotTimeoutSeconds();
    config1.setReplicatedSubscriptionsSnapshotTimeoutSeconds(2);
    config1.setReplicatedSubscriptionsSnapshotFrequencyMillis(100);

    try {
        // cluster4 disabled ReplicatedSubscriptions
        admin1.tenants().createTenant("pulsar-r4",
                new TenantInfoImpl(Sets.newHashSet("appid1", "appid4"), Sets.newHashSet(cluster1, cluster4)));
        admin1.namespaces().createNamespace(namespace);
        admin1.namespaces().setNamespaceReplicationClusters(namespace, Sets.newHashSet(cluster1, cluster4));

        String subscriptionName = "cluster-subscription";
        boolean replicateSubscriptionState = true;

        @Cleanup
        PulsarClient client1 = PulsarClient.builder().serviceUrl(url1.toString())
                .statsInterval(0, TimeUnit.SECONDS)
                .build();

        @Cleanup
        final PulsarClient client4 = PulsarClient.builder().serviceUrl(url4.toString())
                .statsInterval(0, TimeUnit.SECONDS)
                .build();

        // create subscription in cluster1
        createReplicatedSubscription(client1, topicName, subscriptionName, replicateSubscriptionState);
        // create subscription in cluster4
        createReplicatedSubscription(client4, topicName, subscriptionName, replicateSubscriptionState);

        // send messages in cluster1
        @Cleanup
        Producer<byte[]> producer = client1.newProducer().topic(topicName)
                .enableBatching(false)
                .messageRoutingMode(MessageRoutingMode.SinglePartition)
                .create();
        int numMessages = 6;
        for (int i = 0; i < numMessages; i++) {
            String body = "message" + i;
            producer.send(body.getBytes(StandardCharsets.UTF_8));
        }
        producer.close();

        // wait for snapshot marker request to be replicated
        Thread.sleep(3 * config1.getReplicatedSubscriptionsSnapshotFrequencyMillis());

        // Assert just have 1 pending snapshot in cluster1
        final PersistentTopic topic1 =
                (PersistentTopic) pulsar1.getBrokerService().getTopic(topicName, false).join().get();
        ReplicatedSubscriptionsController r1Controller =
                topic1.getReplicatedSubscriptionController().get();
        assertEquals(r1Controller.pendingSnapshots().size(), 1);

        // Assert cluster4 just receive 1 snapshot request msg
        Assert.assertEquals(countSnapshotRequestMarkersInCluster4(topicName, subscriptionName), 1);

        // Wait pending snapshot timeout; a second request is expected only after it elapses
        Thread.sleep(config1.getReplicatedSubscriptionsSnapshotTimeoutSeconds() * 1000);
        Assert.assertEquals(countSnapshotRequestMarkersInCluster4(topicName, subscriptionName), 2);
    } finally {
        // Set back to default config, even when an assertion above failed.
        config1.setReplicatedSubscriptionsSnapshotTimeoutSeconds(defaultSubscriptionsSnapshotTimeout);
        config1.setReplicatedSubscriptionsSnapshotFrequencyMillis(defaultSubscriptionsSnapshotFrequency);
    }
}

/**
 * Peeks up to 100 messages of the given subscription through {@code admin4} and counts those whose
 * metadata carries the REPLICATED_SUBSCRIPTION_SNAPSHOT_REQUEST marker type.
 *
 * @param topicName        topic to peek from
 * @param subscriptionName subscription to peek from
 * @return number of snapshot-request marker messages among the peeked messages
 * @throws Exception if peeking the messages fails
 */
private int countSnapshotRequestMarkersInCluster4(String topicName, String subscriptionName) throws Exception {
    int numSnapshotRequest = 0;
    List<Message<byte[]>> r4Messages = admin4.topics()
            .peekMessages(topicName, subscriptionName, 100, true, TransactionIsolationLevel.READ_UNCOMMITTED);
    for (Message<byte[]> r4Message : r4Messages) {
        MessageMetadata msgMetadata = ((MessageImpl<byte[]>) r4Message).getMessageBuilder();
        if (msgMetadata.hasMarkerType()
                && msgMetadata.getMarkerType() == MarkerType.REPLICATED_SUBSCRIPTION_SNAPSHOT_REQUEST_VALUE) {
            numSnapshotRequest++;
        }
    }
    return numSnapshotRequest;
}

/**
* Disable replication subscription.
* Test scheduled task case.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
*/
package org.apache.pulsar.client.api;

import static org.assertj.core.api.Assertions.assertThat;
import static org.testng.Assert.assertEquals;
import static org.testng.Assert.assertNotNull;
import static org.testng.Assert.assertNull;
Expand All @@ -39,9 +40,11 @@
import lombok.Cleanup;
import lombok.Data;
import org.apache.avro.reflect.Nullable;
import org.apache.pulsar.broker.BrokerTestUtil;
import org.apache.pulsar.client.api.schema.GenericRecord;
import org.apache.pulsar.client.impl.ConsumerBuilderImpl;
import org.apache.pulsar.client.util.RetryMessageUtil;
import org.apache.pulsar.common.policies.data.SchemaCompatibilityStrategy;
import org.awaitility.Awaitility;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
Expand Down Expand Up @@ -1019,4 +1022,94 @@ public void testDeadLetterPolicyDeserialize() throws Exception {
consumerBuilder.loadConf(config);
assertEquals(((ConsumerBuilderImpl)consumerBuilder).getConf().getDeadLetterPolicy(), policy);
}

/**
 * Test payload whose single field {@code number} is a {@link String}.
 */
@Data
static class Payload {
    String number;

    /** Creates a payload with {@code number} set to the given value. */
    public Payload(String number) {
        this.number = number;
    }

    /** Creates a payload with {@code number} left unset. */
    public Payload() {
    }
}

/**
 * Test payload whose single field {@code number} is a {@code long}.
 */
@Data
static class PayloadIncompatible {
    long number;

    /** Creates a payload with {@code number} set to the given value. */
    public PayloadIncompatible(long number) {
        this.number = number;
    }

    /** Creates a payload with {@code number} left at its default value. */
    public PayloadIncompatible() {
    }
}

// reproduce issue reported in https://github.com/apache/pulsar/issues/20635#issuecomment-1709616321
/**
 * Publishes a {@link PayloadIncompatible} message to a topic consumed with the {@link Payload} schema
 * and a dead letter policy, then checks that the DLQ topic has at most one producer while the consumer
 * is open and none after the consumer has been closed.
 */
@Test
public void testCloseDeadLetterTopicProducerOnExceptionToPreventProducerLeak() throws Exception {
    final String namespace = BrokerTestUtil.newUniqueName("my-property/my-ns");
    admin.namespaces().createNamespace(namespace);
    // schema validation is not enforced for this namespace
    admin.namespaces().setSchemaValidationEnforced(namespace, false);
    // every schema is treated as compatible
    admin.namespaces().setSchemaCompatibilityStrategy(namespace, SchemaCompatibilityStrategy.ALWAYS_COMPATIBLE);

    final Schema<Payload> consumerSchema = Schema.AVRO(Payload.class);
    final Schema<PayloadIncompatible> producerSchema = Schema.AVRO(PayloadIncompatible.class);
    final String topic = BrokerTestUtil.newUniqueName("persistent://" + namespace
            + "/testCloseDeadLetterTopicProducerOnExceptionToPreventProducerLeak");
    final String dlqTopic = topic + "-DLQ";

    // both the main topic and its DLQ exist up front
    admin.topics().createNonPartitionedTopic(topic);
    admin.topics().createNonPartitionedTopic(dlqTopic);

    final AtomicInteger deliveries = new AtomicInteger(0);
    // Subscribing happens before the try block: if it throws there is nothing to close yet.
    final Consumer<Payload> payloadConsumer = pulsarClient.newConsumer(consumerSchema).topic(topic)
            .subscriptionType(SubscriptionType.Shared).subscriptionName("sub")
            .ackTimeout(1, TimeUnit.SECONDS)
            .negativeAckRedeliveryDelay(1, TimeUnit.MILLISECONDS)
            .deadLetterPolicy(DeadLetterPolicy.builder().maxRedeliverCount(3).deadLetterTopic(dlqTopic).build())
            .messageListener((consumer, message) -> {
                // the first nine deliveries are negatively acknowledged; later ones are left alone
                if (deliveries.incrementAndGet() < 10) {
                    consumer.negativeAcknowledge(message);
                }
            }).subscribe();
    try {
        // publish a single message using the other (incompatible) schema
        try (Producer<PayloadIncompatible> producer = pulsarClient.newProducer(producerSchema).topic(topic)
                .create()) {
            producer.send(new PayloadIncompatible(123));
        }

        Thread.sleep(2000L);

        final int producersWhileConsuming =
                pulsar.getBrokerService().getTopicReference(dlqTopic).get().getProducers().size();
        assertThat(producersWhileConsuming)
                .describedAs("producer count of dlq topic %s should be <= 1 so that it doesn't leak producers",
                        dlqTopic)
                .isLessThanOrEqualTo(1);
    } finally {
        try {
            payloadConsumer.close();
        } catch (PulsarClientException ignored) {
            // ignore
        }
    }

    final int producersAfterClose =
            pulsar.getBrokerService().getTopicReference(dlqTopic).get().getProducers().size();
    assertThat(producersAfterClose)
            .describedAs("producer count of dlq topic %s should be 0 here",
                    dlqTopic)
            .isEqualTo(0);
}
}
Loading
Loading