Force Lease to be lost before shutting down with Zombi state

This commit is contained in:
Chunxue Yang 2019-10-10 22:06:55 -07:00
parent afd7742c70
commit 800d24309d
6 changed files with 42 additions and 12 deletions

View file

@ -588,7 +588,7 @@ public class Scheduler implements Runnable {
checkpoint); checkpoint);
ShardConsumerArgument argument = new ShardConsumerArgument(shardInfo, ShardConsumerArgument argument = new ShardConsumerArgument(shardInfo,
streamName, streamName,
leaseRefresher, leaseCoordinator,
executorService, executorService,
cache, cache,
shardRecordProcessorFactory.shardRecordProcessor(), shardRecordProcessorFactory.shardRecordProcessor(),

View file

@ -135,7 +135,7 @@ class ConsumerStates {
@Override @Override
public ConsumerTask createTask(ShardConsumerArgument consumerArgument, ShardConsumer consumer, ProcessRecordsInput input) { public ConsumerTask createTask(ShardConsumerArgument consumerArgument, ShardConsumer consumer, ProcessRecordsInput input) {
return new BlockOnParentShardTask(consumerArgument.shardInfo(), return new BlockOnParentShardTask(consumerArgument.shardInfo(),
consumerArgument.leaseRefresher(), consumerArgument.leaseCoordinator().leaseRefresher(),
consumerArgument.parentShardPollIntervalMillis()); consumerArgument.parentShardPollIntervalMillis());
} }
@ -492,7 +492,7 @@ class ConsumerStates {
argument.initialPositionInStream(), argument.initialPositionInStream(),
argument.cleanupLeasesOfCompletedShards(), argument.cleanupLeasesOfCompletedShards(),
argument.ignoreUnexpectedChildShards(), argument.ignoreUnexpectedChildShards(),
argument.leaseRefresher(), argument.leaseCoordinator(),
argument.taskBackoffTimeMillis(), argument.taskBackoffTimeMillis(),
argument.recordsPublisher(), argument.recordsPublisher(),
argument.hierarchicalShardSyncer(), argument.hierarchicalShardSyncer(),

View file

@ -21,6 +21,7 @@ import lombok.experimental.Accessors;
import software.amazon.kinesis.annotations.KinesisClientInternalApi; import software.amazon.kinesis.annotations.KinesisClientInternalApi;
import software.amazon.kinesis.checkpoint.ShardRecordProcessorCheckpointer; import software.amazon.kinesis.checkpoint.ShardRecordProcessorCheckpointer;
import software.amazon.kinesis.common.InitialPositionInStreamExtended; import software.amazon.kinesis.common.InitialPositionInStreamExtended;
import software.amazon.kinesis.leases.LeaseCoordinator;
import software.amazon.kinesis.leases.LeaseRefresher; import software.amazon.kinesis.leases.LeaseRefresher;
import software.amazon.kinesis.leases.ShardDetector; import software.amazon.kinesis.leases.ShardDetector;
import software.amazon.kinesis.leases.ShardInfo; import software.amazon.kinesis.leases.ShardInfo;
@ -42,7 +43,7 @@ public class ShardConsumerArgument {
@NonNull @NonNull
private final String streamName; private final String streamName;
@NonNull @NonNull
private final LeaseRefresher leaseRefresher; private final LeaseCoordinator leaseCoordinator;
@NonNull @NonNull
private final ExecutorService executorService; private final ExecutorService executorService;
@NonNull @NonNull

View file

@ -25,6 +25,8 @@ import software.amazon.awssdk.utils.CollectionUtils;
import software.amazon.kinesis.annotations.KinesisClientInternalApi; import software.amazon.kinesis.annotations.KinesisClientInternalApi;
import software.amazon.kinesis.checkpoint.ShardRecordProcessorCheckpointer; import software.amazon.kinesis.checkpoint.ShardRecordProcessorCheckpointer;
import software.amazon.kinesis.common.InitialPositionInStreamExtended; import software.amazon.kinesis.common.InitialPositionInStreamExtended;
import software.amazon.kinesis.leases.Lease;
import software.amazon.kinesis.leases.LeaseCoordinator;
import software.amazon.kinesis.leases.LeaseRefresher; import software.amazon.kinesis.leases.LeaseRefresher;
import software.amazon.kinesis.leases.ShardDetector; import software.amazon.kinesis.leases.ShardDetector;
import software.amazon.kinesis.leases.ShardInfo; import software.amazon.kinesis.leases.ShardInfo;
@ -40,6 +42,7 @@ import software.amazon.kinesis.retrieval.RecordsPublisher;
import software.amazon.kinesis.retrieval.kpl.ExtendedSequenceNumber; import software.amazon.kinesis.retrieval.kpl.ExtendedSequenceNumber;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Collection;
import java.util.List; import java.util.List;
/** /**
@ -67,7 +70,7 @@ public class ShutdownTask implements ConsumerTask {
private final boolean cleanupLeasesOfCompletedShards; private final boolean cleanupLeasesOfCompletedShards;
private final boolean ignoreUnexpectedChildShards; private final boolean ignoreUnexpectedChildShards;
@NonNull @NonNull
private final LeaseRefresher leaseRefresher; private final LeaseCoordinator leaseCoordinator;
private final long backoffTimeMillis; private final long backoffTimeMillis;
@NonNull @NonNull
private final RecordsPublisher recordsPublisher; private final RecordsPublisher recordsPublisher;
@ -106,6 +109,8 @@ public class ShutdownTask implements ConsumerTask {
if (!CollectionUtils.isNullOrEmpty(allShards) && !validateShardEnd(allShards)) { if (!CollectionUtils.isNullOrEmpty(allShards) && !validateShardEnd(allShards)) {
localReason = ShutdownReason.LEASE_LOST; localReason = ShutdownReason.LEASE_LOST;
forceLoseLease();
log.debug("Force the lease to be lost before shutting down the consumer.");
} }
} }
@ -147,7 +152,7 @@ public class ShutdownTask implements ConsumerTask {
if (localReason == ShutdownReason.SHARD_END) { if (localReason == ShutdownReason.SHARD_END) {
log.debug("Looking for child shards of shard {}", shardInfo.shardId()); log.debug("Looking for child shards of shard {}", shardInfo.shardId());
// create leases for the child shards // create leases for the child shards
hierarchicalShardSyncer.checkAndCreateLeaseForNewShards(allShards, shardDetector, leaseRefresher, hierarchicalShardSyncer.checkAndCreateLeaseForNewShards(allShards, shardDetector, leaseCoordinator.leaseRefresher(),
initialPositionInStream, cleanupLeasesOfCompletedShards, ignoreUnexpectedChildShards, scope); initialPositionInStream, cleanupLeasesOfCompletedShards, ignoreUnexpectedChildShards, scope);
log.debug("Finished checking for child shards of shard {}", shardInfo.shardId()); log.debug("Finished checking for child shards of shard {}", shardInfo.shardId());
} }
@ -204,4 +209,15 @@ public class ShutdownTask implements ConsumerTask {
|| shard.adjacentParentShardId() != null && shard.adjacentParentShardId().equals(shardInfo.shardId())); || shard.adjacentParentShardId() != null && shard.adjacentParentShardId().equals(shardInfo.shardId()));
} }
private void forceLoseLease() {
Collection<Lease> leases = leaseCoordinator.getAssignments();
if(leases != null && !leases.isEmpty()) {
for(Lease lease : leases) {
if(lease.leaseKey().equals(shardInfo.shardId())) {
leaseCoordinator.dropLease(lease);
}
}
}
}
} }

View file

@ -43,6 +43,7 @@ import software.amazon.awssdk.services.kinesis.KinesisAsyncClient;
import software.amazon.kinesis.checkpoint.ShardRecordProcessorCheckpointer; import software.amazon.kinesis.checkpoint.ShardRecordProcessorCheckpointer;
import software.amazon.kinesis.common.InitialPositionInStream; import software.amazon.kinesis.common.InitialPositionInStream;
import software.amazon.kinesis.common.InitialPositionInStreamExtended; import software.amazon.kinesis.common.InitialPositionInStreamExtended;
import software.amazon.kinesis.leases.LeaseCoordinator;
import software.amazon.kinesis.leases.LeaseRefresher; import software.amazon.kinesis.leases.LeaseRefresher;
import software.amazon.kinesis.leases.ShardDetector; import software.amazon.kinesis.leases.ShardDetector;
import software.amazon.kinesis.leases.ShardInfo; import software.amazon.kinesis.leases.ShardInfo;
@ -55,6 +56,8 @@ import software.amazon.kinesis.processor.ShardRecordProcessor;
import software.amazon.kinesis.retrieval.AggregatorUtil; import software.amazon.kinesis.retrieval.AggregatorUtil;
import software.amazon.kinesis.retrieval.RecordsPublisher; import software.amazon.kinesis.retrieval.RecordsPublisher;
import javax.swing.*;
@RunWith(MockitoJUnitRunner.class) @RunWith(MockitoJUnitRunner.class)
public class ConsumerStatesTest { public class ConsumerStatesTest {
private static final String STREAM_NAME = "TestStream"; private static final String STREAM_NAME = "TestStream";
@ -73,6 +76,8 @@ public class ConsumerStatesTest {
@Mock @Mock
private ShardInfo shardInfo; private ShardInfo shardInfo;
@Mock @Mock
private LeaseCoordinator leaseCoordinator;
@Mock
private LeaseRefresher leaseRefresher; private LeaseRefresher leaseRefresher;
@Mock @Mock
private Checkpointer checkpointer; private Checkpointer checkpointer;
@ -109,7 +114,7 @@ public class ConsumerStatesTest {
@Before @Before
public void setup() { public void setup() {
argument = new ShardConsumerArgument(shardInfo, STREAM_NAME, leaseRefresher, executorService, recordsPublisher, argument = new ShardConsumerArgument(shardInfo, STREAM_NAME, leaseCoordinator, executorService, recordsPublisher,
shardRecordProcessor, checkpointer, recordProcessorCheckpointer, parentShardPollIntervalMillis, shardRecordProcessor, checkpointer, recordProcessorCheckpointer, parentShardPollIntervalMillis,
taskBackoffTimeMillis, skipShardSyncAtWorkerInitializationIfLeasesExist, listShardsBackoffTimeInMillis, taskBackoffTimeMillis, skipShardSyncAtWorkerInitializationIfLeasesExist, listShardsBackoffTimeInMillis,
maxListShardsRetryAttempts, shouldCallProcessRecordsEvenForEmptyRecordList, idleTimeInMillis, maxListShardsRetryAttempts, shouldCallProcessRecordsEvenForEmptyRecordList, idleTimeInMillis,
@ -127,6 +132,7 @@ public class ConsumerStatesTest {
@Test @Test
public void blockOnParentStateTest() { public void blockOnParentStateTest() {
ConsumerState state = ShardConsumerState.WAITING_ON_PARENT_SHARDS.consumerState(); ConsumerState state = ShardConsumerState.WAITING_ON_PARENT_SHARDS.consumerState();
when(leaseCoordinator.leaseRefresher()).thenReturn(leaseRefresher);
ConsumerTask task = state.createTask(argument, consumer, null); ConsumerTask task = state.createTask(argument, consumer, null);
@ -309,7 +315,7 @@ public class ConsumerStatesTest {
assertThat(task, shutdownTask(ShardRecordProcessorCheckpointer.class, "recordProcessorCheckpointer", assertThat(task, shutdownTask(ShardRecordProcessorCheckpointer.class, "recordProcessorCheckpointer",
equalTo(recordProcessorCheckpointer))); equalTo(recordProcessorCheckpointer)));
assertThat(task, shutdownTask(ShutdownReason.class, "reason", equalTo(reason))); assertThat(task, shutdownTask(ShutdownReason.class, "reason", equalTo(reason)));
assertThat(task, shutdownTask(LEASE_REFRESHER_CLASS, "leaseRefresher", equalTo(leaseRefresher))); assertThat(task, shutdownTask(LeaseCoordinator.class, "leaseCoordinator", equalTo(leaseCoordinator)));
assertThat(task, shutdownTask(InitialPositionInStreamExtended.class, "initialPositionInStream", assertThat(task, shutdownTask(InitialPositionInStreamExtended.class, "initialPositionInStream",
equalTo(initialPositionInStream))); equalTo(initialPositionInStream)));
assertThat(task, assertThat(task,

View file

@ -42,6 +42,7 @@ import software.amazon.kinesis.common.InitialPositionInStream;
import software.amazon.kinesis.common.InitialPositionInStreamExtended; import software.amazon.kinesis.common.InitialPositionInStreamExtended;
import software.amazon.kinesis.exceptions.internal.KinesisClientLibIOException; import software.amazon.kinesis.exceptions.internal.KinesisClientLibIOException;
import software.amazon.kinesis.leases.HierarchicalShardSyncer; import software.amazon.kinesis.leases.HierarchicalShardSyncer;
import software.amazon.kinesis.leases.LeaseCoordinator;
import software.amazon.kinesis.leases.LeaseRefresher; import software.amazon.kinesis.leases.LeaseRefresher;
import software.amazon.kinesis.leases.ShardDetector; import software.amazon.kinesis.leases.ShardDetector;
import software.amazon.kinesis.leases.ShardInfo; import software.amazon.kinesis.leases.ShardInfo;
@ -83,6 +84,8 @@ public class ShutdownTaskTest {
@Mock @Mock
private LeaseRefresher leaseRefresher; private LeaseRefresher leaseRefresher;
@Mock @Mock
private LeaseCoordinator leaseCoordinator;
@Mock
private ShardDetector shardDetector; private ShardDetector shardDetector;
@Mock @Mock
private HierarchicalShardSyncer hierarchicalShardSyncer; private HierarchicalShardSyncer hierarchicalShardSyncer;
@ -99,7 +102,7 @@ public class ShutdownTaskTest {
task = new ShutdownTask(shardInfo, shardDetector, shardRecordProcessor, recordProcessorCheckpointer, task = new ShutdownTask(shardInfo, shardDetector, shardRecordProcessor, recordProcessorCheckpointer,
SHARD_END_SHUTDOWN_REASON, INITIAL_POSITION_TRIM_HORIZON, cleanupLeasesOfCompletedShards, SHARD_END_SHUTDOWN_REASON, INITIAL_POSITION_TRIM_HORIZON, cleanupLeasesOfCompletedShards,
ignoreUnexpectedChildShards, leaseRefresher, TASK_BACKOFF_TIME_MILLIS, recordsPublisher, ignoreUnexpectedChildShards, leaseCoordinator, TASK_BACKOFF_TIME_MILLIS, recordsPublisher,
hierarchicalShardSyncer, NULL_METRICS_FACTORY); hierarchicalShardSyncer, NULL_METRICS_FACTORY);
} }
@ -124,6 +127,7 @@ public class ShutdownTaskTest {
List<Shard> shards = constructShardListGraphA(); List<Shard> shards = constructShardListGraphA();
when(shardDetector.listShards()).thenReturn(shards); when(shardDetector.listShards()).thenReturn(shards);
when(recordProcessorCheckpointer.lastCheckpointValue()).thenReturn(ExtendedSequenceNumber.SHARD_END); when(recordProcessorCheckpointer.lastCheckpointValue()).thenReturn(ExtendedSequenceNumber.SHARD_END);
when(leaseCoordinator.leaseRefresher()).thenReturn(leaseRefresher);
doAnswer((invocation) -> { doAnswer((invocation) -> {
throw new KinesisClientLibIOException("KinesisClientLibIOException"); throw new KinesisClientLibIOException("KinesisClientLibIOException");
@ -149,7 +153,7 @@ public class ShutdownTaskTest {
ExtendedSequenceNumber.LATEST); ExtendedSequenceNumber.LATEST);
task = new ShutdownTask(shardInfo, shardDetector, shardRecordProcessor, recordProcessorCheckpointer, task = new ShutdownTask(shardInfo, shardDetector, shardRecordProcessor, recordProcessorCheckpointer,
SHARD_END_SHUTDOWN_REASON, INITIAL_POSITION_TRIM_HORIZON, cleanupLeasesOfCompletedShards, SHARD_END_SHUTDOWN_REASON, INITIAL_POSITION_TRIM_HORIZON, cleanupLeasesOfCompletedShards,
ignoreUnexpectedChildShards, leaseRefresher, TASK_BACKOFF_TIME_MILLIS, recordsPublisher, ignoreUnexpectedChildShards, leaseCoordinator, TASK_BACKOFF_TIME_MILLIS, recordsPublisher,
hierarchicalShardSyncer, NULL_METRICS_FACTORY); hierarchicalShardSyncer, NULL_METRICS_FACTORY);
when(shardDetector.listShards()).thenReturn(constructShardListGraphA()); when(shardDetector.listShards()).thenReturn(constructShardListGraphA());
@ -161,6 +165,7 @@ public class ShutdownTaskTest {
verify(shardRecordProcessor).shardEnded(ShardEndedInput.builder().checkpointer(recordProcessorCheckpointer).build()); verify(shardRecordProcessor).shardEnded(ShardEndedInput.builder().checkpointer(recordProcessorCheckpointer).build());
verify(shardRecordProcessor, never()).leaseLost(LeaseLostInput.builder().build()); verify(shardRecordProcessor, never()).leaseLost(LeaseLostInput.builder().build());
verify(shardDetector, times(1)).listShards(); verify(shardDetector, times(1)).listShards();
verify(leaseCoordinator, never()).getAssignments();
} }
/** /**
@ -172,7 +177,7 @@ public class ShutdownTaskTest {
ExtendedSequenceNumber.LATEST); ExtendedSequenceNumber.LATEST);
task = new ShutdownTask(shardInfo, shardDetector, shardRecordProcessor, recordProcessorCheckpointer, task = new ShutdownTask(shardInfo, shardDetector, shardRecordProcessor, recordProcessorCheckpointer,
SHARD_END_SHUTDOWN_REASON, INITIAL_POSITION_TRIM_HORIZON, cleanupLeasesOfCompletedShards, SHARD_END_SHUTDOWN_REASON, INITIAL_POSITION_TRIM_HORIZON, cleanupLeasesOfCompletedShards,
ignoreUnexpectedChildShards, leaseRefresher, TASK_BACKOFF_TIME_MILLIS, recordsPublisher, ignoreUnexpectedChildShards, leaseCoordinator, TASK_BACKOFF_TIME_MILLIS, recordsPublisher,
hierarchicalShardSyncer, NULL_METRICS_FACTORY); hierarchicalShardSyncer, NULL_METRICS_FACTORY);
when(shardDetector.listShards()).thenReturn(constructShardListGraphA()); when(shardDetector.listShards()).thenReturn(constructShardListGraphA());
@ -183,6 +188,7 @@ public class ShutdownTaskTest {
verify(shardRecordProcessor, never()).shardEnded(ShardEndedInput.builder().checkpointer(recordProcessorCheckpointer).build()); verify(shardRecordProcessor, never()).shardEnded(ShardEndedInput.builder().checkpointer(recordProcessorCheckpointer).build());
verify(shardRecordProcessor).leaseLost(LeaseLostInput.builder().build()); verify(shardRecordProcessor).leaseLost(LeaseLostInput.builder().build());
verify(shardDetector, times(1)).listShards(); verify(shardDetector, times(1)).listShards();
verify(leaseCoordinator).getAssignments();
} }
/** /**
@ -194,7 +200,7 @@ public class ShutdownTaskTest {
ExtendedSequenceNumber.LATEST); ExtendedSequenceNumber.LATEST);
task = new ShutdownTask(shardInfo, shardDetector, shardRecordProcessor, recordProcessorCheckpointer, task = new ShutdownTask(shardInfo, shardDetector, shardRecordProcessor, recordProcessorCheckpointer,
LEASE_LOST_SHUTDOWN_REASON, INITIAL_POSITION_TRIM_HORIZON, cleanupLeasesOfCompletedShards, LEASE_LOST_SHUTDOWN_REASON, INITIAL_POSITION_TRIM_HORIZON, cleanupLeasesOfCompletedShards,
ignoreUnexpectedChildShards, leaseRefresher, TASK_BACKOFF_TIME_MILLIS, recordsPublisher, ignoreUnexpectedChildShards, leaseCoordinator, TASK_BACKOFF_TIME_MILLIS, recordsPublisher,
hierarchicalShardSyncer, NULL_METRICS_FACTORY); hierarchicalShardSyncer, NULL_METRICS_FACTORY);
when(shardDetector.listShards()).thenReturn(constructShardListGraphA()); when(shardDetector.listShards()).thenReturn(constructShardListGraphA());
@ -205,6 +211,7 @@ public class ShutdownTaskTest {
verify(shardRecordProcessor, never()).shardEnded(ShardEndedInput.builder().checkpointer(recordProcessorCheckpointer).build()); verify(shardRecordProcessor, never()).shardEnded(ShardEndedInput.builder().checkpointer(recordProcessorCheckpointer).build());
verify(shardRecordProcessor).leaseLost(LeaseLostInput.builder().build()); verify(shardRecordProcessor).leaseLost(LeaseLostInput.builder().build());
verify(shardDetector, never()).listShards(); verify(shardDetector, never()).listShards();
verify(leaseCoordinator, never()).getAssignments();
} }
/** /**