Merge pull request #781 from awslabs/dead_shard_initialize_cycle_fix

Fix record processor repeatedly re-initializing shards whose leases have been deleted
This commit is contained in:
ashwing 2021-01-27 09:17:40 -08:00 committed by GitHub
commit 539550b664
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
6 changed files with 78 additions and 7 deletions

View file

@ -14,6 +14,7 @@
*/
package com.amazonaws.services.kinesis.clientlibrary.lib.worker;
import com.amazonaws.services.kinesis.clientlibrary.exceptions.KinesisClientLibNonRetryableException;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
@ -79,7 +80,15 @@ class InitializeTask implements ITask {
try {
LOG.debug("Initializing ShardId " + shardInfo.getShardId());
Checkpoint initialCheckpointObject = checkpoint.getCheckpointObject(shardInfo.getShardId());
Checkpoint initialCheckpointObject;
try {
initialCheckpointObject = checkpoint.getCheckpointObject(shardInfo.getShardId());
} catch (KinesisClientLibNonRetryableException e) {
LOG.error("Caught exception while fetching checkpoint for " + shardInfo.getShardId(), e);
final TaskResult result = new TaskResult(e);
result.leaseNotFound();
return result;
}
ExtendedSequenceNumber initialCheckpoint = initialCheckpointObject.getCheckpoint();
dataFetcher.initialize(initialCheckpoint.getSequenceNumber(), streamConfig.getInitialPositionInStream());

View file

@ -283,7 +283,8 @@ class KinesisClientLibLeaseCoordinator extends LeaseCoordinator<KinesisClientLea
try {
KinesisClientLease lease = leaseManager.getLease(shardId);
if (lease == null) {
throw new KinesisClientLibIOException(errorMessage);
// A missing lease is not retryable: this InvalidStateException is a KinesisClientLibNonRetryableException.
throw new com.amazonaws.services.kinesis.clientlibrary.exceptions.InvalidStateException(errorMessage);
}
return new Checkpoint(lease.getCheckpoint(), lease.getPendingCheckpoint());
} catch (DependencyException | InvalidStateException | ProvisionedThroughputException e) {

View file

@ -374,7 +374,7 @@ class ShardConsumer {
}
private enum TaskOutcome {
SUCCESSFUL, END_OF_SHARD, NOT_COMPLETE, FAILURE
SUCCESSFUL, END_OF_SHARD, NOT_COMPLETE, FAILURE, LEASE_NOT_FOUND
}
private TaskOutcome determineTaskOutcome() {
@ -391,6 +391,10 @@ class ShardConsumer {
return TaskOutcome.SUCCESSFUL;
}
logTaskException(result);
// Reaching this point means the task result carries an exception.
if (result.isLeaseNotFound()) {
return TaskOutcome.LEASE_NOT_FOUND;
}
} catch (Exception e) {
throw new RuntimeException(e);
} finally {
@ -487,6 +491,10 @@ class ShardConsumer {
markForShutdown(ShutdownReason.TERMINATE);
LOG.info("Shard " + shardInfo.getShardId() + ": Mark for shutdown with reason TERMINATE");
}
if (taskOutcome == TaskOutcome.LEASE_NOT_FOUND) {
markForShutdown(ShutdownReason.ZOMBIE);
LOG.info("Shard " + shardInfo.getShardId() + ": Mark for shutdown with reason ZOMBIE as lease was not found");
}
if (isShutdownRequested() && taskOutcome != TaskOutcome.FAILURE) {
currentState = currentState.shutdownTransition(shutdownReason);
} else if (isShutdownRequested() && ConsumerStates.ShardConsumerState.WAITING_ON_PARENT_SHARDS.equals(currentState.getState())) {

View file

@ -33,6 +33,8 @@ class TaskResult {
// List of childShards of the current shard. This field is only required for the task result when we reach end of a shard.
private List<ChildShard> childShards;
private boolean leaseNotFound;
/**
* @return the shardEndReached
*/
@ -57,6 +59,14 @@ class TaskResult {
*/
protected void setChildShards(List<ChildShard> childShards) {
    // The parameter shadows the field, so the assignment must be qualified with `this`.
    this.childShards = childShards;
}
/**
 * @return the leaseNotFound flag; {@code true} once {@link #leaseNotFound()} has been called
 */
public boolean isLeaseNotFound() {
    return this.leaseNotFound;
}
/**
 * Flags this result as lease-not-found. The flag is only ever set to {@code true} here;
 * this method never resets it.
 */
public void leaseNotFound() {
    leaseNotFound = true;
}
/**
* @return the exception
*/

View file

@ -79,7 +79,7 @@ public class KinesisClientLibLeaseCoordinatorTest {
leaseCoordinator.initialize();
}
@Test(expected = KinesisClientLibIOException.class)
@Test(expected = com.amazonaws.services.kinesis.clientlibrary.exceptions.InvalidStateException.class)
public void testGetCheckpointObjectWithNoLease()
throws DependencyException, ProvisionedThroughputException, IllegalStateException, InvalidStateException,
KinesisClientLibException {

View file

@ -54,8 +54,6 @@ import java.util.concurrent.Future;
import java.util.concurrent.RejectedExecutionException;
import java.util.concurrent.TimeUnit;
import com.amazonaws.services.kinesis.leases.impl.LeaseCleanupManager;
import com.amazonaws.services.kinesis.leases.impl.LeaseManager;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.hamcrest.Description;
@ -64,7 +62,6 @@ import org.hamcrest.TypeSafeMatcher;
import org.junit.Before;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.mockito.InjectMocks;
import org.mockito.Mock;
import org.mockito.runners.MockitoJUnitRunner;
@ -246,6 +243,52 @@ public class ShardConsumerTest {
assertThat(consumer.getCurrentState(), is(equalTo(ConsumerStates.ShardConsumerState.INITIALIZING)));
}
/**
 * Drives a {@link ShardConsumer} whose checkpoint lookup finds no lease and checks the state
 * observed after each {@code consumeShard()} call:
 * WAITING_ON_PARENT_SHARDS, WAITING_ON_PARENT_SHARDS, INITIALIZING, SHUTTING_DOWN, SHUTDOWN_COMPLETE.
 */
@Test
public void testInitializationStateTransitionsToShutdownOnLeaseNotFound() throws Exception {
ShardInfo shardInfo = new ShardInfo("s-0-0", "testToken", null, ExtendedSequenceNumber.TRIM_HORIZON);
// The checkpoint is a real KinesisClientLibLeaseCoordinator built on the stubbed leaseManager.
ICheckpoint checkpoint = new KinesisClientLibLeaseCoordinator(leaseManager, "", 0, 0);
// Every lease lookup returns null, i.e. no lease exists for any shard id.
when(leaseManager.getLease(anyString())).thenReturn(null);
when(leaseCoordinator.getLeaseManager()).thenReturn(leaseManager);
StreamConfig streamConfig =
new StreamConfig(streamProxy,
1,
10,
callProcessRecordsForEmptyRecordList,
skipCheckpointValidationValue, INITIAL_POSITION_LATEST);
ShardConsumer consumer =
new ShardConsumer(shardInfo,
streamConfig,
checkpoint,
processor,
leaseCoordinator,
parentShardPollIntervalMillis,
cleanupLeasesOfCompletedShards,
executorService,
metricsFactory,
taskBackoffTimeMillis,
KinesisClientLibConfiguration.DEFAULT_SKIP_SHARD_SYNC_AT_STARTUP_IF_LEASES_EXIST,
config,
shardSyncer,
shardSyncStrategy);
// A freshly constructed consumer starts in WAITING_ON_PARENT_SHARDS.
assertThat(consumer.getCurrentState(), is(equalTo(ConsumerStates.ShardConsumerState.WAITING_ON_PARENT_SHARDS)));
consumer.consumeShard();
// NOTE(review): each 50 ms sleep presumably gives the task submitted by consumeShard() time to
// finish before the state is checked; this is timing-dependent — confirm it is not flaky.
Thread.sleep(50L);
// The state is still WAITING_ON_PARENT_SHARDS after the first call.
assertThat(consumer.getCurrentState(), is(equalTo(ConsumerStates.ShardConsumerState.WAITING_ON_PARENT_SHARDS)));
consumer.consumeShard();
Thread.sleep(50L);
// The second call moves the consumer to INITIALIZING.
assertThat(consumer.getCurrentState(), is(equalTo(ConsumerStates.ShardConsumerState.INITIALIZING)));
consumer.consumeShard();
Thread.sleep(50L);
// With no lease available, the consumer is expected to go to SHUTTING_DOWN rather than stay in INITIALIZING.
assertThat(consumer.getCurrentState(), is(equalTo(ConsumerStates.ShardConsumerState.SHUTTING_DOWN)));
consumer.consumeShard();
Thread.sleep(50L);
// One more call completes the shutdown.
assertThat(consumer.getCurrentState(), is(equalTo(ConsumerStates.ShardConsumerState.SHUTDOWN_COMPLETE)));
}
@SuppressWarnings("unchecked")
@Test
public final void testRecordProcessorThrowable() throws Exception {