Use an explicit lock for shutdown instead of the general lock (#501)
If the Scheduler loses its lease for a shard it will attempt to shutdown the ShardConsumer processing that shard. When shutting down the ShardConsumer acquires a lock on `this` and makes the necessary state changes. This becomes an issue if the ShardConsumer is currently processing a batch of records as processing of the records is done under the general `this` lock. When these two things combine the Scheduler can become stuck waiting on the record processing to complete. To fix this the ShardConsumer will now use a specific lock on shutdown state changes to prevent the Scheduler from becoming blocked. Allow the shutdown state change future to acquire the lock When the ShardConsumer is being shutdown we create a future for the state change originally the future needed to acquire the lock before attempting to create the future task. This changes it to acquire the lock while running on another thread, and complete the shutdown then.
This commit is contained in:
parent
3b3998a59e
commit
c053789409
2 changed files with 96 additions and 32 deletions
|
|
@ -76,10 +76,8 @@ public class ShardConsumer {
|
||||||
* much coordination/synchronization to handle concurrent reads/updates.
|
* much coordination/synchronization to handle concurrent reads/updates.
|
||||||
*/
|
*/
|
||||||
private ConsumerState currentState;
|
private ConsumerState currentState;
|
||||||
/*
|
|
||||||
* Used to track if we lost the primary responsibility. Once set to true, we will start shutting down.
|
private final Object shutdownLock = new Object();
|
||||||
* If we regain primary responsibility before shutdown is complete, Worker should create a new ShardConsumer object.
|
|
||||||
*/
|
|
||||||
@Getter(AccessLevel.PUBLIC)
|
@Getter(AccessLevel.PUBLIC)
|
||||||
private volatile ShutdownReason shutdownReason;
|
private volatile ShutdownReason shutdownReason;
|
||||||
private volatile ShutdownNotification shutdownNotification;
|
private volatile ShutdownNotification shutdownNotification;
|
||||||
|
|
@ -257,7 +255,9 @@ public class ShardConsumer {
|
||||||
}
|
}
|
||||||
|
|
||||||
@VisibleForTesting
|
@VisibleForTesting
|
||||||
synchronized CompletableFuture<Boolean> shutdownComplete() {
|
CompletableFuture<Boolean> shutdownComplete() {
|
||||||
|
return CompletableFuture.supplyAsync(() -> {
|
||||||
|
synchronized (this) {
|
||||||
if (taskOutcome != null) {
|
if (taskOutcome != null) {
|
||||||
updateState(taskOutcome);
|
updateState(taskOutcome);
|
||||||
} else {
|
} else {
|
||||||
|
|
@ -269,12 +269,13 @@ public class ShardConsumer {
|
||||||
updateState(TaskOutcome.SUCCESSFUL);
|
updateState(TaskOutcome.SUCCESSFUL);
|
||||||
}
|
}
|
||||||
if (isShutdown()) {
|
if (isShutdown()) {
|
||||||
return CompletableFuture.completedFuture(true);
|
return true;
|
||||||
}
|
}
|
||||||
return CompletableFuture.supplyAsync(() -> {
|
|
||||||
executeTask(null);
|
executeTask(null);
|
||||||
return false;
|
return false;
|
||||||
});
|
}
|
||||||
|
}, executorService);
|
||||||
}
|
}
|
||||||
|
|
||||||
private synchronized void processData(ProcessRecordsInput input) {
|
private synchronized void processData(ProcessRecordsInput input) {
|
||||||
|
|
@ -339,11 +340,13 @@ public class ShardConsumer {
|
||||||
}
|
}
|
||||||
|
|
||||||
private ConsumerState handleShutdownTransition(TaskOutcome outcome, ConsumerState nextState) {
|
private ConsumerState handleShutdownTransition(TaskOutcome outcome, ConsumerState nextState) {
|
||||||
|
synchronized (shutdownLock) {
|
||||||
if (isShutdownRequested() && outcome != TaskOutcome.FAILURE) {
|
if (isShutdownRequested() && outcome != TaskOutcome.FAILURE) {
|
||||||
return currentState.shutdownTransition(shutdownReason);
|
return currentState.shutdownTransition(shutdownReason);
|
||||||
}
|
}
|
||||||
return nextState;
|
return nextState;
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
private void logTaskException(TaskResult taskResult) {
|
private void logTaskException(TaskResult taskResult) {
|
||||||
if (log.isDebugEnabled()) {
|
if (log.isDebugEnabled()) {
|
||||||
|
|
@ -388,7 +391,8 @@ public class ShardConsumer {
|
||||||
return isShutdown();
|
return isShutdown();
|
||||||
}
|
}
|
||||||
|
|
||||||
synchronized void markForShutdown(ShutdownReason reason) {
|
void markForShutdown(ShutdownReason reason) {
|
||||||
|
synchronized (shutdownLock) {
|
||||||
//
|
//
|
||||||
// ShutdownReason.LEASE_LOST takes precedence over SHARD_END
|
// ShutdownReason.LEASE_LOST takes precedence over SHARD_END
|
||||||
// (we won't be able to save checkpoint at end of shard)
|
// (we won't be able to save checkpoint at end of shard)
|
||||||
|
|
@ -397,6 +401,7 @@ public class ShardConsumer {
|
||||||
shutdownReason = reason;
|
shutdownReason = reason;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Used (by Worker) to check if this ShardConsumer instance has been shutdown
|
* Used (by Worker) to check if this ShardConsumer instance has been shutdown
|
||||||
|
|
@ -410,8 +415,10 @@ public class ShardConsumer {
|
||||||
|
|
||||||
@VisibleForTesting
|
@VisibleForTesting
|
||||||
public boolean isShutdownRequested() {
|
public boolean isShutdownRequested() {
|
||||||
|
synchronized (shutdownLock) {
|
||||||
return shutdownReason != null;
|
return shutdownReason != null;
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Default task wrapping function for metrics
|
* Default task wrapping function for metrics
|
||||||
|
|
|
||||||
|
|
@ -286,7 +286,64 @@ public class ShardConsumerTest {
|
||||||
verifyNoMoreInteractions(taskExecutionListener);
|
verifyNoMoreInteractions(taskExecutionListener);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test(timeout = 1000L)
|
||||||
|
public void testLeaseLossIsNonBlocking() throws Exception {
|
||||||
|
CyclicBarrier taskCallBarrier = new CyclicBarrier(2);
|
||||||
|
CyclicBarrier processingTaskInterlock = new CyclicBarrier(2);
|
||||||
|
|
||||||
|
mockSuccessfulInitialize(null);
|
||||||
|
|
||||||
|
mockSuccessfulProcessing(taskCallBarrier, processingTaskInterlock);
|
||||||
|
|
||||||
|
mockSuccessfulShutdown(null);
|
||||||
|
|
||||||
|
TestPublisher cache = new TestPublisher();
|
||||||
|
ShardConsumer consumer = new ShardConsumer(cache, executorService, shardInfo, logWarningForTaskAfterMillis,
|
||||||
|
shardConsumerArgument, initialState, Function.identity(), 1, taskExecutionListener);
|
||||||
|
|
||||||
|
boolean initComplete = false;
|
||||||
|
while (!initComplete) {
|
||||||
|
initComplete = consumer.initializeComplete().get();
|
||||||
|
}
|
||||||
|
|
||||||
|
consumer.subscribe();
|
||||||
|
cache.awaitInitialSetup();
|
||||||
|
|
||||||
|
log.debug("Setup complete publishing entry");
|
||||||
|
cache.publish();
|
||||||
|
awaitAndResetBarrier(taskCallBarrier);
|
||||||
|
consumer.leaseLost();
|
||||||
|
|
||||||
|
//
|
||||||
|
// This will block if a lock is held on ShardConsumer#this
|
||||||
|
//
|
||||||
|
consumer.executeLifecycle();
|
||||||
|
assertThat(consumer.isShutdown(), equalTo(false));
|
||||||
|
|
||||||
|
log.debug("Release processing task interlock");
|
||||||
|
awaitAndResetBarrier(processingTaskInterlock);
|
||||||
|
|
||||||
|
while(!consumer.isShutdown()) {
|
||||||
|
consumer.executeLifecycle();
|
||||||
|
Thread.yield();
|
||||||
|
}
|
||||||
|
|
||||||
|
verify(cache.subscription, times(1)).request(anyLong());
|
||||||
|
verify(cache.subscription).cancel();
|
||||||
|
verify(processingState, times(1)).createTask(eq(shardConsumerArgument), eq(consumer), any());
|
||||||
|
verify(taskExecutionListener, times(1)).beforeTaskExecution(initialTaskInput);
|
||||||
|
verify(taskExecutionListener, times(1)).beforeTaskExecution(processTaskInput);
|
||||||
|
verify(taskExecutionListener, times(1)).beforeTaskExecution(shutdownTaskInput);
|
||||||
|
|
||||||
|
initialTaskInput = initialTaskInput.toBuilder().taskOutcome(TaskOutcome.SUCCESSFUL).build();
|
||||||
|
processTaskInput = processTaskInput.toBuilder().taskOutcome(TaskOutcome.SUCCESSFUL).build();
|
||||||
|
shutdownTaskInput = shutdownTaskInput.toBuilder().taskOutcome(TaskOutcome.SUCCESSFUL).build();
|
||||||
|
|
||||||
|
verify(taskExecutionListener, times(1)).afterTaskExecution(initialTaskInput);
|
||||||
|
verify(taskExecutionListener, times(1)).afterTaskExecution(processTaskInput);
|
||||||
|
verify(taskExecutionListener, times(1)).afterTaskExecution(shutdownTaskInput);
|
||||||
|
verifyNoMoreInteractions(taskExecutionListener);
|
||||||
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testDataArrivesAfterProcessing2() throws Exception {
|
public void testDataArrivesAfterProcessing2() throws Exception {
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue