Graceful handling of ReadTimeoutExceptions (#403)
* Handling ReadTimeouts gracefully * Emitting logging messages at DEBUG level for retryable exceptions * Introducing SubscribeToShardRetryableException * Addressing comments * Making private ThrowableCategory class static * Creating static instances for acquiretimeout and readtimeout categories * Cleaned up imports * Renamed and moved SubscribeToShardRetryableException to RetryableRetrievalException * Renamed UNKNOWN exception type to Other
This commit is contained in:
parent
54c171dc2a
commit
a893da6942
4 changed files with 112 additions and 15 deletions
|
|
@ -43,6 +43,7 @@ import software.amazon.kinesis.lifecycle.events.ProcessRecordsInput;
|
||||||
import software.amazon.kinesis.metrics.MetricsCollectingTaskDecorator;
|
import software.amazon.kinesis.metrics.MetricsCollectingTaskDecorator;
|
||||||
import software.amazon.kinesis.metrics.MetricsFactory;
|
import software.amazon.kinesis.metrics.MetricsFactory;
|
||||||
import software.amazon.kinesis.retrieval.RecordsPublisher;
|
import software.amazon.kinesis.retrieval.RecordsPublisher;
|
||||||
|
import software.amazon.kinesis.retrieval.RetryableRetrievalException;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Responsible for consuming data records of a (specified) shard.
|
* Responsible for consuming data records of a (specified) shard.
|
||||||
|
|
@ -241,7 +242,12 @@ public class ShardConsumer {
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
if (failure != null) {
|
if (failure != null) {
|
||||||
log.warn("{}: Failure occurred in retrieval. Restarting data requests", shardInfo.shardId(), failure);
|
String logMessage = String.format("%s: Failure occurred in retrieval. Restarting data requests", shardInfo.shardId());
|
||||||
|
if (failure instanceof RetryableRetrievalException) {
|
||||||
|
log.debug(logMessage, failure.getCause());
|
||||||
|
} else {
|
||||||
|
log.warn(logMessage, failure);
|
||||||
|
}
|
||||||
startSubscriptions();
|
startSubscriptions();
|
||||||
return failure;
|
return failure;
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,31 @@
|
||||||
|
/*
|
||||||
|
* Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved.
|
||||||
|
*
|
||||||
|
* Licensed under the Amazon Software License (the "License").
|
||||||
|
* You may not use this file except in compliance with the License.
|
||||||
|
* A copy of the License is located at
|
||||||
|
*
|
||||||
|
* http://aws.amazon.com/asl/
|
||||||
|
*
|
||||||
|
* or in the "license" file accompanying this file. This file is distributed
|
||||||
|
* on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
|
||||||
|
* express or implied. See the License for the specific language governing
|
||||||
|
* permissions and limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package software.amazon.kinesis.retrieval;
|
||||||
|
|
||||||
|
import software.amazon.kinesis.exceptions.KinesisClientLibRetryableException;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* RetryableException for SubscribeToShard APIs.
|
||||||
|
*/
|
||||||
|
public class RetryableRetrievalException extends KinesisClientLibRetryableException {
|
||||||
|
public RetryableRetrievalException(final String message) {
|
||||||
|
super(message);
|
||||||
|
}
|
||||||
|
|
||||||
|
public RetryableRetrievalException(final String message, final Exception e) {
|
||||||
|
super(message, e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -25,6 +25,7 @@ import java.util.stream.Collectors;
|
||||||
import org.reactivestreams.Subscriber;
|
import org.reactivestreams.Subscriber;
|
||||||
import org.reactivestreams.Subscription;
|
import org.reactivestreams.Subscription;
|
||||||
|
|
||||||
|
import lombok.NonNull;
|
||||||
import lombok.RequiredArgsConstructor;
|
import lombok.RequiredArgsConstructor;
|
||||||
import lombok.extern.slf4j.Slf4j;
|
import lombok.extern.slf4j.Slf4j;
|
||||||
import software.amazon.awssdk.core.async.SdkPublisher;
|
import software.amazon.awssdk.core.async.SdkPublisher;
|
||||||
|
|
@ -36,6 +37,7 @@ import software.amazon.awssdk.services.kinesis.model.SubscribeToShardEventStream
|
||||||
import software.amazon.awssdk.services.kinesis.model.SubscribeToShardRequest;
|
import software.amazon.awssdk.services.kinesis.model.SubscribeToShardRequest;
|
||||||
import software.amazon.awssdk.services.kinesis.model.SubscribeToShardResponse;
|
import software.amazon.awssdk.services.kinesis.model.SubscribeToShardResponse;
|
||||||
import software.amazon.awssdk.services.kinesis.model.SubscribeToShardResponseHandler;
|
import software.amazon.awssdk.services.kinesis.model.SubscribeToShardResponseHandler;
|
||||||
|
import software.amazon.kinesis.annotations.KinesisClientExperimental;
|
||||||
import software.amazon.kinesis.annotations.KinesisClientInternalApi;
|
import software.amazon.kinesis.annotations.KinesisClientInternalApi;
|
||||||
import software.amazon.kinesis.common.InitialPositionInStreamExtended;
|
import software.amazon.kinesis.common.InitialPositionInStreamExtended;
|
||||||
import software.amazon.kinesis.common.KinesisRequestsBuilder;
|
import software.amazon.kinesis.common.KinesisRequestsBuilder;
|
||||||
|
|
@ -43,12 +45,15 @@ import software.amazon.kinesis.lifecycle.events.ProcessRecordsInput;
|
||||||
import software.amazon.kinesis.retrieval.IteratorBuilder;
|
import software.amazon.kinesis.retrieval.IteratorBuilder;
|
||||||
import software.amazon.kinesis.retrieval.KinesisClientRecord;
|
import software.amazon.kinesis.retrieval.KinesisClientRecord;
|
||||||
import software.amazon.kinesis.retrieval.RecordsPublisher;
|
import software.amazon.kinesis.retrieval.RecordsPublisher;
|
||||||
import software.amazon.kinesis.annotations.KinesisClientExperimental;
|
import software.amazon.kinesis.retrieval.RetryableRetrievalException;
|
||||||
import software.amazon.kinesis.retrieval.kpl.ExtendedSequenceNumber;
|
import software.amazon.kinesis.retrieval.kpl.ExtendedSequenceNumber;
|
||||||
|
|
||||||
@Slf4j
|
@Slf4j
|
||||||
@KinesisClientInternalApi
|
@KinesisClientInternalApi
|
||||||
public class FanOutRecordsPublisher implements RecordsPublisher {
|
public class FanOutRecordsPublisher implements RecordsPublisher {
|
||||||
|
private static final ThrowableCategory ACQUIRE_TIMEOUT_CATEGORY = new ThrowableCategory(
|
||||||
|
ThrowableType.ACUIRE_TIMEOUT);
|
||||||
|
private static final ThrowableCategory READ_TIMEOUT_CATEGORY = new ThrowableCategory(ThrowableType.READ_TIMEOUT);
|
||||||
|
|
||||||
private final KinesisAsyncClient kinesis;
|
private final KinesisAsyncClient kinesis;
|
||||||
private final String shardId;
|
private final String shardId;
|
||||||
|
|
@ -141,19 +146,29 @@ public class FanOutRecordsPublisher implements RecordsPublisher {
|
||||||
shardId, flow.connectionStartedAt, flow.subscribeToShardId);
|
shardId, flow.connectionStartedAt, flow.subscribeToShardId);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
String category = throwableCategory(t);
|
Throwable propagationThrowable = t;
|
||||||
|
ThrowableCategory category = throwableCategory(propagationThrowable);
|
||||||
|
|
||||||
if (isActiveFlow(triggeringFlow)) {
|
if (isActiveFlow(triggeringFlow)) {
|
||||||
if (flow != null) {
|
if (flow != null) {
|
||||||
log.warn("{}: [SubscriptionLifetime] - (FanOutRecordsPublisher#errorOccurred) @ {} id: {} -- {}",
|
String logMessage = String.format(
|
||||||
shardId, flow.connectionStartedAt, flow.subscribeToShardId, category, t);
|
"%s: [SubscriptionLifetime] - (FanOutRecordsPublisher#errorOccurred) @ %s id: %s -- %s",
|
||||||
|
shardId, flow.connectionStartedAt, flow.subscribeToShardId, category.throwableTypeString);
|
||||||
|
if (category.throwableType.equals(ThrowableType.READ_TIMEOUT)) {
|
||||||
|
log.debug(logMessage, propagationThrowable);
|
||||||
|
propagationThrowable = new RetryableRetrievalException(category.throwableTypeString,
|
||||||
|
(Exception) propagationThrowable.getCause());
|
||||||
|
} else {
|
||||||
|
log.warn(logMessage, propagationThrowable);
|
||||||
|
}
|
||||||
|
|
||||||
flow.cancel();
|
flow.cancel();
|
||||||
}
|
}
|
||||||
log.debug("{}: availableQueueSpace zeroing from {}", shardId, availableQueueSpace);
|
log.debug("{}: availableQueueSpace zeroing from {}", shardId, availableQueueSpace);
|
||||||
availableQueueSpace = 0;
|
availableQueueSpace = 0;
|
||||||
|
|
||||||
try {
|
try {
|
||||||
handleFlowError(t);
|
handleFlowError(propagationThrowable);
|
||||||
} catch (Throwable innerThrowable) {
|
} catch (Throwable innerThrowable) {
|
||||||
log.warn("{}: Exception while calling subscriber.onError", shardId, innerThrowable);
|
log.warn("{}: Exception while calling subscriber.onError", shardId, innerThrowable);
|
||||||
}
|
}
|
||||||
|
|
@ -163,7 +178,8 @@ public class FanOutRecordsPublisher implements RecordsPublisher {
|
||||||
if (triggeringFlow != null) {
|
if (triggeringFlow != null) {
|
||||||
log.debug(
|
log.debug(
|
||||||
"{}: [SubscriptionLifetime] - (FanOutRecordsPublisher#errorOccurred) @ {} id: {} -- {} -> triggeringFlow wasn't the active flow. Didn't dispatch error",
|
"{}: [SubscriptionLifetime] - (FanOutRecordsPublisher#errorOccurred) @ {} id: {} -- {} -> triggeringFlow wasn't the active flow. Didn't dispatch error",
|
||||||
shardId, triggeringFlow.connectionStartedAt, triggeringFlow.subscribeToShardId, category);
|
shardId, triggeringFlow.connectionStartedAt, triggeringFlow.subscribeToShardId,
|
||||||
|
category.throwableTypeString);
|
||||||
triggeringFlow.cancel();
|
triggeringFlow.cancel();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -184,29 +200,55 @@ public class FanOutRecordsPublisher implements RecordsPublisher {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private String throwableCategory(Throwable t) {
|
private enum ThrowableType {
|
||||||
|
ACUIRE_TIMEOUT("AcquireTimeout"), READ_TIMEOUT("ReadTimeout"), OTHER("Other");
|
||||||
|
|
||||||
|
String value;
|
||||||
|
|
||||||
|
ThrowableType(final String value) {
|
||||||
|
this.value = value;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private static class ThrowableCategory {
|
||||||
|
@NonNull
|
||||||
|
final ThrowableType throwableType;
|
||||||
|
@NonNull
|
||||||
|
final String throwableTypeString;
|
||||||
|
|
||||||
|
ThrowableCategory(final ThrowableType throwableType) {
|
||||||
|
this(throwableType, throwableType.value);
|
||||||
|
}
|
||||||
|
|
||||||
|
ThrowableCategory(final ThrowableType throwableType, final String throwableTypeString) {
|
||||||
|
this.throwableType = throwableType;
|
||||||
|
this.throwableTypeString = throwableTypeString;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private ThrowableCategory throwableCategory(Throwable t) {
|
||||||
Throwable current = t;
|
Throwable current = t;
|
||||||
StringBuilder builder = new StringBuilder();
|
StringBuilder builder = new StringBuilder();
|
||||||
do {
|
do {
|
||||||
if (current.getMessage() != null && current.getMessage().startsWith("Acquire operation")) {
|
if (current.getMessage() != null && current.getMessage().startsWith("Acquire operation")) {
|
||||||
return "AcquireTimeout";
|
return ACQUIRE_TIMEOUT_CATEGORY;
|
||||||
}
|
}
|
||||||
if (current.getClass().getName().equals("io.netty.handler.timeout.ReadTimeoutException")) {
|
if (current.getClass().getName().equals("io.netty.handler.timeout.ReadTimeoutException")) {
|
||||||
return "ReadTimeout";
|
return READ_TIMEOUT_CATEGORY;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (current.getCause() == null) {
|
if (current.getCause() == null) {
|
||||||
//
|
//
|
||||||
// At the bottom
|
// At the bottom
|
||||||
//
|
//
|
||||||
builder.append(current.getClass().getName() + ": " + current.getMessage());
|
builder.append(current.getClass().getName()).append(": ").append(current.getMessage());
|
||||||
} else {
|
} else {
|
||||||
builder.append(current.getClass().getSimpleName());
|
builder.append(current.getClass().getSimpleName());
|
||||||
builder.append("/");
|
builder.append("/");
|
||||||
}
|
}
|
||||||
current = current.getCause();
|
current = current.getCause();
|
||||||
} while (current != null);
|
} while (current != null);
|
||||||
return builder.toString();
|
return new ThrowableCategory(ThrowableType.OTHER, builder.toString());
|
||||||
}
|
}
|
||||||
|
|
||||||
private void recordsReceived(RecordFlow triggeringFlow, SubscribeToShardEvent recordBatchEvent) {
|
private void recordsReceived(RecordFlow triggeringFlow, SubscribeToShardEvent recordBatchEvent) {
|
||||||
|
|
|
||||||
|
|
@ -1,8 +1,6 @@
|
||||||
package software.amazon.kinesis.retrieval.fanout;
|
package software.amazon.kinesis.retrieval.fanout;
|
||||||
|
|
||||||
import static org.hamcrest.CoreMatchers.containsString;
|
|
||||||
import static org.hamcrest.CoreMatchers.equalTo;
|
import static org.hamcrest.CoreMatchers.equalTo;
|
||||||
import static org.hamcrest.CoreMatchers.instanceOf;
|
|
||||||
import static org.junit.Assert.assertThat;
|
import static org.junit.Assert.assertThat;
|
||||||
import static org.junit.Assert.fail;
|
import static org.junit.Assert.fail;
|
||||||
import static org.mockito.Matchers.any;
|
import static org.mockito.Matchers.any;
|
||||||
|
|
@ -21,6 +19,7 @@ import java.util.List;
|
||||||
import java.util.stream.Collectors;
|
import java.util.stream.Collectors;
|
||||||
import java.util.stream.Stream;
|
import java.util.stream.Stream;
|
||||||
|
|
||||||
|
import io.netty.handler.timeout.ReadTimeoutException;
|
||||||
import org.hamcrest.Description;
|
import org.hamcrest.Description;
|
||||||
import org.hamcrest.Matcher;
|
import org.hamcrest.Matcher;
|
||||||
import org.hamcrest.TypeSafeDiagnosingMatcher;
|
import org.hamcrest.TypeSafeDiagnosingMatcher;
|
||||||
|
|
@ -47,6 +46,7 @@ import software.amazon.kinesis.common.InitialPositionInStream;
|
||||||
import software.amazon.kinesis.common.InitialPositionInStreamExtended;
|
import software.amazon.kinesis.common.InitialPositionInStreamExtended;
|
||||||
import software.amazon.kinesis.lifecycle.events.ProcessRecordsInput;
|
import software.amazon.kinesis.lifecycle.events.ProcessRecordsInput;
|
||||||
import software.amazon.kinesis.retrieval.KinesisClientRecord;
|
import software.amazon.kinesis.retrieval.KinesisClientRecord;
|
||||||
|
import software.amazon.kinesis.retrieval.RetryableRetrievalException;
|
||||||
import software.amazon.kinesis.retrieval.kpl.ExtendedSequenceNumber;
|
import software.amazon.kinesis.retrieval.kpl.ExtendedSequenceNumber;
|
||||||
|
|
||||||
@RunWith(MockitoJUnitRunner.class)
|
@RunWith(MockitoJUnitRunner.class)
|
||||||
|
|
@ -55,7 +55,6 @@ public class FanOutRecordsPublisherTest {
|
||||||
|
|
||||||
private static final String SHARD_ID = "shardId-000000000001";
|
private static final String SHARD_ID = "shardId-000000000001";
|
||||||
private static final String CONSUMER_ARN = "arn:consumer";
|
private static final String CONSUMER_ARN = "arn:consumer";
|
||||||
private static final boolean VALIDATE_RECORD_SHARD_MATCHING = true;
|
|
||||||
|
|
||||||
@Mock
|
@Mock
|
||||||
private KinesisAsyncClient kinesisClient;
|
private KinesisAsyncClient kinesisClient;
|
||||||
|
|
@ -226,6 +225,25 @@ public class FanOutRecordsPublisherTest {
|
||||||
assertThat(input.records().isEmpty(), equalTo(true));
|
assertThat(input.records().isEmpty(), equalTo(true));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testReadTimeoutExceptionForShard() {
|
||||||
|
FanOutRecordsPublisher source = new FanOutRecordsPublisher(kinesisClient, SHARD_ID, CONSUMER_ARN);
|
||||||
|
|
||||||
|
ArgumentCaptor<FanOutRecordsPublisher.RecordFlow> flowCaptor = ArgumentCaptor
|
||||||
|
.forClass(FanOutRecordsPublisher.RecordFlow.class);
|
||||||
|
|
||||||
|
source.subscribe(subscriber);
|
||||||
|
|
||||||
|
verify(kinesisClient).subscribeToShard(any(SubscribeToShardRequest.class), flowCaptor.capture());
|
||||||
|
FanOutRecordsPublisher.RecordFlow recordFlow = flowCaptor.getValue();
|
||||||
|
recordFlow.exceptionOccurred(new RuntimeException(ReadTimeoutException.INSTANCE));
|
||||||
|
|
||||||
|
verify(subscriber).onSubscribe(any());
|
||||||
|
verify(subscriber).onError(any(RetryableRetrievalException.class));
|
||||||
|
verify(subscriber, never()).onNext(any());
|
||||||
|
verify(subscriber, never()).onComplete();
|
||||||
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testContinuesAfterSequence() {
|
public void testContinuesAfterSequence() {
|
||||||
FanOutRecordsPublisher source = new FanOutRecordsPublisher(kinesisClient, SHARD_ID, CONSUMER_ARN);
|
FanOutRecordsPublisher source = new FanOutRecordsPublisher(kinesisClient, SHARD_ID, CONSUMER_ARN);
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue