fix: add hard cap maxRetries for getRecord errors
Signed-off-by: Shiva Pentakota <spentakota@vmware.com>
parent 66006caf89
commit b5515931d1

3 changed files with 30 additions and 0 deletions
@@ -136,6 +136,9 @@ const (
 
 	// DefaultLeaseSyncingIntervalMillis Number of milliseconds to wait before syncing with lease table (dynamodDB)
 	DefaultLeaseSyncingIntervalMillis = 60000
+
+	// DefaultMaxRetryCount The default maximum number of retries in case of error
+	DefaultMaxRetryCount = 5
 )
 
 type (
@@ -283,6 +286,9 @@ type (
 
 		// LeaseSyncingTimeInterval The number of milliseconds to wait before syncing with lease table (dynamoDB)
 		LeaseSyncingTimeIntervalMillis int
+
+		// MaxRetryCount The maximum number of retries in case of error
+		MaxRetryCount int
 	}
 )
 
@@ -102,6 +102,7 @@ func NewKinesisClientLibConfigWithCredentials(applicationName, streamName, regio
 		LeaseStealingIntervalMillis:     DefaultLeaseStealingIntervalMillis,
 		LeaseStealingClaimTimeoutMillis: DefaultLeaseStealingClaimTimeoutMillis,
 		LeaseSyncingTimeIntervalMillis:  DefaultLeaseSyncingIntervalMillis,
+		MaxRetryCount:                   DefaultMaxRetryCount,
 		Logger:                          logger.GetDefaultLogger(),
 	}
 }
@@ -211,6 +212,13 @@ func (c *KinesisClientLibConfiguration) WithLogger(logger logger.Logger) *Kinesi
 	return c
 }
 
+// WithMaxRetryCount sets the max retry count in case of error.
+func (c *KinesisClientLibConfiguration) WithMaxRetryCount(maxRetryCount int) *KinesisClientLibConfiguration {
+	checkIsValuePositive("maxRetryCount", maxRetryCount)
+	c.MaxRetryCount = maxRetryCount
+	return c
+}
+
 // WithMonitoringService sets the monitoring service to use to publish metrics.
 func (c *KinesisClientLibConfiguration) WithMonitoringService(mService metrics.MonitoringService) *KinesisClientLibConfiguration {
 	// Nil case is handled downward (at worker creation) so no need to do it here.
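For context, a minimal sketch of how a caller could opt into a different cap once this lands. The constructor name and import path below are assumptions based on the library's existing builder-style API and are not part of this commit; only WithMaxRetryCount is introduced here.

	import (
		cfg "github.com/vmware/vmware-go-kcl-v2/clientlibrary/config"
	)

	func newConfig() *cfg.KinesisClientLibConfiguration {
		// Assumed constructor and import path; WithMaxRetryCount is what this commit adds.
		return cfg.NewKinesisClientLibConfig("my-app", "my-stream", "us-west-2", "worker-1").
			WithMaxRetryCount(10) // raise the cap above DefaultMaxRetryCount (5)
	}

If WithMaxRetryCount is never called, NewKinesisClientLibConfigWithCredentials seeds MaxRetryCount with DefaultMaxRetryCount (5), as shown in the hunk above.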
@@ -157,6 +157,14 @@ func (sc *PollingShardConsumer) getRecords() error {
 	var throughputExceededErr *types.ProvisionedThroughputExceededException
 	var kmsThrottlingErr *types.KMSThrottlingException
 	if errors.As(err, &throughputExceededErr) || err == localTPSExceededError {
+		retriedErrors++
+		if retriedErrors > sc.kclConfig.MaxRetryCount {
+			log.Errorf("message", "reached max retry count getting records from shard",
+				"shardId", sc.shard.ID,
+				"retryCount", retriedErrors,
+				"error", err)
+			return err
+		}
 		// If there is insufficient provisioned throughput on the stream,
 		// subsequent calls made within the next 1 second throw ProvisionedThroughputExceededException.
 		// ref: https://docs.aws.amazon.com/streams/latest/dev/service-sizes-and-limits.html
@@ -166,6 +174,14 @@ func (sc *PollingShardConsumer) getRecords() error {
 	if errors.As(err, &kmsThrottlingErr) {
 		log.Errorf("Error getting records from shard %v: %+v", sc.shard.ID, err)
 		retriedErrors++
+		// Greater than MaxRetryCount so we get the last retry
+		if retriedErrors > sc.kclConfig.MaxRetryCount {
+			log.Errorf("message", "reached max retry count getting records from shard",
+				"shardId", sc.shard.ID,
+				"retryCount", retriedErrors,
+				"error", err)
+			return err
+		}
 		// exponential backoff
 		// https://docs.aws.amazon.com/amazondynamodb/latest/developerguide/Programming.Errors.html#Programming.Errors.RetryAndBackoff
 		time.Sleep(time.Duration(math.Exp2(float64(retriedErrors))*100) * time.Millisecond)
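To make the new cap concrete: the backoff above sleeps 2^retriedErrors * 100 ms per attempt, so with the default cap of 5 the consumer backs off 200, 400, 800, 1600 and 3200 ms (about 6.2 s total) and then returns the error on the next failure. A standalone sketch of that schedule, for illustration only and not part of the commit:

	package main

	import (
		"fmt"
		"math"
		"time"
	)

	func main() {
		const maxRetryCount = 5 // mirrors DefaultMaxRetryCount
		for retried := 1; retried <= maxRetryCount; retried++ {
			// Same formula as getRecords: 2^retried * 100 ms.
			delay := time.Duration(math.Exp2(float64(retried))*100) * time.Millisecond
			fmt.Printf("retry %d: back off %v\n", retried, delay)
		}
		// On the next error, retriedErrors exceeds MaxRetryCount and getRecords returns the error.
	}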