2018-08-17 13:03:25 +00:00
/ *
* Copyright ( c ) 2018 VMware , Inc .
*
* Permission is hereby granted , free of charge , to any person obtaining a copy of this software and
* associated documentation files ( the "Software" ) , to deal in the Software without restriction , including
* without limitation the rights to use , copy , modify , merge , publish , distribute , sublicense , and / or sell
* copies of the Software , and to permit persons to whom the Software is furnished to do
* so , subject to the following conditions :
*
* The above copyright notice and this permission notice shall be included in all copies or substantial
* portions of the Software .
*
* THE SOFTWARE IS PROVIDED "AS IS" , WITHOUT WARRANTY OF ANY KIND , EXPRESS OR IMPLIED , INCLUDING BUT
* NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY , FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT .
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM , DAMAGES OR OTHER LIABILITY ,
* WHETHER IN AN ACTION OF CONTRACT , TORT OR OTHERWISE , ARISING FROM , OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE .
* /
2021-11-08 14:00:48 +00:00
// Package config holds the configuration types and default values for the Kinesis Client Library.
2018-08-17 13:03:25 +00:00
// The implementation is derived from https://github.com/awslabs/amazon-kinesis-client
/ *
* Copyright 2014 - 2015 Amazon . com , Inc . or its affiliates . All Rights Reserved .
*
* Licensed under the Amazon Software License ( the "License" ) .
* You may not use this file except in compliance with the License .
* A copy of the License is located at
*
* http : //aws.amazon.com/asl/
*
* or in the "license" file accompanying this file . This file is distributed
* on an "AS IS" BASIS , WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND , either
* express or implied . See the License for the specific language governing
* permissions and limitations under the License .
* /
2021-12-21 19:49:47 +00:00
2018-04-11 03:50:18 +00:00
package config
import (
"log"
"math"
"strings"
"time"
2018-04-17 16:25:41 +00:00
2021-11-08 15:27:29 +00:00
"github.com/aws/aws-sdk-go-v2/aws"
2021-04-27 15:51:26 +00:00
2021-12-21 19:49:47 +00:00
"github.com/vmware/vmware-go-kcl-v2/clientlibrary/metrics"
"github.com/vmware/vmware-go-kcl-v2/logger"
2018-04-11 03:50:18 +00:00
)
// Named stream start positions, followed by the Default* values for the
// Kinesis Client Library settings.
// NOTE(review): the bare timestamp lines inside this block look like VCS blame
// residue rather than Go source — confirm and strip them from the real file.
const (
// LATEST starts after the most recent data record (fetch new data).
// Numbering begins at iota + 1, so the zero value of InitialPositionInStream
// matches none of the named positions.
2018-04-17 16:25:41 +00:00
LATEST InitialPositionInStream = iota + 1
2018-04-11 03:50:18 +00:00
// TRIM_HORIZON starts from the oldest available data record.
2018-04-17 16:25:41 +00:00
TRIM_HORIZON
2018-04-13 04:02:30 +00:00
// AT_TIMESTAMP starts from the record at or after the specified server-side Timestamp.
2018-04-17 16:25:41 +00:00
AT_TIMESTAMP
2018-04-11 03:50:18 +00:00
2021-11-08 14:00:48 +00:00
// DefaultInitialPositionInStream The location in the shard from which the KinesisClientLibrary will start fetching records
2018-04-11 03:50:18 +00:00
// when the application starts for the first time and there is no checkpoint for the shard.
2020-12-23 19:22:01 +00:00
DefaultInitialPositionInStream = LATEST
2018-04-11 03:50:18 +00:00
2021-11-08 14:00:48 +00:00
// DefaultFailoverTimeMillis Fail over time in milliseconds. A worker which does not renew its lease within this time interval
2018-04-11 03:50:18 +00:00
// will be regarded as having problems and its shards will be assigned to other workers.
// For applications that have a large number of shards, this may be set to a higher number to reduce
// the number of DynamoDB IOPS required for tracking leases.
2020-12-23 19:22:01 +00:00
DefaultFailoverTimeMillis = 10000
2018-04-11 03:50:18 +00:00
2021-11-08 14:00:48 +00:00
// DefaultLeaseRefreshPeriodMillis Period in milliseconds before the end of lease during which a lease is refreshed by the owner.
2020-12-23 19:22:01 +00:00
DefaultLeaseRefreshPeriodMillis = 5000
2019-11-13 23:15:33 +00:00
2021-11-08 14:00:48 +00:00
// DefaultMaxRecords Max records to fetch from Kinesis in a single GetRecords call.
2020-12-23 19:22:01 +00:00
DefaultMaxRecords = 10000
2018-04-11 03:50:18 +00:00
2021-11-08 14:00:48 +00:00
// DefaultIdleTimeBetweenReadsMillis The default time in milliseconds a shard consumer
// should sleep if no records are returned from a GetRecords call.
DefaultIdleTimeBetweenReadsMillis = 1000
2018-04-11 03:50:18 +00:00
2021-11-08 14:00:48 +00:00
// DefaultDontCallProcessRecordsForEmptyRecordList Don't call processRecords() on the record processor for empty record lists.
2020-12-23 19:22:01 +00:00
DefaultDontCallProcessRecordsForEmptyRecordList = false
2018-04-11 03:50:18 +00:00
2021-11-08 14:00:48 +00:00
// DefaultParentShardPollIntervalMillis Interval in milliseconds between polling to check for parent shard completion.
2018-04-11 03:50:18 +00:00
// Polling frequently will take up more DynamoDB IOPS (when there are leases for shards waiting on
// completion of parent shards).
2020-12-23 19:22:01 +00:00
DefaultParentShardPollIntervalMillis = 10000
2018-04-11 03:50:18 +00:00
2021-11-08 14:00:48 +00:00
// DefaultShardSyncIntervalMillis Shard sync interval in milliseconds - e.g. wait for this long between shard sync tasks.
2020-12-23 19:22:01 +00:00
DefaultShardSyncIntervalMillis = 60000
2018-04-11 03:50:18 +00:00
2021-11-08 14:00:48 +00:00
// DefaultCleanupLeasesUponShardsCompletion Cleanup leases upon shards completion (don't wait until they expire in Kinesis).
2018-08-17 13:03:25 +00:00
// Keeping leases takes some tracking/resources (e.g. they need to be renewed, assigned), so by
// default we try to delete the ones we don't need any longer.
2020-12-23 19:22:01 +00:00
DefaultCleanupLeasesUponShardsCompletion = true
2018-04-11 03:50:18 +00:00
2021-11-08 14:00:48 +00:00
// DefaultTaskBackoffTimeMillis Backoff time in milliseconds for Amazon Kinesis Client Library tasks (in the event of failures).
2020-12-23 19:22:01 +00:00
DefaultTaskBackoffTimeMillis = 500
2018-04-11 03:50:18 +00:00
2021-11-08 14:00:48 +00:00
// DefaultValidateSequenceNumberBeforeCheckpointing KCL will validate client provided sequence numbers with a call to Amazon Kinesis before
2018-08-17 13:03:25 +00:00
// checkpointing for calls to RecordProcessorCheckpointer.checkpoint(String) by default.
2020-12-23 19:22:01 +00:00
DefaultValidateSequenceNumberBeforeCheckpointing = true
2018-04-11 03:50:18 +00:00
2021-11-08 14:00:48 +00:00
// DefaultMaxLeasesForWorker The max number of leases (shards) this worker should process.
2018-04-11 03:50:18 +00:00
// This can be useful to avoid overloading (and thrashing) a worker when a host has resource constraints
// or during deployment.
// NOTE: Setting this to a low value can cause data loss if workers are not able to pick up all shards in the
// stream due to the max limit.
// The default is math.MaxInt16 (32767).
2020-12-23 19:22:01 +00:00
DefaultMaxLeasesForWorker = math . MaxInt16
2018-04-11 03:50:18 +00:00
2021-11-08 14:00:48 +00:00
// DefaultMaxLeasesToStealAtOneTime Max leases to steal from another worker at one time (for load balancing).
2018-04-11 03:50:18 +00:00
// Setting this to a higher number can allow for faster load convergence (e.g. during deployments, cold starts),
// but can cause higher churn in the system.
2020-12-23 19:22:01 +00:00
DefaultMaxLeasesToStealAtOneTime = 1
2018-04-11 03:50:18 +00:00
2021-11-08 14:00:48 +00:00
// DefaultInitialLeaseTableReadCapacity The Amazon DynamoDB table used for tracking leases will be provisioned with this read capacity.
2020-12-23 19:22:01 +00:00
DefaultInitialLeaseTableReadCapacity = 10
2018-04-11 03:50:18 +00:00
2021-11-08 14:00:48 +00:00
// DefaultInitialLeaseTableWriteCapacity The Amazon DynamoDB table used for tracking leases will be provisioned with this write capacity.
2020-12-23 19:22:01 +00:00
DefaultInitialLeaseTableWriteCapacity = 10
2018-04-11 03:50:18 +00:00
2021-11-08 14:00:48 +00:00
// DefaultSkipShardSyncAtStartupIfLeasesExist The Worker will skip shard sync during initialization if there are one or more leases in the lease table. This
2018-04-11 03:50:18 +00:00
// assumes that the shards and leases are in-sync. This enables customers to choose faster startup times (e.g.
// during incremental deployments of an application).
2020-12-23 19:22:01 +00:00
DefaultSkipShardSyncAtStartupIfLeasesExist = false
2018-04-11 03:50:18 +00:00
2021-11-08 14:00:48 +00:00
// DefaultShutdownGraceMillis The number of milliseconds to wait before graceful shutdown forcefully terminates.
2020-12-23 19:22:01 +00:00
DefaultShutdownGraceMillis = 5000
2021-06-01 23:18:26 +00:00
2021-11-08 14:00:48 +00:00
// DefaultEnableLeaseStealing Lease stealing defaults to false for backwards compatibility.
2021-06-01 23:18:26 +00:00
DefaultEnableLeaseStealing = false
2021-11-08 14:00:48 +00:00
// DefaultLeaseStealingIntervalMillis Interval between rebalance tasks defaults to 5 seconds.
2021-06-01 23:18:26 +00:00
DefaultLeaseStealingIntervalMillis = 5000
2021-11-08 14:00:48 +00:00
// DefaultLeaseStealingClaimTimeoutMillis Number of milliseconds to wait before another worker can acquire a claimed shard.
2021-06-01 23:18:26 +00:00
DefaultLeaseStealingClaimTimeoutMillis = 120000
2021-11-08 14:00:48 +00:00
// DefaultLeaseSyncingIntervalMillis Number of milliseconds to wait before syncing with lease table (DynamoDB).
2021-06-01 23:18:26 +00:00
DefaultLeaseSyncingIntervalMillis = 60000
2023-01-24 19:59:32 +00:00
// DefaultMaxRetryCount The default maximum number of retries in case of error.
DefaultMaxRetryCount = 5
2018-04-11 03:50:18 +00:00
)
// Types describing where to start reading a stream and the full set of
// Kinesis Client Library settings.
// NOTE(review): the bare timestamp lines inside this block look like VCS blame
// residue rather than Go source — confirm and strip them from the real file.
type (
2018-04-13 04:02:30 +00:00
// InitialPositionInStream specifies the position in the stream where a new application should start.
2018-04-11 03:50:18 +00:00
// This is used during initial application bootstrap (when a checkpoint doesn't exist for a shard or its parents).
InitialPositionInStream int
2021-11-08 14:00:48 +00:00
// InitialPositionInStreamExtended groups the values needed to specify the position in the stream from which a new application should
2018-04-11 03:50:18 +00:00
// start.
InitialPositionInStreamExtended struct {
2018-04-13 04:02:30 +00:00
// Position is the stream position to start from.
Position InitialPositionInStream
2018-04-11 03:50:18 +00:00
// Timestamp is the time stamp of the data record from which to start reading. Used with
// shard iterator type AT_TIMESTAMP. A time stamp is the Unix epoch date with
// precision in milliseconds. For example, 2016-04-04T19:58:46.480-00:00 or
// 1459799926.480. If a record with this exact time stamp does not exist, the
// iterator returned is for the next (later) record. If the time stamp is older
// than the current trim horizon, the iterator returned is for the oldest untrimmed
// data record (TRIM_HORIZON).
2018-04-13 04:02:30 +00:00
Timestamp * time . Time ` type:"Timestamp" timestampFormat:"unix" `
2018-04-11 03:50:18 +00:00
}
2021-11-08 14:00:48 +00:00
// KinesisClientLibConfiguration Configuration for the Kinesis Client Library.
2018-04-17 16:25:41 +00:00
// Note: There is no need to configure a credential provider. Credentials can be obtained from the InstanceProfile.
2018-04-11 03:50:18 +00:00
KinesisClientLibConfiguration struct {
2018-04-13 04:02:30 +00:00
// ApplicationName is the name of the application. Kinesis allows multiple applications to consume the same stream.
ApplicationName string
2018-04-11 03:50:18 +00:00
2019-02-09 16:23:54 +00:00
// DynamoDBEndpoint is an optional endpoint URL that overrides the default generated endpoint for a DynamoDB client.
// If this is empty, the default generated endpoint will be used.
DynamoDBEndpoint string
// KinesisEndpoint is an optional endpoint URL that overrides the default generated endpoint for a Kinesis client.
// If this is empty, the default generated endpoint will be used.
KinesisEndpoint string
2019-03-16 13:11:09 +00:00
// KinesisCredentials is used to access Kinesis.
2022-01-07 02:13:32 +00:00
KinesisCredentials aws . CredentialsProvider
2019-03-16 13:11:09 +00:00
// DynamoDBCredentials is used to access DynamoDB.
2022-01-07 02:13:32 +00:00
DynamoDBCredentials aws . CredentialsProvider
2019-03-16 13:11:09 +00:00
2018-04-13 04:02:30 +00:00
// TableName is the name of the DynamoDB table for managing the Kinesis stream; defaults to ApplicationName.
TableName string
2018-04-11 03:50:18 +00:00
2018-04-13 04:02:30 +00:00
// StreamName is the name of the Kinesis stream.
StreamName string
2018-04-11 03:50:18 +00:00
2021-04-27 15:51:26 +00:00
// EnableEnhancedFanOutConsumer enables enhanced fan-out consumer
// See: https://docs.aws.amazon.com/streams/latest/dev/enhanced-consumers.html
// Either consumer name or consumer ARN must be specified when Enhanced Fan-Out is enabled.
EnableEnhancedFanOutConsumer bool
2021-04-29 02:19:12 +00:00
// EnhancedFanOutConsumerName is the name of the enhanced fan-out consumer to create. If this isn't set the ApplicationName will be used.
2021-04-27 15:51:26 +00:00
EnhancedFanOutConsumerName string
// EnhancedFanOutConsumerARN is the ARN of an already created enhanced fan-out consumer. If this is set, no automatic consumer creation will be attempted.
EnhancedFanOutConsumerARN string
2018-04-13 04:02:30 +00:00
// WorkerID is used to distinguish different workers/processes of a Kinesis application.
WorkerID string
2018-04-11 03:50:18 +00:00
2018-04-13 04:02:30 +00:00
// InitialPositionInStream specifies the Position in the stream where a new application should start from
InitialPositionInStream InitialPositionInStream
2018-04-11 03:50:18 +00:00
2021-04-27 15:51:26 +00:00
// InitialPositionInStreamExtended provides the actual AT_TIMESTAMP value.
2018-04-13 04:02:30 +00:00
InitialPositionInStreamExtended InitialPositionInStreamExtended
2018-04-11 03:50:18 +00:00
2019-11-06 13:53:21 +00:00
// credentials to access Kinesis/Dynamo: https://docs.aws.amazon.com/sdk-for-go/api/aws/credentials/
2018-04-11 03:50:18 +00:00
// Note: No need to configure here. Use NewEnvCredentials for testing and EC2RoleProvider for production
2018-04-13 04:02:30 +00:00
// FailoverTimeMillis Lease duration (leases not renewed within this period will be claimed by others)
FailoverTimeMillis int
2018-04-11 03:50:18 +00:00
2019-11-13 23:15:33 +00:00
// LeaseRefreshPeriodMillis is the period before the end of lease during which a lease is refreshed by the owner.
LeaseRefreshPeriodMillis int
// MaxRecords Max records to read per Kinesis getRecords() call
2018-04-13 04:02:30 +00:00
MaxRecords int
2018-04-11 03:50:18 +00:00
2018-04-13 04:02:30 +00:00
// IdleTimeBetweenReadsInMillis Idle time between calls to fetch data from Kinesis
IdleTimeBetweenReadsInMillis int
2018-04-11 03:50:18 +00:00
2018-04-13 04:02:30 +00:00
// CallProcessRecordsEvenForEmptyRecordList Call the IRecordProcessor::processRecords() API even if
2018-04-11 03:50:18 +00:00
// GetRecords returned an empty record list.
2018-04-13 04:02:30 +00:00
CallProcessRecordsEvenForEmptyRecordList bool
2018-04-11 03:50:18 +00:00
2018-04-13 04:02:30 +00:00
// ParentShardPollIntervalMillis Wait for this long between polls to check if parent shards are done
ParentShardPollIntervalMillis int
2018-04-11 03:50:18 +00:00
2018-04-13 04:02:30 +00:00
// ShardSyncIntervalMillis Time between tasks to sync leases and Kinesis shards
ShardSyncIntervalMillis int
2018-04-11 03:50:18 +00:00
2018-04-13 04:02:30 +00:00
// CleanupTerminatedShardsBeforeExpiry Clean up shards we've finished processing (don't wait for expiration)
CleanupTerminatedShardsBeforeExpiry bool
2018-04-11 03:50:18 +00:00
// kinesisClientConfig Client Configuration used by Kinesis client
// dynamoDBClientConfig Client Configuration used by DynamoDB client
// Note: we will use default client provided by AWS SDK
2018-04-13 04:02:30 +00:00
// TaskBackoffTimeMillis Backoff period when tasks encounter an exception
TaskBackoffTimeMillis int
2018-04-11 03:50:18 +00:00
2018-04-13 04:02:30 +00:00
// ValidateSequenceNumberBeforeCheckpointing whether KCL should validate client provided sequence numbers
ValidateSequenceNumberBeforeCheckpointing bool
2018-04-11 03:50:18 +00:00
2018-04-13 04:02:30 +00:00
// RegionName The region name for the service
RegionName string
2018-04-11 03:50:18 +00:00
2018-04-13 04:02:30 +00:00
// ShutdownGraceMillis The number of milliseconds before graceful shutdown terminates forcefully
ShutdownGraceMillis int
2018-04-11 03:50:18 +00:00
// Operation parameters
// MaxLeasesForWorker Max leases this Worker can handle at a time
2018-04-13 04:02:30 +00:00
MaxLeasesForWorker int
2018-04-11 03:50:18 +00:00
// MaxLeasesToStealAtOneTime Max leases to steal at one time (for load balancing)
2018-04-13 04:02:30 +00:00
MaxLeasesToStealAtOneTime int
2018-04-11 03:50:18 +00:00
// InitialLeaseTableReadCapacity Read capacity to provision when creating the lease table (DynamoDB).
2018-04-13 04:02:30 +00:00
InitialLeaseTableReadCapacity int
2018-04-11 03:50:18 +00:00
// InitialLeaseTableWriteCapacity Write capacity to provision when creating the lease table.
2018-04-13 04:02:30 +00:00
InitialLeaseTableWriteCapacity int
2018-04-11 03:50:18 +00:00
// SkipShardSyncAtWorkerInitializationIfLeasesExist Worker should skip syncing shards and leases at startup if leases are present
// This is useful for optimizing deployments to large fleets working on a stable stream.
2018-04-13 04:02:30 +00:00
SkipShardSyncAtWorkerInitializationIfLeasesExist bool
2019-10-28 12:08:18 +00:00
// Logger used to log message.
Logger logger . Logger
2019-11-06 13:53:21 +00:00
// MonitoringService publishes per worker-scoped metrics.
MonitoringService metrics . MonitoringService
2021-06-01 23:18:26 +00:00
// EnableLeaseStealing turns on lease stealing
EnableLeaseStealing bool
// LeaseStealingIntervalMillis The number of milliseconds between rebalance tasks
LeaseStealingIntervalMillis int
// LeaseStealingClaimTimeoutMillis The number of milliseconds to wait before another worker can acquire a claimed shard
LeaseStealingClaimTimeoutMillis int
// LeaseSyncingTimeIntervalMillis The number of milliseconds to wait before syncing with lease table (DynamoDB)
LeaseSyncingTimeIntervalMillis int
2023-01-24 19:59:32 +00:00
// MaxRetryCount The maximum number of retries in case of error
MaxRetryCount int
2018-04-11 03:50:18 +00:00
}
)
2018-04-17 16:25:41 +00:00
// positionMap associates each named InitialPositionInStream constant with a
// pointer to its string name. InitalPositionInStreamToShardIteratorType
// performs its lookup in this table.
var positionMap = map [ InitialPositionInStream ] * string {
LATEST : aws . String ( "LATEST" ) ,
TRIM_HORIZON : aws . String ( "TRIM_HORIZON" ) ,
AT_TIMESTAMP : aws . String ( "AT_TIMESTAMP" ) ,
}
// InitalPositionInStreamToShardIteratorType looks pos up in positionMap and
// returns a pointer to the matching name ("LATEST", "TRIM_HORIZON" or
// "AT_TIMESTAMP"). A position that has no entry in the table yields nil.
func InitalPositionInStreamToShardIteratorType(pos InitialPositionInStream) *string {
	iteratorType := positionMap[pos]
	return iteratorType
}
2018-04-11 03:50:18 +00:00
// empty reports whether s is blank, i.e. nothing remains once leading and
// trailing whitespace has been trimmed.
func empty(s string) bool {
	trimmed := strings.TrimSpace(s)
	return trimmed == ""
}
2021-04-27 15:51:26 +00:00
// checkIsValueNotEmpty makes sure the value is not empty.
2018-04-11 03:50:18 +00:00
func checkIsValueNotEmpty ( key string , value string ) {
if empty ( value ) {
// There is no point to continue for incorrect configuration. Fail fast!
2021-04-27 15:51:26 +00:00
log . Panicf ( "Non-empty value expected for %v, actual: %v" , key , value )
2018-04-11 03:50:18 +00:00
}
}
2021-04-27 15:51:26 +00:00
// checkIsValuePositive panics (via log.Panicf) unless value is strictly
// greater than zero. key names the offending setting in the panic message.
func checkIsValuePositive(key string, value int) {
	if value > 0 {
		return
	}
	// Continuing with an incorrect configuration is pointless, so fail fast.
	log.Panicf("Positive value expected for %v, actual: %v", key, value)
}