This is a squash of commits a144dfaac117415c400e8786b98060f5660d4276 through 4185f6e72520744e8a18cd04c550bc57a1bfd298.
167 lines
8.4 KiB
Properties
167 lines
8.4 KiB
Properties
# The script that abides by the multi-language protocol. This script will
|
|
# be executed by the MultiLangDaemon, which will communicate with this script
|
|
# over STDIN and STDOUT according to the multi-language protocol.
|
|
executableName = sample_kclpy_app.py
|
|
|
|
# The Stream arn: arn:aws:kinesis:<region>:<account id>:stream/<stream name>
|
|
# Important: streamArn takes precedence over streamName if both are set
|
|
streamArn = arn:aws:kinesis:us-east-5:000000000000:stream/kclpysample
|
|
|
|
# The name of an Amazon Kinesis stream to process.
|
|
# Important: streamArn takes precedence over streamName if both are set
|
|
streamName = kclpysample
|
|
|
|
# Used by the KCL as the name of this application. Will be used as the name
|
|
# of an Amazon DynamoDB table which will store the lease and checkpoint
|
|
# information for workers with this application name
|
|
applicationName = MultiLangTest
|
|
|
|
# Users can change the credentials provider the KCL will use to retrieve credentials.
|
|
# The DefaultAWSCredentialsProviderChain checks several other providers, which is
|
|
# described here:
|
|
# http://docs.aws.amazon.com/AWSJavaSDK/latest/javadoc/com/amazonaws/auth/DefaultAWSCredentialsProviderChain.html
|
|
AWSCredentialsProvider = DefaultAWSCredentialsProviderChain
|
|
|
|
# Appended to the user agent of the KCL. Does not impact the functionality of the
|
|
# KCL in any other way.
|
|
processingLanguage = python/3.8
|
|
|
|
# Valid options are TRIM_HORIZON or LATEST.
|
|
# See http://docs.aws.amazon.com/kinesis/latest/APIReference/API_GetShardIterator.html#API_GetShardIterator_RequestSyntax
|
|
initialPositionInStream = TRIM_HORIZON
|
|
|
|
# To specify an initial timestamp from which to start processing records, please specify a timestamp value for 'initialPositionInStreamExtended',
|
|
# and uncomment the line below with the right timestamp value.
|
|
# See more from 'Timestamp' under http://docs.aws.amazon.com/kinesis/latest/APIReference/API_GetShardIterator.html#API_GetShardIterator_RequestSyntax
|
|
#initialPositionInStreamExtended = 1636609142
|
|
|
|
# The following properties are also available for configuring the KCL Worker that is created
|
|
# by the MultiLangDaemon.
|
|
|
|
# The KCL defaults to us-east-1
|
|
regionName = us-east-1
|
|
|
|
# Fail over time in milliseconds. A worker which does not renew its lease within this time interval
|
|
# will be regarded as having problems and its shards will be assigned to other workers.
|
|
# For applications that have a large number of shards, this may be set to a higher number to reduce
|
|
# the number of DynamoDB IOPS required for tracking leases
|
|
failoverTimeMillis = 10000
|
|
|
|
# A worker id that uniquely identifies this worker among all workers using the same applicationName
|
|
# If this isn't provided a MultiLangDaemon instance will assign a unique workerId to itself.
|
|
workerId = "workerId"
|
|
|
|
# Shard sync interval in milliseconds - e.g. wait for this long between shard sync tasks.
|
|
shardSyncIntervalMillis = 60000
|
|
|
|
# Max records to fetch from Kinesis in a single GetRecords call.
|
|
maxRecords = 10000
|
|
|
|
# Idle time between record reads in milliseconds.
|
|
idleTimeBetweenReadsInMillis = 1000
|
|
|
|
# Enables applications to flush/checkpoint (if they have some data "in progress", but don't get new data for a while)
|
|
callProcessRecordsEvenForEmptyRecordList = false
|
|
|
|
# Interval in milliseconds between polling to check for parent shard completion.
|
|
# Polling frequently will take up more DynamoDB IOPS (when there are leases for shards waiting on
|
|
# completion of parent shards).
|
|
parentShardPollIntervalMillis = 10000
|
|
|
|
# Cleanup leases upon shards completion (don't wait until they expire in Kinesis).
|
|
# Keeping leases takes some tracking/resources (e.g. they need to be renewed, assigned), so by default we try
|
|
# to delete the ones we don't need any longer.
|
|
cleanupLeasesUponShardCompletion = true
|
|
|
|
# Backoff time in milliseconds for Amazon Kinesis Client Library tasks (in the event of failures).
|
|
taskBackoffTimeMillis = 500
|
|
|
|
# Buffer metrics for at most this long before publishing to CloudWatch.
|
|
metricsBufferTimeMillis = 10000
|
|
|
|
# Buffer at most this many metrics before publishing to CloudWatch.
|
|
metricsMaxQueueSize = 10000
|
|
|
|
# KCL will validate client provided sequence numbers with a call to Amazon Kinesis before checkpointing for calls
|
|
# to RecordProcessorCheckpointer#checkpoint(String) by default.
|
|
validateSequenceNumberBeforeCheckpointing = true
|
|
|
|
# The maximum number of active threads for the MultiLangDaemon to permit.
|
|
# If a value is provided then a FixedThreadPool is used with the maximum
|
|
# active threads set to the provided value. If a non-positive integer or no
|
|
# value is provided a CachedThreadPool is used.
|
|
maxActiveThreads = -1
|
|
|
|
################### KclV3 configurations ###################
|
|
# Coordinator config
|
|
clientVersionConfig = CLIENT_VERSION_CONFIG_3x
|
|
|
|
## Let all other config be defaults
|
|
## TODO: include table deletion protection and pitr config once it's added
|
|
## Configurations to control how the CoordinatorState DDB table is created
|
|
## Default name is applicationName-CoordinatorState in PAY_PER_REQUEST
|
|
#coordinatorStateTableName = MultiLangTest-CoordinatorState-CustomName
|
|
#coordinatorStateBillingMode = PROVISIONED
|
|
#coordinatorStateReadCapacity = 1000
|
|
#coordinatorStateWriteCapacity = 500
|
|
#
|
|
## Graceful handoff config - tuning of the shutdown behavior during lease transfers
|
|
## default values are 30000 and true respectively
|
|
#gracefulLeaseHandoffTimeoutMillis = 10000
|
|
#isGracefulLeaseHandoffEnabled = false
|
|
#
|
|
## WorkerMetricStats table config - control how the DDB table is created
|
|
### Default name is applicationName-WorkerMetricStats in PAY_PER_REQUEST
|
|
## TODO: include table deletion protection and pitr config once it's added
|
|
#workerMetricsTableName = MultiLangTest-WorkerMetrics-CustomName
|
|
#workerMetricsBillingMode = PROVISIONED
|
|
#workerMetricsReadCapacity = 250
|
|
#workerMetricsWriteCapacity = 90
|
|
#
|
|
## WorkerUtilizationAwareAssignment config - tune the new KCLv3 Lease balancing algorithm
|
|
##
|
|
## frequency of capturing worker metrics in memory. Default is 1s
|
|
#inMemoryWorkerMetricsCaptureFrequencyMillis = 5000
|
|
## frequency of reporting worker metric stats to storage. Default is 30s
|
|
#workerMetricsReporterFreqInMillis = 60000
|
|
## No. of metricStats that are persisted in WorkerMetricStats ddb table, default is 10.
|
|
## This provides historic values that are used to compute the workers current
|
|
## utilization using an exponential-moving-average.
|
|
#noOfPersistedMetricsPerWorkerMetrics = 50
|
|
## Disable use of worker metrics to balance leases, default is false.
|
|
## If it is true, the algorithm balances lease based on worker's processing throughput.
|
|
#disableWorkerMetrics = true
|
|
## Max throughput per host 10 MBps, to limit processing to the given value
|
|
## Default is unlimited.
|
|
#maxThroughputPerHostKBps = 10000
|
|
## Dampen the load that is rebalanced during lease re-balancing, default is 60%
|
|
#dampeningPercentage = 90
|
|
## Configures the allowed variance range for worker utilization. The upper
|
|
## limit is calculated as average * (1 + reBalanceThresholdPercentage/100).
|
|
## The lower limit is average * (1 - reBalanceThresholdPercentage/100). If
|
|
## any worker's utilization falls outside this range, lease re-balancing is
|
|
## triggered. The re-balancing algorithm aims to bring variance within the
|
|
## specified range. It also avoids thrashing by ensuring the utilization of
|
|
## the worker receiving the load after re-balancing doesn't exceed the fleet
|
|
## average. This might cause no re-balancing action even if the utilization is
|
|
## out of the variance range. The default value is 10, representing +/-10%
|
|
## variance from the average value.
|
|
#reBalanceThresholdPercentage = 5
|
|
## Whether at-least one lease must be taken from a high utilization worker
|
|
## during re-balancing when there is no lease assigned to that worker which has
|
|
## throughput less than or equal to the minimum throughput that needs to be
|
|
## moved away from that worker to bring the worker back into the allowed variance.
|
|
## Default is true.
|
|
#allowThroughputOvershoot = false
|
|
## Lease assignment is performed every failoverTimeMillis but re-balance will
|
|
## be attempted only once in 5 times based on the below config. Default is 3.
|
|
#varianceBalancingFrequency = 5
|
|
## Alpha value used for calculating exponential moving average of worker's metricStats.
|
|
## Default is 0.5, a higher alpha value will make re-balancing more sensitive
|
|
## to recent metricStats.
|
|
#workerMetricsEMAAlpha = 0.18
|
|
## Duration after which workerMetricStats entry from WorkerMetricStats table will
|
|
## be cleaned up. Default is 1 day.
|
|
## Duration format examples: PT15M (15 mins) PT10H (10 hours) P2D (2 days)
|
|
## Refer to Duration.parse javadocs for more details
|
|
#staleWorkerMetricsEntryCleanupDuration = PT12H
|