# The script that abides by the multi-language protocol. This script will
# be executed by the MultiLangDaemon, which will communicate with this script
# over STDIN and STDOUT according to the multi-language protocol.
executableName = sample_kclpy_app.py

# The Stream arn: arn:aws:kinesis:<region>:<account id>:stream/<stream name>
# Important: streamArn takes precedence over streamName if both are set
# NOTE(review): the region and account id in the value below look like placeholders -
# confirm and replace them with the real stream ARN before deploying.
streamArn = arn:aws:kinesis:us-east-5:000000000000:stream/kclpysample

# The name of an Amazon Kinesis stream to process.
# Important: streamArn takes precedence over streamName if both are set
streamName = kclpysample

# Used by the KCL as the name of this application. Will be used as the name
# of an Amazon DynamoDB table which will store the lease and checkpoint
# information for workers with this application name
applicationName = MultiLangTest

# Users can change the credentials provider the KCL will use to retrieve credentials.
# The DefaultAWSCredentialsProviderChain checks several other providers, which is
# described here:
# http://docs.aws.amazon.com/AWSJavaSDK/latest/javadoc/com/amazonaws/auth/DefaultAWSCredentialsProviderChain.html
AWSCredentialsProvider = DefaultAWSCredentialsProviderChain

# Appended to the user agent of the KCL. Does not impact the functionality of the
# KCL in any other way.
processingLanguage = python/3.8

# Valid options are TRIM_HORIZON or LATEST.
# See http://docs.aws.amazon.com/kinesis/latest/APIReference/API_GetShardIterator.html#API_GetShardIterator_RequestSyntax
initialPositionInStream = TRIM_HORIZON

# To specify an initial timestamp from which to start processing records, please specify a timestamp value for
# 'initialPositionInStreamExtended', and uncomment the line below with the right timestamp value.
# See more from 'Timestamp' under http://docs.aws.amazon.com/kinesis/latest/APIReference/API_GetShardIterator.html#API_GetShardIterator_RequestSyntax
#initialPositionInStreamExtended = 1636609142

# The following properties are also available for configuring the KCL Worker that is created
# by the MultiLangDaemon.

# The KCL defaults to us-east-1
regionName = us-east-1

# Fail over time in milliseconds. A worker which does not renew its lease within this time interval
# will be regarded as having problems and its shards will be assigned to other workers.
# For applications that have a large number of shards, this may be set to a higher number to reduce
# the number of DynamoDB IOPS required for tracking leases
failoverTimeMillis = 10000

# A worker id that uniquely identifies this worker among all workers using the same applicationName
# If this isn't provided a MultiLangDaemon instance will assign a unique workerId to itself.
# NOTE(review): properties values are read literally, so the double quotes below are presumably
# part of the worker id, and a fixed id is only unique while a single worker uses this file - confirm.
workerId = "workerId"

# Shard sync interval in milliseconds - e.g. wait for this long between shard sync tasks.
shardSyncIntervalMillis = 60000

# Max records to fetch from Kinesis in a single GetRecords call.
maxRecords = 10000

# Idle time between record reads in milliseconds.
idleTimeBetweenReadsInMillis = 1000

# Enables applications flush/checkpoint (if they have some data "in progress", but don't get new data for a while)
callProcessRecordsEvenForEmptyRecordList = false

# Interval in milliseconds between polling to check for parent shard completion.
# Polling frequently will take up more DynamoDB IOPS (when there are leases for shards waiting on
# completion of parent shards).
parentShardPollIntervalMillis = 10000

# Cleanup leases upon shards completion (don't wait until they expire in Kinesis).
# Keeping leases takes some tracking/resources (e.g. they need to be renewed, assigned), so by default we try
# to delete the ones we don't need any longer.
cleanupLeasesUponShardCompletion = true

# Backoff time in milliseconds for Amazon Kinesis Client Library tasks (in the event of failures).
taskBackoffTimeMillis = 500

# Buffer metrics for at most this long before publishing to CloudWatch.
metricsBufferTimeMillis = 10000

# Buffer at most this many metrics before publishing to CloudWatch.
metricsMaxQueueSize = 10000

# KCL will validate client provided sequence numbers with a call to Amazon Kinesis before checkpointing for calls
# to RecordProcessorCheckpointer#checkpoint(String) by default.
validateSequenceNumberBeforeCheckpointing = true

# The maximum number of active threads for the MultiLangDaemon to permit.
# If a value is provided then a FixedThreadPool is used with the maximum
# active threads set to the provided value. If a non-positive integer or no
# value is provided a CachedThreadPool is used.
maxActiveThreads = -1

################### KclV3 configurations ###################
# Coordinator config
clientVersionConfig = CLIENT_VERSION_CONFIG_3x

## Let all other config be defaults
## TODO: include table deletion protection and pitr config once it's added
## Configurations to control how the CoordinatorState DDB table is created
## Default name is applicationName-CoordinatorState in PAY_PER_REQUEST
#coordinatorStateTableName = MultiLangTest-CoordinatorState-CustomName
#coordinatorStateBillingMode = PROVISIONED
#coordinatorStateReadCapacity = 1000
#coordinatorStateWriteCapacity = 500
#
## Graceful handoff config - tuning of the shutdown behavior during lease transfers
## default values are 30000 and true respectively
#gracefulLeaseHandoffTimeoutMillis = 10000
#isGracefulLeaseHandoffEnabled = false
#
## WorkerMetricStats table config - control how the DDB table is created
## Default name is applicationName-WorkerMetricStats in PAY_PER_REQUEST
## TODO: include table deletion protection and pitr config once it's added
#workerMetricsTableName = MultiLangTest-WorkerMetrics-CustomName
#workerMetricsBillingMode = PROVISIONED
#workerMetricsReadCapacity = 250
#workerMetricsWriteCapacity = 90
#
## WorkerUtilizationAwareAssignment config - tune the new KCLv3 Lease balancing algorithm
##
## frequency of capturing worker metrics in memory. Default is 1s
#inMemoryWorkerMetricsCaptureFrequencyMillis = 5000
## frequency of reporting worker metric stats to storage. Default is 30s
#workerMetricsReporterFreqInMillis = 60000
## No. of metricStats that are persisted in WorkerMetricStats ddb table, default is 10.
## This provides historic values that are used to compute the worker's current
## utilization using an exponential-moving-average.
#noOfPersistedMetricsPerWorkerMetrics = 50
## Disable use of worker metrics to balance lease, default is false.
## If it is true, the algorithm balances lease based on worker's processing throughput.
#disableWorkerMetrics = true
## Max throughput per host 10 MBps, to limit processing to the given value
## Default is unlimited.
#maxThroughputPerHostKBps = 10000
## Dampen the load that is rebalanced during lease re-balancing, default is 60%
#dampeningPercentage = 90
## Configures the allowed variance range for worker utilization. The upper
## limit is calculated as average * (1 + reBalanceThresholdPercentage/100).
## The lower limit is average * (1 - reBalanceThresholdPercentage/100). If
## any worker's utilization falls outside this range, lease re-balancing is
## triggered. The re-balancing algorithm aims to bring variance within the
## specified range. It also avoids thrashing by ensuring the utilization of
## the worker receiving the load after re-balancing doesn't exceed the fleet
## average. This might cause no re-balancing action even if the utilization is
## out of the variance range. The default value is 10, representing +/-10%
## variance from the average value.
#reBalanceThresholdPercentage = 5
## Whether at least one lease must be taken from a high utilization worker
## during re-balancing when no lease assigned to that worker has a throughput
## less than or equal to the minimum throughput that needs to be moved away
## from that worker to bring the worker back into the allowed variance.
## Default is true.
#allowThroughputOvershoot = false
## Lease assignment is performed every failoverTimeMillis but re-balance will
## be attempted only once in 5 times based on the below config. Default is 3.
#varianceBalancingFrequency = 5
## Alpha value used for calculating exponential moving average of worker's metricStats.
## Default is 0.5, a higher alpha value will make re-balancing more sensitive
## to recent metricStats.
#workerMetricsEMAAlpha = 0.18
## Duration after which workerMetricStats entry from WorkerMetricStats table will
## be cleaned up. Default is 1 day.
## Duration format examples: PT15M (15 mins) PT10H (10 hours) P2D (2 days)
## Refer to Duration.parse javadocs for more details
#staleWorkerMetricsEntryCleanupDuration = PT12H