Merge 88ff519d51 into a159fa31fb
This commit is contained in:
commit
b80e6d15b9
206 changed files with 21374 additions and 3045 deletions
|
|
@ -21,16 +21,12 @@
|
||||||
<parent>
|
<parent>
|
||||||
<artifactId>amazon-kinesis-client-pom</artifactId>
|
<artifactId>amazon-kinesis-client-pom</artifactId>
|
||||||
<groupId>software.amazon.kinesis</groupId>
|
<groupId>software.amazon.kinesis</groupId>
|
||||||
<version>2.6.1-SNAPSHOT</version>
|
<version>3.0.0</version>
|
||||||
</parent>
|
</parent>
|
||||||
<modelVersion>4.0.0</modelVersion>
|
<modelVersion>4.0.0</modelVersion>
|
||||||
|
|
||||||
<artifactId>amazon-kinesis-client-multilang</artifactId>
|
<artifactId>amazon-kinesis-client-multilang</artifactId>
|
||||||
|
|
||||||
<properties>
|
|
||||||
<aws-java-sdk.version>1.12.668</aws-java-sdk.version>
|
|
||||||
</properties>
|
|
||||||
|
|
||||||
<dependencies>
|
<dependencies>
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>software.amazon.kinesis</groupId>
|
<groupId>software.amazon.kinesis</groupId>
|
||||||
|
|
@ -43,36 +39,10 @@
|
||||||
<version>${awssdk.version}</version>
|
<version>${awssdk.version}</version>
|
||||||
</dependency>
|
</dependency>
|
||||||
|
|
||||||
<dependency>
|
|
||||||
<groupId>com.amazonaws</groupId>
|
|
||||||
<artifactId>aws-java-sdk-core</artifactId>
|
|
||||||
<version>${aws-java-sdk.version}</version>
|
|
||||||
<exclusions>
|
|
||||||
<exclusion>
|
|
||||||
<groupId>com.fasterxml.jackson.core</groupId>
|
|
||||||
<artifactId>jackson-databind</artifactId>
|
|
||||||
</exclusion>
|
|
||||||
<exclusion>
|
|
||||||
<groupId>com.fasterxml.jackson.dataformat</groupId>
|
|
||||||
<artifactId>jackson-dataformat-cbor</artifactId>
|
|
||||||
</exclusion>
|
|
||||||
<exclusion>
|
|
||||||
<groupId>org.apache.httpcomponents</groupId>
|
|
||||||
<artifactId>httpclient</artifactId>
|
|
||||||
</exclusion>
|
|
||||||
</exclusions>
|
|
||||||
</dependency>
|
|
||||||
|
|
||||||
<dependency>
|
|
||||||
<groupId>com.amazonaws</groupId>
|
|
||||||
<artifactId>aws-java-sdk-sts</artifactId>
|
|
||||||
<version>${aws-java-sdk.version}</version>
|
|
||||||
</dependency>
|
|
||||||
|
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>org.projectlombok</groupId>
|
<groupId>org.projectlombok</groupId>
|
||||||
<artifactId>lombok</artifactId>
|
<artifactId>lombok</artifactId>
|
||||||
<version>1.18.24</version>
|
<version>1.18.28</version>
|
||||||
<scope>provided</scope>
|
<scope>provided</scope>
|
||||||
</dependency>
|
</dependency>
|
||||||
<dependency>
|
<dependency>
|
||||||
|
|
@ -104,6 +74,12 @@
|
||||||
</dependency>
|
</dependency>
|
||||||
|
|
||||||
<!-- Test -->
|
<!-- Test -->
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.junit.jupiter</groupId>
|
||||||
|
<artifactId>junit-jupiter-api</artifactId>
|
||||||
|
<version>5.11.3</version>
|
||||||
|
<scope>test</scope>
|
||||||
|
</dependency>
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>junit</groupId>
|
<groupId>junit</groupId>
|
||||||
<artifactId>junit</artifactId>
|
<artifactId>junit</artifactId>
|
||||||
|
|
@ -122,6 +98,13 @@
|
||||||
<version>1.3</version>
|
<version>1.3</version>
|
||||||
<scope>test</scope>
|
<scope>test</scope>
|
||||||
</dependency>
|
</dependency>
|
||||||
|
<!-- Using older version to be compatible with Java 8 -->
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.mockito</groupId>
|
||||||
|
<artifactId>mockito-junit-jupiter</artifactId>
|
||||||
|
<version>3.12.4</version>
|
||||||
|
<scope>test</scope>
|
||||||
|
</dependency>
|
||||||
</dependencies>
|
</dependencies>
|
||||||
|
|
||||||
<build>
|
<build>
|
||||||
|
|
|
||||||
|
|
@ -61,10 +61,10 @@ import software.amazon.kinesis.coordinator.Scheduler;
|
||||||
* applicationName = PythonKCLSample
|
* applicationName = PythonKCLSample
|
||||||
*
|
*
|
||||||
* # Users can change the credentials provider the KCL will use to retrieve credentials.
|
* # Users can change the credentials provider the KCL will use to retrieve credentials.
|
||||||
* # The DefaultAWSCredentialsProviderChain checks several other providers, which is
|
* # The DefaultCredentialsProvider checks several other providers, which is
|
||||||
* # described here:
|
* # described here:
|
||||||
* # http://docs.aws.amazon.com/AWSJavaSDK/latest/javadoc/com/amazonaws/auth/DefaultAWSCredentialsProviderChain.html
|
* # https://sdk.amazonaws.com/java/api/2.0.0-preview-11/software/amazon/awssdk/auth/credentials/DefaultCredentialsProvider.html
|
||||||
* AWSCredentialsProvider = DefaultAWSCredentialsProviderChain
|
* AwsCredentialsProvider = DefaultCredentialsProvider
|
||||||
* </pre>
|
* </pre>
|
||||||
*/
|
*/
|
||||||
@Slf4j
|
@Slf4j
|
||||||
|
|
@ -141,7 +141,7 @@ public class MultiLangDaemon {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
String propertiesFile(final MultiLangDaemonArguments arguments) {
|
String validateAndGetPropertiesFileName(final MultiLangDaemonArguments arguments) {
|
||||||
String propertiesFile = "";
|
String propertiesFile = "";
|
||||||
|
|
||||||
if (CollectionUtils.isNotEmpty(arguments.parameters)) {
|
if (CollectionUtils.isNotEmpty(arguments.parameters)) {
|
||||||
|
|
@ -216,9 +216,9 @@ public class MultiLangDaemon {
|
||||||
MultiLangDaemonArguments arguments = new MultiLangDaemonArguments();
|
MultiLangDaemonArguments arguments = new MultiLangDaemonArguments();
|
||||||
JCommander jCommander = daemon.buildJCommanderAndParseArgs(arguments, args);
|
JCommander jCommander = daemon.buildJCommanderAndParseArgs(arguments, args);
|
||||||
try {
|
try {
|
||||||
String propertiesFile = daemon.propertiesFile(arguments);
|
String propertiesFileName = daemon.validateAndGetPropertiesFileName(arguments);
|
||||||
daemon.configureLogging(arguments.logConfiguration);
|
daemon.configureLogging(arguments.logConfiguration);
|
||||||
MultiLangDaemonConfig config = daemon.buildMultiLangDaemonConfig(propertiesFile);
|
MultiLangDaemonConfig config = daemon.buildMultiLangDaemonConfig(propertiesFileName);
|
||||||
|
|
||||||
Scheduler scheduler = daemon.buildScheduler(config);
|
Scheduler scheduler = daemon.buildScheduler(config);
|
||||||
MultiLangRunner runner = new MultiLangRunner(scheduler);
|
MultiLangRunner runner = new MultiLangRunner(scheduler);
|
||||||
|
|
|
||||||
|
|
@ -15,13 +15,14 @@
|
||||||
package software.amazon.kinesis.multilang;
|
package software.amazon.kinesis.multilang;
|
||||||
|
|
||||||
import java.util.HashMap;
|
import java.util.HashMap;
|
||||||
|
import java.util.List;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
|
||||||
import com.amazonaws.regions.Regions;
|
|
||||||
import com.google.common.base.CaseFormat;
|
import com.google.common.base.CaseFormat;
|
||||||
import lombok.AccessLevel;
|
import lombok.AccessLevel;
|
||||||
import lombok.Getter;
|
import lombok.Getter;
|
||||||
import lombok.extern.slf4j.Slf4j;
|
import lombok.extern.slf4j.Slf4j;
|
||||||
|
import software.amazon.awssdk.regions.Region;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Key-Value pairs which may be nested in, and extracted from, a property value
|
* Key-Value pairs which may be nested in, and extracted from, a property value
|
||||||
|
|
@ -73,8 +74,13 @@ public enum NestedPropertyKey {
|
||||||
* @see <a href="https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/using-regions-availability-zones.html#concepts-regions">Available Regions</a>
|
* @see <a href="https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/using-regions-availability-zones.html#concepts-regions">Available Regions</a>
|
||||||
*/
|
*/
|
||||||
ENDPOINT_REGION {
|
ENDPOINT_REGION {
|
||||||
void visit(final NestedPropertyProcessor processor, final String region) {
|
void visit(final NestedPropertyProcessor processor, final String regionName) {
|
||||||
processor.acceptEndpointRegion(Regions.fromName(region));
|
List<Region> validRegions = Region.regions();
|
||||||
|
Region region = Region.of(regionName);
|
||||||
|
if (!validRegions.contains(region)) {
|
||||||
|
throw new IllegalArgumentException("Invalid region name: " + regionName);
|
||||||
|
}
|
||||||
|
processor.acceptEndpointRegion(region);
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -14,7 +14,7 @@
|
||||||
*/
|
*/
|
||||||
package software.amazon.kinesis.multilang;
|
package software.amazon.kinesis.multilang;
|
||||||
|
|
||||||
import com.amazonaws.regions.Regions;
|
import software.amazon.awssdk.regions.Region;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Defines methods to process {@link NestedPropertyKey}s.
|
* Defines methods to process {@link NestedPropertyKey}s.
|
||||||
|
|
@ -28,7 +28,7 @@ public interface NestedPropertyProcessor {
|
||||||
* (e.g., https://sns.us-west-1.amazonaws.com, sns.us-west-1.amazonaws.com)
|
* (e.g., https://sns.us-west-1.amazonaws.com, sns.us-west-1.amazonaws.com)
|
||||||
* @param signingRegion the region to use for SigV4 signing of requests (e.g. us-west-1)
|
* @param signingRegion the region to use for SigV4 signing of requests (e.g. us-west-1)
|
||||||
*
|
*
|
||||||
* @see #acceptEndpointRegion(Regions)
|
* @see #acceptEndpointRegion(Region)
|
||||||
* @see <a href="https://docs.aws.amazon.com/AWSJavaSDK/latest/javadoc/com/amazonaws/client/builder/AwsClientBuilder.EndpointConfiguration.html">
|
* @see <a href="https://docs.aws.amazon.com/AWSJavaSDK/latest/javadoc/com/amazonaws/client/builder/AwsClientBuilder.EndpointConfiguration.html">
|
||||||
* AwsClientBuilder.EndpointConfiguration</a>
|
* AwsClientBuilder.EndpointConfiguration</a>
|
||||||
*/
|
*/
|
||||||
|
|
@ -42,7 +42,7 @@ public interface NestedPropertyProcessor {
|
||||||
*
|
*
|
||||||
* @see #acceptEndpoint(String, String)
|
* @see #acceptEndpoint(String, String)
|
||||||
*/
|
*/
|
||||||
void acceptEndpointRegion(Regions region);
|
void acceptEndpointRegion(Region region);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Set the external id, an optional field to designate who can assume an IAM role.
|
* Set the external id, an optional field to designate who can assume an IAM role.
|
||||||
|
|
|
||||||
|
|
@ -1,86 +0,0 @@
|
||||||
/*
|
|
||||||
* Copyright 2023 Amazon.com, Inc. or its affiliates.
|
|
||||||
* Licensed under the Apache License, Version 2.0 (the
|
|
||||||
* "License"); you may not use this file except in compliance
|
|
||||||
* with the License. You may obtain a copy of the License at
|
|
||||||
*
|
|
||||||
* http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
*
|
|
||||||
* Unless required by applicable law or agreed to in writing, software
|
|
||||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
* See the License for the specific language governing permissions and
|
|
||||||
* limitations under the License.
|
|
||||||
*/
|
|
||||||
package software.amazon.kinesis.multilang.auth;
|
|
||||||
|
|
||||||
import java.util.Arrays;
|
|
||||||
|
|
||||||
import com.amazonaws.auth.AWSSessionCredentials;
|
|
||||||
import com.amazonaws.auth.AWSSessionCredentialsProvider;
|
|
||||||
import com.amazonaws.auth.STSAssumeRoleSessionCredentialsProvider;
|
|
||||||
import com.amazonaws.auth.STSAssumeRoleSessionCredentialsProvider.Builder;
|
|
||||||
import com.amazonaws.client.builder.AwsClientBuilder.EndpointConfiguration;
|
|
||||||
import com.amazonaws.regions.Regions;
|
|
||||||
import com.amazonaws.services.securitytoken.AWSSecurityTokenService;
|
|
||||||
import com.amazonaws.services.securitytoken.AWSSecurityTokenServiceClient;
|
|
||||||
import software.amazon.kinesis.multilang.NestedPropertyKey;
|
|
||||||
import software.amazon.kinesis.multilang.NestedPropertyProcessor;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* An {@link AWSSessionCredentialsProvider} that is backed by STSAssumeRole.
|
|
||||||
*/
|
|
||||||
public class KclSTSAssumeRoleSessionCredentialsProvider
|
|
||||||
implements AWSSessionCredentialsProvider, NestedPropertyProcessor {
|
|
||||||
|
|
||||||
private final Builder builder;
|
|
||||||
|
|
||||||
private final STSAssumeRoleSessionCredentialsProvider provider;
|
|
||||||
|
|
||||||
/**
|
|
||||||
*
|
|
||||||
* @param params vararg parameters which must include roleArn at index=0,
|
|
||||||
* and roleSessionName at index=1
|
|
||||||
*/
|
|
||||||
public KclSTSAssumeRoleSessionCredentialsProvider(final String[] params) {
|
|
||||||
this(params[0], params[1], Arrays.copyOfRange(params, 2, params.length));
|
|
||||||
}
|
|
||||||
|
|
||||||
public KclSTSAssumeRoleSessionCredentialsProvider(
|
|
||||||
final String roleArn, final String roleSessionName, final String... params) {
|
|
||||||
builder = new Builder(roleArn, roleSessionName);
|
|
||||||
NestedPropertyKey.parse(this, params);
|
|
||||||
provider = builder.build();
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public AWSSessionCredentials getCredentials() {
|
|
||||||
return provider.getCredentials();
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public void refresh() {
|
|
||||||
// do nothing
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public void acceptEndpoint(final String serviceEndpoint, final String signingRegion) {
|
|
||||||
final EndpointConfiguration endpoint = new EndpointConfiguration(serviceEndpoint, signingRegion);
|
|
||||||
final AWSSecurityTokenService stsClient = AWSSecurityTokenServiceClient.builder()
|
|
||||||
.withEndpointConfiguration(endpoint)
|
|
||||||
.build();
|
|
||||||
builder.withStsClient(stsClient);
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public void acceptEndpointRegion(final Regions region) {
|
|
||||||
final AWSSecurityTokenService stsClient =
|
|
||||||
AWSSecurityTokenServiceClient.builder().withRegion(region).build();
|
|
||||||
builder.withStsClient(stsClient);
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public void acceptExternalId(final String externalId) {
|
|
||||||
builder.withExternalId(externalId);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
@ -0,0 +1,61 @@
|
||||||
|
package software.amazon.kinesis.multilang.auth;
|
||||||
|
|
||||||
|
import java.net.URI;
|
||||||
|
import java.util.Arrays;
|
||||||
|
|
||||||
|
import software.amazon.awssdk.auth.credentials.AwsCredentials;
|
||||||
|
import software.amazon.awssdk.auth.credentials.AwsCredentialsProvider;
|
||||||
|
import software.amazon.awssdk.regions.Region;
|
||||||
|
import software.amazon.awssdk.services.sts.StsClient;
|
||||||
|
import software.amazon.awssdk.services.sts.StsClientBuilder;
|
||||||
|
import software.amazon.awssdk.services.sts.auth.StsAssumeRoleCredentialsProvider;
|
||||||
|
import software.amazon.awssdk.services.sts.model.AssumeRoleRequest;
|
||||||
|
import software.amazon.awssdk.services.sts.model.AssumeRoleRequest.Builder;
|
||||||
|
import software.amazon.kinesis.multilang.NestedPropertyKey;
|
||||||
|
import software.amazon.kinesis.multilang.NestedPropertyProcessor;
|
||||||
|
|
||||||
|
public class KclStsAssumeRoleCredentialsProvider implements AwsCredentialsProvider, NestedPropertyProcessor {
|
||||||
|
private final Builder assumeRoleRequestBuilder;
|
||||||
|
private final StsClientBuilder stsClientBuilder;
|
||||||
|
private final StsAssumeRoleCredentialsProvider stsAssumeRoleCredentialsProvider;
|
||||||
|
|
||||||
|
public KclStsAssumeRoleCredentialsProvider(String[] params) {
|
||||||
|
this(params[0], params[1], Arrays.copyOfRange(params, 2, params.length));
|
||||||
|
}
|
||||||
|
|
||||||
|
public KclStsAssumeRoleCredentialsProvider(String roleArn, String roleSessionName, String... params) {
|
||||||
|
this.assumeRoleRequestBuilder =
|
||||||
|
AssumeRoleRequest.builder().roleArn(roleArn).roleSessionName(roleSessionName);
|
||||||
|
this.stsClientBuilder = StsClient.builder();
|
||||||
|
NestedPropertyKey.parse(this, params);
|
||||||
|
this.stsAssumeRoleCredentialsProvider = StsAssumeRoleCredentialsProvider.builder()
|
||||||
|
.refreshRequest(assumeRoleRequestBuilder.build())
|
||||||
|
.asyncCredentialUpdateEnabled(true)
|
||||||
|
.stsClient(stsClientBuilder.build())
|
||||||
|
.build();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public AwsCredentials resolveCredentials() {
|
||||||
|
return stsAssumeRoleCredentialsProvider.resolveCredentials();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void acceptEndpoint(String serviceEndpoint, String signingRegion) {
|
||||||
|
if (!serviceEndpoint.startsWith("http://") && !serviceEndpoint.startsWith("https://")) {
|
||||||
|
serviceEndpoint = "https://" + serviceEndpoint;
|
||||||
|
}
|
||||||
|
stsClientBuilder.endpointOverride(URI.create(serviceEndpoint));
|
||||||
|
stsClientBuilder.region(Region.of(signingRegion));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void acceptEndpointRegion(Region region) {
|
||||||
|
stsClientBuilder.region(region);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void acceptExternalId(String externalId) {
|
||||||
|
assumeRoleRequestBuilder.externalId(externalId);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -1,182 +0,0 @@
|
||||||
/*
|
|
||||||
* Copyright 2019 Amazon.com, Inc. or its affiliates.
|
|
||||||
* Licensed under the Apache License, Version 2.0 (the
|
|
||||||
* "License"); you may not use this file except in compliance
|
|
||||||
* with the License. You may obtain a copy of the License at
|
|
||||||
*
|
|
||||||
* http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
*
|
|
||||||
* Unless required by applicable law or agreed to in writing, software
|
|
||||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
* See the License for the specific language governing permissions and
|
|
||||||
* limitations under the License.
|
|
||||||
*/
|
|
||||||
package software.amazon.kinesis.multilang.config;
|
|
||||||
|
|
||||||
import java.lang.reflect.InvocationTargetException;
|
|
||||||
import java.util.ArrayList;
|
|
||||||
import java.util.Arrays;
|
|
||||||
import java.util.Collections;
|
|
||||||
import java.util.List;
|
|
||||||
import java.util.stream.Collectors;
|
|
||||||
import java.util.stream.Stream;
|
|
||||||
|
|
||||||
import com.amazonaws.auth.AWSCredentialsProvider;
|
|
||||||
import com.amazonaws.auth.AWSCredentialsProviderChain;
|
|
||||||
import lombok.extern.slf4j.Slf4j;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Get AWSCredentialsProvider property.
|
|
||||||
*/
|
|
||||||
@Slf4j
|
|
||||||
class AWSCredentialsProviderPropertyValueDecoder implements IPropertyValueDecoder<AWSCredentialsProvider> {
|
|
||||||
private static final String LIST_DELIMITER = ",";
|
|
||||||
private static final String ARG_DELIMITER = "|";
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Constructor.
|
|
||||||
*/
|
|
||||||
AWSCredentialsProviderPropertyValueDecoder() {}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Get AWSCredentialsProvider property.
|
|
||||||
*
|
|
||||||
* @param value
|
|
||||||
* property value as String
|
|
||||||
* @return corresponding variable in correct type
|
|
||||||
*/
|
|
||||||
@Override
|
|
||||||
public AWSCredentialsProvider decodeValue(String value) {
|
|
||||||
if (value != null) {
|
|
||||||
List<String> providerNames = getProviderNames(value);
|
|
||||||
List<AWSCredentialsProvider> providers = getValidCredentialsProviders(providerNames);
|
|
||||||
AWSCredentialsProvider[] ps = new AWSCredentialsProvider[providers.size()];
|
|
||||||
providers.toArray(ps);
|
|
||||||
return new AWSCredentialsProviderChain(providers);
|
|
||||||
} else {
|
|
||||||
throw new IllegalArgumentException("Property AWSCredentialsProvider is missing.");
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @return list of supported types
|
|
||||||
*/
|
|
||||||
@Override
|
|
||||||
public List<Class<AWSCredentialsProvider>> getSupportedTypes() {
|
|
||||||
return Collections.singletonList(AWSCredentialsProvider.class);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Convert string list to a list of valid credentials providers.
|
|
||||||
*/
|
|
||||||
private static List<AWSCredentialsProvider> getValidCredentialsProviders(List<String> providerNames) {
|
|
||||||
List<AWSCredentialsProvider> credentialsProviders = new ArrayList<>();
|
|
||||||
|
|
||||||
for (String providerName : providerNames) {
|
|
||||||
final String[] nameAndArgs = providerName.split("\\" + ARG_DELIMITER);
|
|
||||||
final Class<? extends AWSCredentialsProvider> clazz;
|
|
||||||
try {
|
|
||||||
final Class<?> c = Class.forName(nameAndArgs[0]);
|
|
||||||
if (!AWSCredentialsProvider.class.isAssignableFrom(c)) {
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
clazz = (Class<? extends AWSCredentialsProvider>) c;
|
|
||||||
} catch (ClassNotFoundException cnfe) {
|
|
||||||
// Providers are a product of prefixed Strings to cover multiple
|
|
||||||
// namespaces (e.g., "Foo" -> { "some.auth.Foo", "kcl.auth.Foo" }).
|
|
||||||
// It's expected that many class names will not resolve.
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
log.info("Attempting to construct {}", clazz);
|
|
||||||
|
|
||||||
AWSCredentialsProvider provider = null;
|
|
||||||
if (nameAndArgs.length > 1) {
|
|
||||||
final String[] varargs = Arrays.copyOfRange(nameAndArgs, 1, nameAndArgs.length);
|
|
||||||
|
|
||||||
// attempt to invoke an explicit N-arg constructor of FooClass(String, String, ...)
|
|
||||||
provider = constructProvider(providerName, () -> {
|
|
||||||
Class<?>[] argTypes = new Class<?>[nameAndArgs.length - 1];
|
|
||||||
Arrays.fill(argTypes, String.class);
|
|
||||||
return clazz.getConstructor(argTypes).newInstance(varargs);
|
|
||||||
});
|
|
||||||
|
|
||||||
if (provider == null) {
|
|
||||||
// attempt to invoke a public varargs/array constructor of FooClass(String[])
|
|
||||||
provider = constructProvider(providerName, () -> clazz.getConstructor(String[].class)
|
|
||||||
.newInstance((Object) varargs));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (provider == null) {
|
|
||||||
// regardless of parameters, fallback to invoke a public no-arg constructor
|
|
||||||
provider = constructProvider(providerName, clazz::newInstance);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (provider != null) {
|
|
||||||
credentialsProviders.add(provider);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return credentialsProviders;
|
|
||||||
}
|
|
||||||
|
|
||||||
private static List<String> getProviderNames(String property) {
|
|
||||||
// assume list delimiter is ","
|
|
||||||
String[] elements = property.split(LIST_DELIMITER);
|
|
||||||
List<String> result = new ArrayList<>();
|
|
||||||
for (int i = 0; i < elements.length; i++) {
|
|
||||||
String string = elements[i].trim();
|
|
||||||
if (!string.isEmpty()) {
|
|
||||||
// find all possible names and add them to name list
|
|
||||||
result.addAll(getPossibleFullClassNames(string));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
|
|
||||||
private static List<String> getPossibleFullClassNames(final String provider) {
|
|
||||||
return Stream.of(
|
|
||||||
// Customer provides a short name of common providers in com.amazonaws.auth package
|
|
||||||
// (e.g., any classes implementing the AWSCredentialsProvider interface)
|
|
||||||
// @see
|
|
||||||
// http://docs.aws.amazon.com/AWSJavaSDK/latest/javadoc/com/amazonaws/auth/AWSCredentialsProvider.html
|
|
||||||
"com.amazonaws.auth.",
|
|
||||||
|
|
||||||
// Customer provides a short name of a provider offered by this multi-lang package
|
|
||||||
"software.amazon.kinesis.multilang.auth.",
|
|
||||||
|
|
||||||
// Customer provides a fully-qualified provider name, or a custom credentials provider
|
|
||||||
// (e.g., com.amazonaws.auth.ClasspathFileCredentialsProvider, org.mycompany.FooProvider)
|
|
||||||
"")
|
|
||||||
.map(prefix -> prefix + provider)
|
|
||||||
.collect(Collectors.toList());
|
|
||||||
}
|
|
||||||
|
|
||||||
@FunctionalInterface
|
|
||||||
private interface CredentialsProviderConstructor<T extends AWSCredentialsProvider> {
|
|
||||||
T construct()
|
|
||||||
throws IllegalAccessException, InstantiationException, InvocationTargetException, NoSuchMethodException;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Attempts to construct an {@link AWSCredentialsProvider}.
|
|
||||||
*
|
|
||||||
* @param providerName Raw, unmodified provider name. Should there be an
|
|
||||||
* Exeception during construction, this parameter will be logged.
|
|
||||||
* @param constructor supplier-like function that will perform the construction
|
|
||||||
* @return the constructed provider, if successful; otherwise, null
|
|
||||||
*
|
|
||||||
* @param <T> type of the CredentialsProvider to construct
|
|
||||||
*/
|
|
||||||
private static <T extends AWSCredentialsProvider> T constructProvider(
|
|
||||||
final String providerName, final CredentialsProviderConstructor<T> constructor) {
|
|
||||||
try {
|
|
||||||
return constructor.construct();
|
|
||||||
} catch (NoSuchMethodException ignored) {
|
|
||||||
// ignore
|
|
||||||
} catch (IllegalAccessException | InstantiationException | InvocationTargetException | RuntimeException e) {
|
|
||||||
log.warn("Failed to construct {}", providerName, e);
|
|
||||||
}
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
@ -0,0 +1,261 @@
|
||||||
|
/*
|
||||||
|
* Copyright 2019 Amazon.com, Inc. or its affiliates.
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the
|
||||||
|
* "License"); you may not use this file except in compliance
|
||||||
|
* with the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
package software.amazon.kinesis.multilang.config;
|
||||||
|
|
||||||
|
import java.lang.reflect.InvocationTargetException;
|
||||||
|
import java.lang.reflect.Method;
|
||||||
|
import java.lang.reflect.Modifier;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.Arrays;
|
||||||
|
import java.util.Collections;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.stream.Collectors;
|
||||||
|
import java.util.stream.Stream;
|
||||||
|
|
||||||
|
import lombok.extern.slf4j.Slf4j;
|
||||||
|
import software.amazon.awssdk.auth.credentials.AwsCredentialsProvider;
|
||||||
|
import software.amazon.awssdk.auth.credentials.AwsCredentialsProviderChain;
|
||||||
|
import software.amazon.awssdk.services.sts.auth.StsAssumeRoleCredentialsProvider;
|
||||||
|
import software.amazon.kinesis.multilang.auth.KclStsAssumeRoleCredentialsProvider;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Get AwsCredentialsProvider property.
|
||||||
|
*/
|
||||||
|
@Slf4j
|
||||||
|
class AwsCredentialsProviderPropertyValueDecoder implements IPropertyValueDecoder<AwsCredentialsProvider> {
|
||||||
|
private static final String LIST_DELIMITER = ",";
|
||||||
|
private static final String ARG_DELIMITER = "|";
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Constructor.
|
||||||
|
*/
|
||||||
|
AwsCredentialsProviderPropertyValueDecoder() {}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Get AwsCredentialsProvider property.
|
||||||
|
*
|
||||||
|
* @param value
|
||||||
|
* property value as String
|
||||||
|
* @return corresponding variable in correct type
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public AwsCredentialsProvider decodeValue(String value) {
|
||||||
|
if (value != null) {
|
||||||
|
List<String> providerNames = getProviderNames(value);
|
||||||
|
List<AwsCredentialsProvider> providers = getValidCredentialsProviders(providerNames);
|
||||||
|
AwsCredentialsProvider[] ps = new AwsCredentialsProvider[providers.size()];
|
||||||
|
providers.toArray(ps);
|
||||||
|
if (providers.isEmpty()) {
|
||||||
|
log.warn("Unable to construct any provider with name {}", value);
|
||||||
|
log.warn("Please verify that all AwsCredentialsProvider properties are passed correctly");
|
||||||
|
}
|
||||||
|
return AwsCredentialsProviderChain.builder()
|
||||||
|
.credentialsProviders(providers)
|
||||||
|
.build();
|
||||||
|
} else {
|
||||||
|
throw new IllegalArgumentException("Property AwsCredentialsProvider is missing.");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @return list of supported types
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public List<Class<AwsCredentialsProvider>> getSupportedTypes() {
|
||||||
|
return Collections.singletonList(AwsCredentialsProvider.class);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Convert string list to a list of valid credentials providers.
|
||||||
|
*/
|
||||||
|
private static List<AwsCredentialsProvider> getValidCredentialsProviders(List<String> providerNames) {
|
||||||
|
List<AwsCredentialsProvider> credentialsProviders = new ArrayList<>();
|
||||||
|
|
||||||
|
for (String providerName : providerNames) {
|
||||||
|
final String[] nameAndArgs = providerName.split("\\" + ARG_DELIMITER);
|
||||||
|
final Class<? extends AwsCredentialsProvider> clazz = getClass(nameAndArgs[0]);
|
||||||
|
if (clazz == null) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
log.info("Attempting to construct {}", clazz);
|
||||||
|
final String[] varargs =
|
||||||
|
nameAndArgs.length > 1 ? Arrays.copyOfRange(nameAndArgs, 1, nameAndArgs.length) : new String[0];
|
||||||
|
AwsCredentialsProvider provider = tryConstructor(providerName, clazz, varargs);
|
||||||
|
if (provider == null) {
|
||||||
|
provider = tryCreate(providerName, clazz, varargs);
|
||||||
|
}
|
||||||
|
if (provider != null) {
|
||||||
|
log.info("Provider constructed successfully: {}", provider);
|
||||||
|
credentialsProviders.add(provider);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return credentialsProviders;
|
||||||
|
}
|
||||||
|
|
||||||
|
private static AwsCredentialsProvider tryConstructor(
|
||||||
|
String providerName, Class<? extends AwsCredentialsProvider> clazz, String[] varargs) {
|
||||||
|
AwsCredentialsProvider provider =
|
||||||
|
constructProvider(providerName, () -> getConstructorWithVarArgs(clazz, varargs));
|
||||||
|
if (provider == null) {
|
||||||
|
provider = constructProvider(providerName, () -> getConstructorWithArgs(clazz, varargs));
|
||||||
|
}
|
||||||
|
if (provider == null) {
|
||||||
|
provider = constructProvider(providerName, clazz::newInstance);
|
||||||
|
}
|
||||||
|
return provider;
|
||||||
|
}
|
||||||
|
|
||||||
|
private static AwsCredentialsProvider tryCreate(
|
||||||
|
String providerName, Class<? extends AwsCredentialsProvider> clazz, String[] varargs) {
|
||||||
|
AwsCredentialsProvider provider =
|
||||||
|
constructProvider(providerName, () -> getCreateMethod(clazz, (Object) varargs));
|
||||||
|
if (provider == null) {
|
||||||
|
provider = constructProvider(providerName, () -> getCreateMethod(clazz, varargs));
|
||||||
|
}
|
||||||
|
if (provider == null) {
|
||||||
|
provider = constructProvider(providerName, () -> getCreateMethod(clazz));
|
||||||
|
}
|
||||||
|
return provider;
|
||||||
|
}
|
||||||
|
|
||||||
|
private static AwsCredentialsProvider getConstructorWithVarArgs(
|
||||||
|
Class<? extends AwsCredentialsProvider> clazz, String[] varargs) {
|
||||||
|
try {
|
||||||
|
return clazz.getConstructor(String[].class).newInstance((Object) varargs);
|
||||||
|
} catch (Exception e) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private static AwsCredentialsProvider getConstructorWithArgs(
|
||||||
|
Class<? extends AwsCredentialsProvider> clazz, String[] varargs) {
|
||||||
|
try {
|
||||||
|
Class<?>[] argTypes = new Class<?>[varargs.length];
|
||||||
|
Arrays.fill(argTypes, String.class);
|
||||||
|
return clazz.getConstructor(argTypes).newInstance((Object[]) varargs);
|
||||||
|
} catch (Exception e) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private static AwsCredentialsProvider getCreateMethod(
|
||||||
|
Class<? extends AwsCredentialsProvider> clazz, Object... args) {
|
||||||
|
try {
|
||||||
|
Class<?>[] argTypes = new Class<?>[args.length];
|
||||||
|
for (int i = 0; i < args.length; i++) {
|
||||||
|
argTypes[i] = args[i].getClass();
|
||||||
|
}
|
||||||
|
Method createMethod = clazz.getDeclaredMethod("create", argTypes);
|
||||||
|
if (Modifier.isStatic(createMethod.getModifiers())) {
|
||||||
|
return clazz.cast(createMethod.invoke(null, args));
|
||||||
|
} else {
|
||||||
|
log.warn("Found non-static create() method in {}", clazz.getName());
|
||||||
|
}
|
||||||
|
} catch (NoSuchMethodException e) {
|
||||||
|
// No matching create method found for class
|
||||||
|
} catch (Exception e) {
|
||||||
|
log.warn("Failed to invoke create() method in {}", clazz.getName(), e);
|
||||||
|
}
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Resolves the class for the given provider name.
|
||||||
|
*
|
||||||
|
* @param providerName A string containing the provider name.
|
||||||
|
*
|
||||||
|
* @return The Class object representing the resolved AwsCredentialsProvider implementation,
|
||||||
|
* or null if the class cannot be resolved or does not extend AwsCredentialsProvider.
|
||||||
|
*/
|
||||||
|
private static Class<? extends AwsCredentialsProvider> getClass(String providerName) {
|
||||||
|
// Convert any form of StsAssumeRoleCredentialsProvider string to KclStsAssumeRoleCredentialsProvider
|
||||||
|
if (providerName.equals(StsAssumeRoleCredentialsProvider.class.getSimpleName())
|
||||||
|
|| providerName.equals(StsAssumeRoleCredentialsProvider.class.getName())) {
|
||||||
|
providerName = KclStsAssumeRoleCredentialsProvider.class.getName();
|
||||||
|
}
|
||||||
|
try {
|
||||||
|
final Class<?> c = Class.forName(providerName);
|
||||||
|
if (!AwsCredentialsProvider.class.isAssignableFrom(c)) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
return (Class<? extends AwsCredentialsProvider>) c;
|
||||||
|
} catch (ClassNotFoundException cnfe) {
|
||||||
|
// Providers are a product of prefixed Strings to cover multiple
|
||||||
|
// namespaces (e.g., "Foo" -> { "some.auth.Foo", "kcl.auth.Foo" }).
|
||||||
|
// It's expected that many class names will not resolve.
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private static List<String> getProviderNames(String property) {
|
||||||
|
// assume list delimiter is ","
|
||||||
|
String[] elements = property.split(LIST_DELIMITER);
|
||||||
|
List<String> result = new ArrayList<>();
|
||||||
|
for (int i = 0; i < elements.length; i++) {
|
||||||
|
String string = elements[i].trim();
|
||||||
|
if (!string.isEmpty()) {
|
||||||
|
// find all possible names and add them to name list
|
||||||
|
result.addAll(getPossibleFullClassNames(string));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
private static List<String> getPossibleFullClassNames(final String provider) {
|
||||||
|
return Stream.of(
|
||||||
|
// Customer provides a short name of a provider offered by this multi-lang package
|
||||||
|
"software.amazon.kinesis.multilang.auth.",
|
||||||
|
// Customer provides a short name of common providers in software.amazon.awssdk.auth.credentials
|
||||||
|
// package (e.g., any classes implementing the AwsCredentialsProvider interface)
|
||||||
|
// @see
|
||||||
|
// https://sdk.amazonaws.com/java/api/latest/software/amazon/awssdk/auth/credentials/AwsCredentialsProvider.html
|
||||||
|
"software.amazon.awssdk.auth.credentials.",
|
||||||
|
// Customer provides a fully-qualified provider name, or a custom credentials provider
|
||||||
|
// (e.g., org.mycompany.FooProvider)
|
||||||
|
"")
|
||||||
|
.map(prefix -> prefix + provider)
|
||||||
|
.collect(Collectors.toList());
|
||||||
|
}
|
||||||
|
|
||||||
|
@FunctionalInterface
|
||||||
|
private interface CredentialsProviderConstructor<T extends AwsCredentialsProvider> {
|
||||||
|
T construct()
|
||||||
|
throws IllegalAccessException, InstantiationException, InvocationTargetException, NoSuchMethodException;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Attempts to construct an {@link AwsCredentialsProvider}.
|
||||||
|
*
|
||||||
|
* @param providerName Raw, unmodified provider name. Should there be an
|
||||||
|
* Exception during construction, this parameter will be logged.
|
||||||
|
* @param constructor supplier-like function that will perform the construction
|
||||||
|
* @return the constructed provider, if successful; otherwise, null
|
||||||
|
*
|
||||||
|
* @param <T> type of the CredentialsProvider to construct
|
||||||
|
*/
|
||||||
|
private static <T extends AwsCredentialsProvider> T constructProvider(
|
||||||
|
final String providerName, final CredentialsProviderConstructor<T> constructor) {
|
||||||
|
try {
|
||||||
|
return constructor.construct();
|
||||||
|
} catch (NoSuchMethodException
|
||||||
|
| IllegalAccessException
|
||||||
|
| InstantiationException
|
||||||
|
| InvocationTargetException
|
||||||
|
| RuntimeException ignored) {
|
||||||
|
// ignore
|
||||||
|
}
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,56 @@
|
||||||
|
/*
|
||||||
|
* Copyright 2024 Amazon.com, Inc. or its affiliates.
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the
|
||||||
|
* "License"); you may not use this file except in compliance
|
||||||
|
* with the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package software.amazon.kinesis.multilang.config;
|
||||||
|
|
||||||
|
import lombok.Getter;
|
||||||
|
import lombok.Setter;
|
||||||
|
import software.amazon.awssdk.services.dynamodb.model.BillingMode;
|
||||||
|
import software.amazon.kinesis.coordinator.CoordinatorConfig.CoordinatorStateTableConfig;
|
||||||
|
|
||||||
|
@Getter
|
||||||
|
@Setter
|
||||||
|
public class CoordinatorStateTableConfigBean {
|
||||||
|
|
||||||
|
interface CoordinatorStateConfigBeanDelegate {
|
||||||
|
String getCoordinatorStateTableName();
|
||||||
|
|
||||||
|
void setCoordinatorStateTableName(String value);
|
||||||
|
|
||||||
|
BillingMode getCoordinatorStateBillingMode();
|
||||||
|
|
||||||
|
void setCoordinatorStateBillingMode(BillingMode value);
|
||||||
|
|
||||||
|
long getCoordinatorStateReadCapacity();
|
||||||
|
|
||||||
|
void setCoordinatorStateReadCapacity(long value);
|
||||||
|
|
||||||
|
long getCoordinatorStateWriteCapacity();
|
||||||
|
|
||||||
|
void setCoordinatorStateWriteCapacity(long value);
|
||||||
|
}
|
||||||
|
|
||||||
|
@ConfigurationSettable(configurationClass = CoordinatorStateTableConfig.class, methodName = "tableName")
|
||||||
|
private String coordinatorStateTableName;
|
||||||
|
|
||||||
|
@ConfigurationSettable(configurationClass = CoordinatorStateTableConfig.class, methodName = "billingMode")
|
||||||
|
private BillingMode coordinatorStateBillingMode;
|
||||||
|
|
||||||
|
@ConfigurationSettable(configurationClass = CoordinatorStateTableConfig.class, methodName = "readCapacity")
|
||||||
|
private long coordinatorStateReadCapacity;
|
||||||
|
|
||||||
|
@ConfigurationSettable(configurationClass = CoordinatorStateTableConfig.class, methodName = "writeCapacity")
|
||||||
|
private long coordinatorStateWriteCapacity;
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,41 @@
|
||||||
|
/*
|
||||||
|
* Copyright 2024 Amazon.com, Inc. or its affiliates.
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the
|
||||||
|
* "License"); you may not use this file except in compliance
|
||||||
|
* with the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package software.amazon.kinesis.multilang.config;
|
||||||
|
|
||||||
|
import lombok.Getter;
|
||||||
|
import lombok.Setter;
|
||||||
|
import software.amazon.kinesis.leases.LeaseManagementConfig;
|
||||||
|
|
||||||
|
@Getter
|
||||||
|
@Setter
|
||||||
|
public class GracefulLeaseHandoffConfigBean {
|
||||||
|
|
||||||
|
interface GracefulLeaseHandoffConfigBeanDelegate {
|
||||||
|
Long getGracefulLeaseHandoffTimeoutMillis();
|
||||||
|
|
||||||
|
void setGracefulLeaseHandoffTimeoutMillis(Long value);
|
||||||
|
|
||||||
|
Boolean getIsGracefulLeaseHandoffEnabled();
|
||||||
|
|
||||||
|
void setIsGracefulLeaseHandoffEnabled(Boolean value);
|
||||||
|
}
|
||||||
|
|
||||||
|
@ConfigurationSettable(configurationClass = LeaseManagementConfig.GracefulLeaseHandoffConfig.class)
|
||||||
|
private Long gracefulLeaseHandoffTimeoutMillis;
|
||||||
|
|
||||||
|
@ConfigurationSettable(configurationClass = LeaseManagementConfig.GracefulLeaseHandoffConfig.class)
|
||||||
|
private Boolean isGracefulLeaseHandoffEnabled;
|
||||||
|
}
|
||||||
|
|
@ -28,7 +28,7 @@ import software.amazon.kinesis.common.StreamIdentifier;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* KinesisClientLibConfigurator constructs a KinesisClientLibConfiguration from java properties file. The following
|
* KinesisClientLibConfigurator constructs a KinesisClientLibConfiguration from java properties file. The following
|
||||||
* three properties must be provided. 1) "applicationName" 2) "streamName" 3) "AWSCredentialsProvider"
|
* three properties must be provided. 1) "applicationName" 2) "streamName" 3) "AwsCredentialsProvider"
|
||||||
* KinesisClientLibConfigurator will help to automatically assign the value of "workerId" if this property is not
|
* KinesisClientLibConfigurator will help to automatically assign the value of "workerId" if this property is not
|
||||||
* provided. In the specified properties file, any properties, which matches the variable name in
|
* provided. In the specified properties file, any properties, which matches the variable name in
|
||||||
* KinesisClientLibConfiguration and has a corresponding "with{variableName}" setter method, will be read in, and its
|
* KinesisClientLibConfiguration and has a corresponding "with{variableName}" setter method, will be read in, and its
|
||||||
|
|
@ -62,7 +62,7 @@ public class KinesisClientLibConfigurator {
|
||||||
properties.entrySet().forEach(e -> {
|
properties.entrySet().forEach(e -> {
|
||||||
try {
|
try {
|
||||||
log.info("Processing (key={}, value={})", e.getKey(), e.getValue());
|
log.info("Processing (key={}, value={})", e.getKey(), e.getValue());
|
||||||
utilsBean.setProperty(configuration, (String) e.getKey(), e.getValue());
|
utilsBean.setProperty(configuration, processKey((String) e.getKey()), e.getValue());
|
||||||
} catch (IllegalAccessException | InvocationTargetException ex) {
|
} catch (IllegalAccessException | InvocationTargetException ex) {
|
||||||
throw new RuntimeException(ex);
|
throw new RuntimeException(ex);
|
||||||
}
|
}
|
||||||
|
|
@ -110,4 +110,17 @@ public class KinesisClientLibConfigurator {
|
||||||
}
|
}
|
||||||
return getConfiguration(properties);
|
return getConfiguration(properties);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Processes a configuration key to normalize AWS credentials provider naming. Necessary to conform to
|
||||||
|
* autogenerated setters.
|
||||||
|
* @param key the config param key
|
||||||
|
* @return case-configured param key name
|
||||||
|
*/
|
||||||
|
String processKey(String key) {
|
||||||
|
if (key.toLowerCase().startsWith("awscredentialsprovider")) {
|
||||||
|
key = key.replaceAll("(?i)awscredentialsprovider", "awsCredentialsProvider");
|
||||||
|
}
|
||||||
|
return key;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -17,6 +17,7 @@ package software.amazon.kinesis.multilang.config;
|
||||||
|
|
||||||
import java.lang.reflect.InvocationTargetException;
|
import java.lang.reflect.InvocationTargetException;
|
||||||
import java.net.URI;
|
import java.net.URI;
|
||||||
|
import java.time.Duration;
|
||||||
import java.util.Arrays;
|
import java.util.Arrays;
|
||||||
import java.util.Collections;
|
import java.util.Collections;
|
||||||
import java.util.Date;
|
import java.util.Date;
|
||||||
|
|
@ -41,6 +42,7 @@ import software.amazon.awssdk.auth.credentials.AwsCredentialsProvider;
|
||||||
import software.amazon.awssdk.regions.Region;
|
import software.amazon.awssdk.regions.Region;
|
||||||
import software.amazon.awssdk.services.cloudwatch.CloudWatchAsyncClient;
|
import software.amazon.awssdk.services.cloudwatch.CloudWatchAsyncClient;
|
||||||
import software.amazon.awssdk.services.dynamodb.DynamoDbAsyncClient;
|
import software.amazon.awssdk.services.dynamodb.DynamoDbAsyncClient;
|
||||||
|
import software.amazon.awssdk.services.dynamodb.model.BillingMode;
|
||||||
import software.amazon.awssdk.services.kinesis.KinesisAsyncClient;
|
import software.amazon.awssdk.services.kinesis.KinesisAsyncClient;
|
||||||
import software.amazon.awssdk.services.kinesis.KinesisAsyncClientBuilder;
|
import software.amazon.awssdk.services.kinesis.KinesisAsyncClientBuilder;
|
||||||
import software.amazon.kinesis.checkpoint.CheckpointConfig;
|
import software.amazon.kinesis.checkpoint.CheckpointConfig;
|
||||||
|
|
@ -55,7 +57,7 @@ import software.amazon.kinesis.leases.ShardPrioritization;
|
||||||
import software.amazon.kinesis.lifecycle.LifecycleConfig;
|
import software.amazon.kinesis.lifecycle.LifecycleConfig;
|
||||||
import software.amazon.kinesis.metrics.MetricsConfig;
|
import software.amazon.kinesis.metrics.MetricsConfig;
|
||||||
import software.amazon.kinesis.metrics.MetricsLevel;
|
import software.amazon.kinesis.metrics.MetricsLevel;
|
||||||
import software.amazon.kinesis.multilang.config.credentials.V2CredentialWrapper;
|
import software.amazon.kinesis.multilang.config.converter.DurationConverter;
|
||||||
import software.amazon.kinesis.processor.ProcessorConfig;
|
import software.amazon.kinesis.processor.ProcessorConfig;
|
||||||
import software.amazon.kinesis.processor.ShardRecordProcessorFactory;
|
import software.amazon.kinesis.processor.ShardRecordProcessorFactory;
|
||||||
import software.amazon.kinesis.retrieval.RetrievalConfig;
|
import software.amazon.kinesis.retrieval.RetrievalConfig;
|
||||||
|
|
@ -156,6 +158,9 @@ public class MultiLangDaemonConfiguration {
|
||||||
@ConfigurationSettable(configurationClass = CoordinatorConfig.class)
|
@ConfigurationSettable(configurationClass = CoordinatorConfig.class)
|
||||||
private long schedulerInitializationBackoffTimeMillis;
|
private long schedulerInitializationBackoffTimeMillis;
|
||||||
|
|
||||||
|
@ConfigurationSettable(configurationClass = CoordinatorConfig.class)
|
||||||
|
private CoordinatorConfig.ClientVersionConfig clientVersionConfig;
|
||||||
|
|
||||||
@ConfigurationSettable(configurationClass = LifecycleConfig.class)
|
@ConfigurationSettable(configurationClass = LifecycleConfig.class)
|
||||||
private long taskBackoffTimeMillis;
|
private long taskBackoffTimeMillis;
|
||||||
|
|
||||||
|
|
@ -189,6 +194,22 @@ public class MultiLangDaemonConfiguration {
|
||||||
@Delegate(types = PollingConfigBean.PollingConfigBeanDelegate.class)
|
@Delegate(types = PollingConfigBean.PollingConfigBeanDelegate.class)
|
||||||
private final PollingConfigBean pollingConfig = new PollingConfigBean();
|
private final PollingConfigBean pollingConfig = new PollingConfigBean();
|
||||||
|
|
||||||
|
@Delegate(types = GracefulLeaseHandoffConfigBean.GracefulLeaseHandoffConfigBeanDelegate.class)
|
||||||
|
private final GracefulLeaseHandoffConfigBean gracefulLeaseHandoffConfigBean = new GracefulLeaseHandoffConfigBean();
|
||||||
|
|
||||||
|
@Delegate(
|
||||||
|
types = WorkerUtilizationAwareAssignmentConfigBean.WorkerUtilizationAwareAssignmentConfigBeanDelegate.class)
|
||||||
|
private final WorkerUtilizationAwareAssignmentConfigBean workerUtilizationAwareAssignmentConfigBean =
|
||||||
|
new WorkerUtilizationAwareAssignmentConfigBean();
|
||||||
|
|
||||||
|
@Delegate(types = WorkerMetricStatsTableConfigBean.WorkerMetricsTableConfigBeanDelegate.class)
|
||||||
|
private final WorkerMetricStatsTableConfigBean workerMetricStatsTableConfigBean =
|
||||||
|
new WorkerMetricStatsTableConfigBean();
|
||||||
|
|
||||||
|
@Delegate(types = CoordinatorStateTableConfigBean.CoordinatorStateConfigBeanDelegate.class)
|
||||||
|
private final CoordinatorStateTableConfigBean coordinatorStateTableConfigBean =
|
||||||
|
new CoordinatorStateTableConfigBean();
|
||||||
|
|
||||||
private boolean validateSequenceNumberBeforeCheckpointing;
|
private boolean validateSequenceNumberBeforeCheckpointing;
|
||||||
|
|
||||||
private long shutdownGraceMillis;
|
private long shutdownGraceMillis;
|
||||||
|
|
@ -196,19 +217,19 @@ public class MultiLangDaemonConfiguration {
|
||||||
|
|
||||||
private final BuilderDynaBean kinesisCredentialsProvider;
|
private final BuilderDynaBean kinesisCredentialsProvider;
|
||||||
|
|
||||||
public void setAWSCredentialsProvider(String providerString) {
|
public void setAwsCredentialsProvider(String providerString) {
|
||||||
kinesisCredentialsProvider.set("", providerString);
|
kinesisCredentialsProvider.set("", providerString);
|
||||||
}
|
}
|
||||||
|
|
||||||
private final BuilderDynaBean dynamoDBCredentialsProvider;
|
private final BuilderDynaBean dynamoDBCredentialsProvider;
|
||||||
|
|
||||||
public void setAWSCredentialsProviderDynamoDB(String providerString) {
|
public void setAwsCredentialsProviderDynamoDB(String providerString) {
|
||||||
dynamoDBCredentialsProvider.set("", providerString);
|
dynamoDBCredentialsProvider.set("", providerString);
|
||||||
}
|
}
|
||||||
|
|
||||||
private final BuilderDynaBean cloudWatchCredentialsProvider;
|
private final BuilderDynaBean cloudWatchCredentialsProvider;
|
||||||
|
|
||||||
public void setAWSCredentialsProviderCloudWatch(String providerString) {
|
public void setAwsCredentialsProviderCloudWatch(String providerString) {
|
||||||
cloudWatchCredentialsProvider.set("", providerString);
|
cloudWatchCredentialsProvider.set("", providerString);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -252,6 +273,25 @@ public class MultiLangDaemonConfiguration {
|
||||||
},
|
},
|
||||||
InitialPositionInStream.class);
|
InitialPositionInStream.class);
|
||||||
|
|
||||||
|
convertUtilsBean.register(
|
||||||
|
new Converter() {
|
||||||
|
@Override
|
||||||
|
public <T> T convert(Class<T> type, Object value) {
|
||||||
|
return type.cast(CoordinatorConfig.ClientVersionConfig.valueOf(
|
||||||
|
value.toString().toUpperCase()));
|
||||||
|
}
|
||||||
|
},
|
||||||
|
CoordinatorConfig.ClientVersionConfig.class);
|
||||||
|
|
||||||
|
convertUtilsBean.register(
|
||||||
|
new Converter() {
|
||||||
|
@Override
|
||||||
|
public <T> T convert(Class<T> type, Object value) {
|
||||||
|
return type.cast(BillingMode.valueOf(value.toString().toUpperCase()));
|
||||||
|
}
|
||||||
|
},
|
||||||
|
BillingMode.class);
|
||||||
|
|
||||||
convertUtilsBean.register(
|
convertUtilsBean.register(
|
||||||
new Converter() {
|
new Converter() {
|
||||||
@Override
|
@Override
|
||||||
|
|
@ -279,12 +319,14 @@ public class MultiLangDaemonConfiguration {
|
||||||
},
|
},
|
||||||
Region.class);
|
Region.class);
|
||||||
|
|
||||||
|
convertUtilsBean.register(new DurationConverter(), Duration.class);
|
||||||
|
|
||||||
ArrayConverter arrayConverter = new ArrayConverter(String[].class, new StringConverter());
|
ArrayConverter arrayConverter = new ArrayConverter(String[].class, new StringConverter());
|
||||||
arrayConverter.setDelimiter(',');
|
arrayConverter.setDelimiter(',');
|
||||||
convertUtilsBean.register(arrayConverter, String[].class);
|
convertUtilsBean.register(arrayConverter, String[].class);
|
||||||
AWSCredentialsProviderPropertyValueDecoder oldCredentialsDecoder =
|
AwsCredentialsProviderPropertyValueDecoder credentialsDecoder =
|
||||||
new AWSCredentialsProviderPropertyValueDecoder();
|
new AwsCredentialsProviderPropertyValueDecoder();
|
||||||
Function<String, ?> converter = s -> new V2CredentialWrapper(oldCredentialsDecoder.decodeValue(s));
|
Function<String, ?> converter = credentialsDecoder::decodeValue;
|
||||||
|
|
||||||
this.kinesisCredentialsProvider = new BuilderDynaBean(
|
this.kinesisCredentialsProvider = new BuilderDynaBean(
|
||||||
AwsCredentialsProvider.class, convertUtilsBean, converter, CREDENTIALS_DEFAULT_SEARCH_PATH);
|
AwsCredentialsProvider.class, convertUtilsBean, converter, CREDENTIALS_DEFAULT_SEARCH_PATH);
|
||||||
|
|
@ -370,6 +412,22 @@ public class MultiLangDaemonConfiguration {
|
||||||
retrievalMode.builder(this).build(configsBuilder.kinesisClient(), this));
|
retrievalMode.builder(this).build(configsBuilder.kinesisClient(), this));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private void handleCoordinatorConfig(CoordinatorConfig coordinatorConfig) {
|
||||||
|
ConfigurationSettableUtils.resolveFields(
|
||||||
|
this.coordinatorStateTableConfigBean, coordinatorConfig.coordinatorStateTableConfig());
|
||||||
|
}
|
||||||
|
|
||||||
|
private void handleLeaseManagementConfig(LeaseManagementConfig leaseManagementConfig) {
|
||||||
|
ConfigurationSettableUtils.resolveFields(
|
||||||
|
this.gracefulLeaseHandoffConfigBean, leaseManagementConfig.gracefulLeaseHandoffConfig());
|
||||||
|
ConfigurationSettableUtils.resolveFields(
|
||||||
|
this.workerUtilizationAwareAssignmentConfigBean,
|
||||||
|
leaseManagementConfig.workerUtilizationAwareAssignmentConfig());
|
||||||
|
ConfigurationSettableUtils.resolveFields(
|
||||||
|
this.workerMetricStatsTableConfigBean,
|
||||||
|
leaseManagementConfig.workerUtilizationAwareAssignmentConfig().workerMetricsTableConfig());
|
||||||
|
}
|
||||||
|
|
||||||
private Object adjustKinesisHttpConfiguration(Object builderObj) {
|
private Object adjustKinesisHttpConfiguration(Object builderObj) {
|
||||||
if (builderObj instanceof KinesisAsyncClientBuilder) {
|
if (builderObj instanceof KinesisAsyncClientBuilder) {
|
||||||
KinesisAsyncClientBuilder builder = (KinesisAsyncClientBuilder) builderObj;
|
KinesisAsyncClientBuilder builder = (KinesisAsyncClientBuilder) builderObj;
|
||||||
|
|
@ -448,6 +506,8 @@ public class MultiLangDaemonConfiguration {
|
||||||
processorConfig,
|
processorConfig,
|
||||||
retrievalConfig);
|
retrievalConfig);
|
||||||
|
|
||||||
|
handleCoordinatorConfig(coordinatorConfig);
|
||||||
|
handleLeaseManagementConfig(leaseManagementConfig);
|
||||||
handleRetrievalConfig(retrievalConfig, configsBuilder);
|
handleRetrievalConfig(retrievalConfig, configsBuilder);
|
||||||
|
|
||||||
resolveFields(configObjects, null, new HashSet<>(Arrays.asList(ConfigsBuilder.class, PollingConfig.class)));
|
resolveFields(configObjects, null, new HashSet<>(Arrays.asList(ConfigsBuilder.class, PollingConfig.class)));
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,56 @@
|
||||||
|
/*
|
||||||
|
* Copyright 2024 Amazon.com, Inc. or its affiliates.
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the
|
||||||
|
* "License"); you may not use this file except in compliance
|
||||||
|
* with the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package software.amazon.kinesis.multilang.config;
|
||||||
|
|
||||||
|
import lombok.Getter;
|
||||||
|
import lombok.Setter;
|
||||||
|
import software.amazon.awssdk.services.dynamodb.model.BillingMode;
|
||||||
|
import software.amazon.kinesis.leases.LeaseManagementConfig.WorkerMetricsTableConfig;
|
||||||
|
|
||||||
|
@Getter
|
||||||
|
@Setter
|
||||||
|
public class WorkerMetricStatsTableConfigBean {
|
||||||
|
|
||||||
|
interface WorkerMetricsTableConfigBeanDelegate {
|
||||||
|
String getWorkerMetricsTableName();
|
||||||
|
|
||||||
|
void setWorkerMetricsTableName(String value);
|
||||||
|
|
||||||
|
BillingMode getWorkerMetricsBillingMode();
|
||||||
|
|
||||||
|
void setWorkerMetricsBillingMode(BillingMode value);
|
||||||
|
|
||||||
|
long getWorkerMetricsReadCapacity();
|
||||||
|
|
||||||
|
void setWorkerMetricsReadCapacity(long value);
|
||||||
|
|
||||||
|
long getWorkerMetricsWriteCapacity();
|
||||||
|
|
||||||
|
void setWorkerMetricsWriteCapacity(long value);
|
||||||
|
}
|
||||||
|
|
||||||
|
@ConfigurationSettable(configurationClass = WorkerMetricsTableConfig.class, methodName = "tableName")
|
||||||
|
private String workerMetricsTableName;
|
||||||
|
|
||||||
|
@ConfigurationSettable(configurationClass = WorkerMetricsTableConfig.class, methodName = "billingMode")
|
||||||
|
private BillingMode workerMetricsBillingMode;
|
||||||
|
|
||||||
|
@ConfigurationSettable(configurationClass = WorkerMetricsTableConfig.class, methodName = "readCapacity")
|
||||||
|
private long workerMetricsReadCapacity;
|
||||||
|
|
||||||
|
@ConfigurationSettable(configurationClass = WorkerMetricsTableConfig.class, methodName = "writeCapacity")
|
||||||
|
private long workerMetricsWriteCapacity;
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,106 @@
|
||||||
|
/*
|
||||||
|
* Copyright 2024 Amazon.com, Inc. or its affiliates.
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the
|
||||||
|
* "License"); you may not use this file except in compliance
|
||||||
|
* with the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package software.amazon.kinesis.multilang.config;
|
||||||
|
|
||||||
|
import java.time.Duration;
|
||||||
|
|
||||||
|
import lombok.Getter;
|
||||||
|
import lombok.Setter;
|
||||||
|
import software.amazon.kinesis.leases.LeaseManagementConfig.WorkerUtilizationAwareAssignmentConfig;
|
||||||
|
|
||||||
|
@Getter
|
||||||
|
@Setter
|
||||||
|
public class WorkerUtilizationAwareAssignmentConfigBean {
|
||||||
|
|
||||||
|
interface WorkerUtilizationAwareAssignmentConfigBeanDelegate {
|
||||||
|
long getInMemoryWorkerMetricsCaptureFrequencyMillis();
|
||||||
|
|
||||||
|
void setInMemoryWorkerMetricsCaptureFrequencyMillis(long value);
|
||||||
|
|
||||||
|
long getWorkerMetricsReporterFreqInMillis();
|
||||||
|
|
||||||
|
void setWorkerMetricsReporterFreqInMillis(long value);
|
||||||
|
|
||||||
|
int getNoOfPersistedMetricsPerWorkerMetrics();
|
||||||
|
|
||||||
|
void setNoOfPersistedMetricsPerWorkerMetrics(int value);
|
||||||
|
|
||||||
|
Boolean getDisableWorkerMetrics();
|
||||||
|
|
||||||
|
void setDisableWorkerMetrics(Boolean value);
|
||||||
|
|
||||||
|
double getMaxThroughputPerHostKBps();
|
||||||
|
|
||||||
|
void setMaxThroughputPerHostKBps(double value);
|
||||||
|
|
||||||
|
int getDampeningPercentage();
|
||||||
|
|
||||||
|
void setDampeningPercentage(int value);
|
||||||
|
|
||||||
|
int getReBalanceThresholdPercentage();
|
||||||
|
|
||||||
|
void setReBalanceThresholdPercentage(int value);
|
||||||
|
|
||||||
|
Boolean getAllowThroughputOvershoot();
|
||||||
|
|
||||||
|
void setAllowThroughputOvershoot(Boolean value);
|
||||||
|
|
||||||
|
int getVarianceBalancingFrequency();
|
||||||
|
|
||||||
|
void setVarianceBalancingFrequency(int value);
|
||||||
|
|
||||||
|
double getWorkerMetricsEMAAlpha();
|
||||||
|
|
||||||
|
void setWorkerMetricsEMAAlpha(double value);
|
||||||
|
|
||||||
|
void setStaleWorkerMetricsEntryCleanupDuration(Duration value);
|
||||||
|
|
||||||
|
Duration getStaleWorkerMetricsEntryCleanupDuration();
|
||||||
|
}
|
||||||
|
|
||||||
|
@ConfigurationSettable(configurationClass = WorkerUtilizationAwareAssignmentConfig.class)
|
||||||
|
private long inMemoryWorkerMetricsCaptureFrequencyMillis;
|
||||||
|
|
||||||
|
@ConfigurationSettable(configurationClass = WorkerUtilizationAwareAssignmentConfig.class)
|
||||||
|
private long workerMetricsReporterFreqInMillis;
|
||||||
|
|
||||||
|
@ConfigurationSettable(configurationClass = WorkerUtilizationAwareAssignmentConfig.class)
|
||||||
|
private int noOfPersistedMetricsPerWorkerMetrics;
|
||||||
|
|
||||||
|
@ConfigurationSettable(configurationClass = WorkerUtilizationAwareAssignmentConfig.class)
|
||||||
|
private Boolean disableWorkerMetrics;
|
||||||
|
|
||||||
|
@ConfigurationSettable(configurationClass = WorkerUtilizationAwareAssignmentConfig.class)
|
||||||
|
private double maxThroughputPerHostKBps;
|
||||||
|
|
||||||
|
@ConfigurationSettable(configurationClass = WorkerUtilizationAwareAssignmentConfig.class)
|
||||||
|
private int dampeningPercentage;
|
||||||
|
|
||||||
|
@ConfigurationSettable(configurationClass = WorkerUtilizationAwareAssignmentConfig.class)
|
||||||
|
private int reBalanceThresholdPercentage;
|
||||||
|
|
||||||
|
@ConfigurationSettable(configurationClass = WorkerUtilizationAwareAssignmentConfig.class)
|
||||||
|
private Boolean allowThroughputOvershoot;
|
||||||
|
|
||||||
|
@ConfigurationSettable(configurationClass = WorkerUtilizationAwareAssignmentConfig.class)
|
||||||
|
private int varianceBalancingFrequency;
|
||||||
|
|
||||||
|
@ConfigurationSettable(configurationClass = WorkerUtilizationAwareAssignmentConfig.class)
|
||||||
|
private double workerMetricsEMAAlpha;
|
||||||
|
|
||||||
|
@ConfigurationSettable(configurationClass = WorkerUtilizationAwareAssignmentConfig.class)
|
||||||
|
private Duration staleWorkerMetricsEntryCleanupDuration;
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,37 @@
|
||||||
|
package software.amazon.kinesis.multilang.config.converter;
|
||||||
|
|
||||||
|
import java.time.Duration;
|
||||||
|
|
||||||
|
import org.apache.commons.beanutils.Converter;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Converter that converts Duration text representation to a Duration object.
|
||||||
|
* Refer to {@code Duration.parse} javadocs for the exact text representation.
|
||||||
|
*/
|
||||||
|
public class DurationConverter implements Converter {
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public <T> T convert(Class<T> type, Object value) {
|
||||||
|
if (value == null) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (type != Duration.class) {
|
||||||
|
throw new ConversionException("Can only convert to Duration");
|
||||||
|
}
|
||||||
|
|
||||||
|
String durationString = value.toString().trim();
|
||||||
|
final Duration duration = Duration.parse(durationString);
|
||||||
|
if (duration.isNegative()) {
|
||||||
|
throw new ConversionException("Negative values are not permitted for duration: " + durationString);
|
||||||
|
}
|
||||||
|
|
||||||
|
return type.cast(duration);
|
||||||
|
}
|
||||||
|
|
||||||
|
public static class ConversionException extends RuntimeException {
|
||||||
|
public ConversionException(String message) {
|
||||||
|
super(message);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -1,52 +0,0 @@
|
||||||
/*
|
|
||||||
* Copyright 2019 Amazon.com, Inc. or its affiliates.
|
|
||||||
* Licensed under the Apache License, Version 2.0 (the
|
|
||||||
* "License"); you may not use this file except in compliance
|
|
||||||
* with the License. You may obtain a copy of the License at
|
|
||||||
*
|
|
||||||
* http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
*
|
|
||||||
* Unless required by applicable law or agreed to in writing, software
|
|
||||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
* See the License for the specific language governing permissions and
|
|
||||||
* limitations under the License.
|
|
||||||
*/
|
|
||||||
|
|
||||||
package software.amazon.kinesis.multilang.config.credentials;
|
|
||||||
|
|
||||||
import com.amazonaws.auth.AWSCredentials;
|
|
||||||
import com.amazonaws.auth.AWSCredentialsProvider;
|
|
||||||
import com.amazonaws.auth.AWSSessionCredentials;
|
|
||||||
import lombok.RequiredArgsConstructor;
|
|
||||||
import software.amazon.awssdk.auth.credentials.AwsCredentials;
|
|
||||||
import software.amazon.awssdk.auth.credentials.AwsCredentialsProvider;
|
|
||||||
import software.amazon.awssdk.auth.credentials.AwsSessionCredentials;
|
|
||||||
|
|
||||||
@RequiredArgsConstructor
|
|
||||||
public class V2CredentialWrapper implements AwsCredentialsProvider {
|
|
||||||
|
|
||||||
private final AWSCredentialsProvider oldCredentialsProvider;
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public AwsCredentials resolveCredentials() {
|
|
||||||
AWSCredentials current = oldCredentialsProvider.getCredentials();
|
|
||||||
if (current instanceof AWSSessionCredentials) {
|
|
||||||
return AwsSessionCredentials.create(
|
|
||||||
current.getAWSAccessKeyId(),
|
|
||||||
current.getAWSSecretKey(),
|
|
||||||
((AWSSessionCredentials) current).getSessionToken());
|
|
||||||
}
|
|
||||||
return new AwsCredentials() {
|
|
||||||
@Override
|
|
||||||
public String accessKeyId() {
|
|
||||||
return current.getAWSAccessKeyId();
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public String secretAccessKey() {
|
|
||||||
return current.getAWSSecretKey();
|
|
||||||
}
|
|
||||||
};
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
@ -65,7 +65,7 @@ public class MultiLangDaemonConfigTest {
|
||||||
String properties = String.format(
|
String properties = String.format(
|
||||||
"executableName = %s\n"
|
"executableName = %s\n"
|
||||||
+ "applicationName = %s\n"
|
+ "applicationName = %s\n"
|
||||||
+ "AWSCredentialsProvider = DefaultAWSCredentialsProviderChain\n"
|
+ "AwsCredentialsProvider = DefaultCredentialsProvider\n"
|
||||||
+ "processingLanguage = malbolge\n"
|
+ "processingLanguage = malbolge\n"
|
||||||
+ "regionName = %s\n",
|
+ "regionName = %s\n",
|
||||||
EXE, APPLICATION_NAME, "us-east-1");
|
EXE, APPLICATION_NAME, "us-east-1");
|
||||||
|
|
@ -182,7 +182,7 @@ public class MultiLangDaemonConfigTest {
|
||||||
@Test
|
@Test
|
||||||
public void testPropertyValidation() {
|
public void testPropertyValidation() {
|
||||||
String propertiesNoExecutableName = "applicationName = testApp \n" + "streamName = fakeStream \n"
|
String propertiesNoExecutableName = "applicationName = testApp \n" + "streamName = fakeStream \n"
|
||||||
+ "AWSCredentialsProvider = DefaultAWSCredentialsProviderChain\n" + "processingLanguage = malbolge";
|
+ "AwsCredentialsProvider = DefaultCredentialsProvider\n" + "processingLanguage = malbolge";
|
||||||
ClassLoader classLoader = Mockito.mock(ClassLoader.class);
|
ClassLoader classLoader = Mockito.mock(ClassLoader.class);
|
||||||
|
|
||||||
Mockito.doReturn(new ByteArrayInputStream(propertiesNoExecutableName.getBytes()))
|
Mockito.doReturn(new ByteArrayInputStream(propertiesNoExecutableName.getBytes()))
|
||||||
|
|
|
||||||
|
|
@ -157,7 +157,7 @@ public class MultiLangDaemonTest {
|
||||||
|
|
||||||
MultiLangDaemon.MultiLangDaemonArguments arguments = new MultiLangDaemon.MultiLangDaemonArguments();
|
MultiLangDaemon.MultiLangDaemonArguments arguments = new MultiLangDaemon.MultiLangDaemonArguments();
|
||||||
|
|
||||||
daemon.propertiesFile(arguments);
|
daemon.validateAndGetPropertiesFileName(arguments);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
|
|
@ -166,7 +166,7 @@ public class MultiLangDaemonTest {
|
||||||
MultiLangDaemon.MultiLangDaemonArguments arguments = new MultiLangDaemon.MultiLangDaemonArguments();
|
MultiLangDaemon.MultiLangDaemonArguments arguments = new MultiLangDaemon.MultiLangDaemonArguments();
|
||||||
arguments.parameters = Collections.singletonList(expectedPropertiesFile);
|
arguments.parameters = Collections.singletonList(expectedPropertiesFile);
|
||||||
|
|
||||||
String propertiesFile = daemon.propertiesFile(arguments);
|
String propertiesFile = daemon.validateAndGetPropertiesFileName(arguments);
|
||||||
|
|
||||||
assertThat(propertiesFile, equalTo(expectedPropertiesFile));
|
assertThat(propertiesFile, equalTo(expectedPropertiesFile));
|
||||||
}
|
}
|
||||||
|
|
@ -180,7 +180,7 @@ public class MultiLangDaemonTest {
|
||||||
arguments.parameters = Collections.singletonList(propertiesArgument);
|
arguments.parameters = Collections.singletonList(propertiesArgument);
|
||||||
arguments.propertiesFile = propertiesOptions;
|
arguments.propertiesFile = propertiesOptions;
|
||||||
|
|
||||||
String propertiesFile = daemon.propertiesFile(arguments);
|
String propertiesFile = daemon.validateAndGetPropertiesFileName(arguments);
|
||||||
|
|
||||||
assertThat(propertiesFile, equalTo(propertiesOptions));
|
assertThat(propertiesFile, equalTo(propertiesOptions));
|
||||||
}
|
}
|
||||||
|
|
@ -193,7 +193,7 @@ public class MultiLangDaemonTest {
|
||||||
MultiLangDaemon.MultiLangDaemonArguments arguments = new MultiLangDaemon.MultiLangDaemonArguments();
|
MultiLangDaemon.MultiLangDaemonArguments arguments = new MultiLangDaemon.MultiLangDaemonArguments();
|
||||||
arguments.parameters = Arrays.asList("parameter1", "parameter2");
|
arguments.parameters = Arrays.asList("parameter1", "parameter2");
|
||||||
|
|
||||||
daemon.propertiesFile(arguments);
|
daemon.validateAndGetPropertiesFileName(arguments);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
|
|
|
||||||
|
|
@ -14,11 +14,11 @@
|
||||||
*/
|
*/
|
||||||
package software.amazon.kinesis.multilang;
|
package software.amazon.kinesis.multilang;
|
||||||
|
|
||||||
import com.amazonaws.regions.Regions;
|
|
||||||
import org.junit.Test;
|
import org.junit.Test;
|
||||||
import org.junit.runner.RunWith;
|
import org.junit.runner.RunWith;
|
||||||
import org.mockito.Mock;
|
import org.mockito.Mock;
|
||||||
import org.mockito.runners.MockitoJUnitRunner;
|
import org.mockito.runners.MockitoJUnitRunner;
|
||||||
|
import software.amazon.awssdk.regions.Region;
|
||||||
|
|
||||||
import static org.junit.Assert.assertEquals;
|
import static org.junit.Assert.assertEquals;
|
||||||
import static org.mockito.Mockito.verify;
|
import static org.mockito.Mockito.verify;
|
||||||
|
|
@ -64,9 +64,9 @@ public class NestedPropertyKeyTest {
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testEndpointRegion() {
|
public void testEndpointRegion() {
|
||||||
final Regions expectedRegion = Regions.GovCloud;
|
final Region expectedRegion = Region.US_GOV_WEST_1;
|
||||||
|
|
||||||
parse(mockProcessor, createKey(ENDPOINT_REGION, expectedRegion.getName()));
|
parse(mockProcessor, createKey(ENDPOINT_REGION, expectedRegion.id()));
|
||||||
verify(mockProcessor).acceptEndpointRegion(expectedRegion);
|
verify(mockProcessor).acceptEndpointRegion(expectedRegion);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -31,15 +31,14 @@ public class KclSTSAssumeRoleSessionCredentialsProviderTest {
|
||||||
*/
|
*/
|
||||||
@Test
|
@Test
|
||||||
public void testConstructorWithoutOptionalParams() {
|
public void testConstructorWithoutOptionalParams() {
|
||||||
new KclSTSAssumeRoleSessionCredentialsProvider(new String[] {ARN, SESSION_NAME});
|
new KclStsAssumeRoleCredentialsProvider(new String[] {ARN, SESSION_NAME});
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testAcceptEndpoint() {
|
public void testAcceptEndpoint() {
|
||||||
// discovered exception during e2e testing; therefore, this test is
|
// discovered exception during e2e testing; therefore, this test is
|
||||||
// to simply verify the constructed STS client doesn't go *boom*
|
// to simply verify the constructed STS client doesn't go *boom*
|
||||||
final KclSTSAssumeRoleSessionCredentialsProvider provider =
|
final KclStsAssumeRoleCredentialsProvider provider = new KclStsAssumeRoleCredentialsProvider(ARN, SESSION_NAME);
|
||||||
new KclSTSAssumeRoleSessionCredentialsProvider(ARN, SESSION_NAME);
|
|
||||||
provider.acceptEndpoint("endpoint", "us-east-1");
|
provider.acceptEndpoint("endpoint", "us-east-1");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -53,7 +52,7 @@ public class KclSTSAssumeRoleSessionCredentialsProviderTest {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private static class VarArgsSpy extends KclSTSAssumeRoleSessionCredentialsProvider {
|
private static class VarArgsSpy extends KclStsAssumeRoleCredentialsProvider {
|
||||||
|
|
||||||
private String externalId;
|
private String externalId;
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -16,16 +16,17 @@ package software.amazon.kinesis.multilang.config;
|
||||||
|
|
||||||
import java.util.Arrays;
|
import java.util.Arrays;
|
||||||
|
|
||||||
import com.amazonaws.auth.AWSCredentials;
|
|
||||||
import com.amazonaws.auth.AWSCredentialsProvider;
|
|
||||||
import com.amazonaws.auth.AWSCredentialsProviderChain;
|
|
||||||
import com.amazonaws.auth.BasicAWSCredentials;
|
|
||||||
import lombok.ToString;
|
import lombok.ToString;
|
||||||
import org.hamcrest.Description;
|
import org.hamcrest.Description;
|
||||||
import org.hamcrest.Matcher;
|
import org.hamcrest.Matcher;
|
||||||
import org.hamcrest.TypeSafeDiagnosingMatcher;
|
import org.hamcrest.TypeSafeDiagnosingMatcher;
|
||||||
import org.junit.Test;
|
import org.junit.Test;
|
||||||
import software.amazon.kinesis.multilang.auth.KclSTSAssumeRoleSessionCredentialsProvider;
|
import software.amazon.awssdk.auth.credentials.AwsBasicCredentials;
|
||||||
|
import software.amazon.awssdk.auth.credentials.AwsCredentials;
|
||||||
|
import software.amazon.awssdk.auth.credentials.AwsCredentialsProvider;
|
||||||
|
import software.amazon.awssdk.auth.credentials.AwsCredentialsProviderChain;
|
||||||
|
import software.amazon.awssdk.services.sts.auth.StsAssumeRoleCredentialsProvider;
|
||||||
|
import software.amazon.kinesis.multilang.auth.KclStsAssumeRoleCredentialsProvider;
|
||||||
|
|
||||||
import static org.hamcrest.CoreMatchers.equalTo;
|
import static org.hamcrest.CoreMatchers.equalTo;
|
||||||
import static org.hamcrest.CoreMatchers.instanceOf;
|
import static org.hamcrest.CoreMatchers.instanceOf;
|
||||||
|
|
@ -33,31 +34,32 @@ import static org.junit.Assert.assertEquals;
|
||||||
import static org.junit.Assert.assertNotNull;
|
import static org.junit.Assert.assertNotNull;
|
||||||
import static org.junit.Assert.assertThat;
|
import static org.junit.Assert.assertThat;
|
||||||
|
|
||||||
public class AWSCredentialsProviderPropertyValueDecoderTest {
|
public class AwsCredentialsProviderPropertyValueDecoderTest {
|
||||||
|
|
||||||
private static final String TEST_ACCESS_KEY_ID = "123";
|
private static final String TEST_ACCESS_KEY_ID = "123";
|
||||||
private static final String TEST_SECRET_KEY = "456";
|
private static final String TEST_SECRET_KEY = "456";
|
||||||
|
|
||||||
private final String credentialName1 = AlwaysSucceedCredentialsProvider.class.getName();
|
private final String credentialName1 = AlwaysSucceedCredentialsProvider.class.getName();
|
||||||
private final String credentialName2 = ConstructorCredentialsProvider.class.getName();
|
private final String credentialName2 = ConstructorCredentialsProvider.class.getName();
|
||||||
private final AWSCredentialsProviderPropertyValueDecoder decoder = new AWSCredentialsProviderPropertyValueDecoder();
|
private final String createCredentialClass = CreateProvider.class.getName();
|
||||||
|
private final AwsCredentialsProviderPropertyValueDecoder decoder = new AwsCredentialsProviderPropertyValueDecoder();
|
||||||
|
|
||||||
@ToString
|
@ToString
|
||||||
private static class AWSCredentialsMatcher extends TypeSafeDiagnosingMatcher<AWSCredentialsProvider> {
|
private static class AwsCredentialsMatcher extends TypeSafeDiagnosingMatcher<AwsCredentialsProvider> {
|
||||||
|
|
||||||
private final Matcher<String> akidMatcher;
|
private final Matcher<String> akidMatcher;
|
||||||
private final Matcher<String> secretMatcher;
|
private final Matcher<String> secretMatcher;
|
||||||
private final Matcher<Class<?>> classMatcher;
|
private final Matcher<Class<?>> classMatcher;
|
||||||
|
|
||||||
public AWSCredentialsMatcher(String akid, String secret) {
|
public AwsCredentialsMatcher(String akid, String secret) {
|
||||||
this.akidMatcher = equalTo(akid);
|
this.akidMatcher = equalTo(akid);
|
||||||
this.secretMatcher = equalTo(secret);
|
this.secretMatcher = equalTo(secret);
|
||||||
this.classMatcher = instanceOf(AWSCredentialsProviderChain.class);
|
this.classMatcher = instanceOf(AwsCredentialsProviderChain.class);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected boolean matchesSafely(AWSCredentialsProvider item, Description mismatchDescription) {
|
protected boolean matchesSafely(AwsCredentialsProvider item, Description mismatchDescription) {
|
||||||
AWSCredentials actual = item.getCredentials();
|
AwsCredentials actual = item.resolveCredentials();
|
||||||
boolean matched = true;
|
boolean matched = true;
|
||||||
|
|
||||||
if (!classMatcher.matches(item)) {
|
if (!classMatcher.matches(item)) {
|
||||||
|
|
@ -65,12 +67,12 @@ public class AWSCredentialsProviderPropertyValueDecoderTest {
|
||||||
matched = false;
|
matched = false;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!akidMatcher.matches(actual.getAWSAccessKeyId())) {
|
if (!akidMatcher.matches(actual.accessKeyId())) {
|
||||||
akidMatcher.describeMismatch(actual.getAWSAccessKeyId(), mismatchDescription);
|
akidMatcher.describeMismatch(actual.accessKeyId(), mismatchDescription);
|
||||||
matched = false;
|
matched = false;
|
||||||
}
|
}
|
||||||
if (!secretMatcher.matches(actual.getAWSSecretKey())) {
|
if (!secretMatcher.matches(actual.secretAccessKey())) {
|
||||||
secretMatcher.describeMismatch(actual.getAWSSecretKey(), mismatchDescription);
|
secretMatcher.describeMismatch(actual.secretAccessKey(), mismatchDescription);
|
||||||
matched = false;
|
matched = false;
|
||||||
}
|
}
|
||||||
return matched;
|
return matched;
|
||||||
|
|
@ -79,36 +81,36 @@ public class AWSCredentialsProviderPropertyValueDecoderTest {
|
||||||
@Override
|
@Override
|
||||||
public void describeTo(Description description) {
|
public void describeTo(Description description) {
|
||||||
description
|
description
|
||||||
.appendText("An AWSCredentialsProvider that provides an AWSCredential matching: ")
|
.appendText("An AwsCredentialsProvider that provides an AwsCredential matching: ")
|
||||||
.appendList("(", ", ", ")", Arrays.asList(classMatcher, akidMatcher, secretMatcher));
|
.appendList("(", ", ", ")", Arrays.asList(classMatcher, akidMatcher, secretMatcher));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private static AWSCredentialsMatcher hasCredentials(String akid, String secret) {
|
private static AwsCredentialsMatcher hasCredentials(String akid, String secret) {
|
||||||
return new AWSCredentialsMatcher(akid, secret);
|
return new AwsCredentialsMatcher(akid, secret);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testSingleProvider() {
|
public void testSingleProvider() {
|
||||||
AWSCredentialsProvider provider = decoder.decodeValue(credentialName1);
|
AwsCredentialsProvider provider = decoder.decodeValue(credentialName1);
|
||||||
assertThat(provider, hasCredentials(TEST_ACCESS_KEY_ID, TEST_SECRET_KEY));
|
assertThat(provider, hasCredentials(TEST_ACCESS_KEY_ID, TEST_SECRET_KEY));
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testTwoProviders() {
|
public void testTwoProviders() {
|
||||||
AWSCredentialsProvider provider = decoder.decodeValue(credentialName1 + "," + credentialName1);
|
AwsCredentialsProvider provider = decoder.decodeValue(credentialName1 + "," + credentialName1);
|
||||||
assertThat(provider, hasCredentials(TEST_ACCESS_KEY_ID, TEST_SECRET_KEY));
|
assertThat(provider, hasCredentials(TEST_ACCESS_KEY_ID, TEST_SECRET_KEY));
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testProfileProviderWithOneArg() {
|
public void testProfileProviderWithOneArg() {
|
||||||
AWSCredentialsProvider provider = decoder.decodeValue(credentialName2 + "|arg");
|
AwsCredentialsProvider provider = decoder.decodeValue(credentialName2 + "|arg");
|
||||||
assertThat(provider, hasCredentials("arg", "blank"));
|
assertThat(provider, hasCredentials("arg", "blank"));
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testProfileProviderWithTwoArgs() {
|
public void testProfileProviderWithTwoArgs() {
|
||||||
AWSCredentialsProvider provider = decoder.decodeValue(credentialName2 + "|arg1|arg2");
|
AwsCredentialsProvider provider = decoder.decodeValue(credentialName2 + "|arg1|arg2");
|
||||||
assertThat(provider, hasCredentials("arg1", "arg2"));
|
assertThat(provider, hasCredentials("arg1", "arg2"));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -118,14 +120,33 @@ public class AWSCredentialsProviderPropertyValueDecoderTest {
|
||||||
@Test
|
@Test
|
||||||
public void testKclAuthProvider() {
|
public void testKclAuthProvider() {
|
||||||
for (final String className : Arrays.asList(
|
for (final String className : Arrays.asList(
|
||||||
KclSTSAssumeRoleSessionCredentialsProvider.class.getName(), // fully-qualified name
|
KclStsAssumeRoleCredentialsProvider.class.getName(), // fully-qualified name
|
||||||
KclSTSAssumeRoleSessionCredentialsProvider.class.getSimpleName() // name-only; needs prefix
|
KclStsAssumeRoleCredentialsProvider.class.getSimpleName(), // name-only; needs prefix
|
||||||
)) {
|
StsAssumeRoleCredentialsProvider.class.getName(), // user passes full sts package path
|
||||||
final AWSCredentialsProvider provider = decoder.decodeValue(className + "|arn|sessionName");
|
StsAssumeRoleCredentialsProvider.class.getSimpleName())) {
|
||||||
|
final AwsCredentialsProvider provider = decoder.decodeValue(className + "|arn|sessionName");
|
||||||
assertNotNull(className, provider);
|
assertNotNull(className, provider);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Test that OneArgCreateProvider in the SDK v2 can process a create() method
|
||||||
|
*/
|
||||||
|
@Test
|
||||||
|
public void testEmptyCreateProvider() {
|
||||||
|
AwsCredentialsProvider provider = decoder.decodeValue(createCredentialClass);
|
||||||
|
assertThat(provider, hasCredentials(TEST_ACCESS_KEY_ID, TEST_SECRET_KEY));
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Test that OneArgCreateProvider in the SDK v2 can process a create(arg1) method
|
||||||
|
*/
|
||||||
|
@Test
|
||||||
|
public void testOneArgCreateProvider() {
|
||||||
|
AwsCredentialsProvider provider = decoder.decodeValue(createCredentialClass + "|testCreateProperty");
|
||||||
|
assertThat(provider, hasCredentials("testCreateProperty", TEST_SECRET_KEY));
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Test that a provider can be instantiated by its varargs constructor.
|
* Test that a provider can be instantiated by its varargs constructor.
|
||||||
*/
|
*/
|
||||||
|
|
@ -135,28 +156,24 @@ public class AWSCredentialsProviderPropertyValueDecoderTest {
|
||||||
final String className = VarArgCredentialsProvider.class.getName();
|
final String className = VarArgCredentialsProvider.class.getName();
|
||||||
final String encodedValue = className + "|" + String.join("|", args);
|
final String encodedValue = className + "|" + String.join("|", args);
|
||||||
|
|
||||||
final AWSCredentialsProvider provider = decoder.decodeValue(encodedValue);
|
final AwsCredentialsProvider provider = decoder.decodeValue(encodedValue);
|
||||||
assertEquals(Arrays.toString(args), provider.getCredentials().getAWSAccessKeyId());
|
assertEquals(Arrays.toString(args), provider.resolveCredentials().accessKeyId());
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* This credentials provider will always succeed
|
* This credentials provider will always succeed
|
||||||
*/
|
*/
|
||||||
public static class AlwaysSucceedCredentialsProvider implements AWSCredentialsProvider {
|
public static class AlwaysSucceedCredentialsProvider implements AwsCredentialsProvider {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public AWSCredentials getCredentials() {
|
public AwsCredentials resolveCredentials() {
|
||||||
return new BasicAWSCredentials(TEST_ACCESS_KEY_ID, TEST_SECRET_KEY);
|
return AwsBasicCredentials.create(TEST_ACCESS_KEY_ID, TEST_SECRET_KEY);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
|
||||||
public void refresh() {}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* This credentials provider needs a constructor call to instantiate it
|
* This credentials provider needs a constructor call to instantiate it
|
||||||
*/
|
*/
|
||||||
public static class ConstructorCredentialsProvider implements AWSCredentialsProvider {
|
public static class ConstructorCredentialsProvider implements AwsCredentialsProvider {
|
||||||
|
|
||||||
private String arg1;
|
private String arg1;
|
||||||
private String arg2;
|
private String arg2;
|
||||||
|
|
@ -172,15 +189,12 @@ public class AWSCredentialsProviderPropertyValueDecoderTest {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public AWSCredentials getCredentials() {
|
public AwsCredentials resolveCredentials() {
|
||||||
return new BasicAWSCredentials(arg1, arg2);
|
return AwsBasicCredentials.create(arg1, arg2);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
private static class VarArgCredentialsProvider implements AwsCredentialsProvider {
|
||||||
public void refresh() {}
|
|
||||||
}
|
|
||||||
|
|
||||||
private static class VarArgCredentialsProvider implements AWSCredentialsProvider {
|
|
||||||
|
|
||||||
private final String[] args;
|
private final String[] args;
|
||||||
|
|
||||||
|
|
@ -189,13 +203,34 @@ public class AWSCredentialsProviderPropertyValueDecoderTest {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public AWSCredentials getCredentials() {
|
public AwsCredentials resolveCredentials() {
|
||||||
// KISS solution to surface the constructor args
|
// KISS solution to surface the constructor args
|
||||||
final String flattenedArgs = Arrays.toString(args);
|
final String flattenedArgs = Arrays.toString(args);
|
||||||
return new BasicAWSCredentials(flattenedArgs, flattenedArgs);
|
return AwsBasicCredentials.create(flattenedArgs, flattenedArgs);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Credentials provider to test AWS SDK v2 create() methods for providers like ProfileCredentialsProvider
|
||||||
|
*/
|
||||||
|
public static class CreateProvider implements AwsCredentialsProvider {
|
||||||
|
private String accessKeyId;
|
||||||
|
|
||||||
|
private CreateProvider(String accessKeyId) {
|
||||||
|
this.accessKeyId = accessKeyId;
|
||||||
|
}
|
||||||
|
|
||||||
|
public static CreateProvider create() {
|
||||||
|
return new CreateProvider(TEST_ACCESS_KEY_ID);
|
||||||
|
}
|
||||||
|
|
||||||
|
public static CreateProvider create(String accessKeyId) {
|
||||||
|
return new CreateProvider(accessKeyId);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void refresh() {}
|
public AwsCredentials resolveCredentials() {
|
||||||
|
return AwsBasicCredentials.create(accessKeyId, TEST_SECRET_KEY);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -52,6 +52,16 @@ public class ConfigurationSettableUtilsTest {
|
||||||
assertThat(actual, equalTo(expected));
|
assertThat(actual, equalTo(expected));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testBoolean() {
|
||||||
|
ConfigResult expected = ConfigResult.builder().bool(false).build();
|
||||||
|
|
||||||
|
ConfigObject configObject = ConfigObject.builder().bool(expected.bool).build();
|
||||||
|
ConfigResult actual = resolve(configObject);
|
||||||
|
|
||||||
|
assertThat(actual, equalTo(expected));
|
||||||
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testHeapValuesSet() {
|
public void testHeapValuesSet() {
|
||||||
ConfigResult expected =
|
ConfigResult expected =
|
||||||
|
|
@ -147,6 +157,9 @@ public class ConfigurationSettableUtilsTest {
|
||||||
private Long boxedLong;
|
private Long boxedLong;
|
||||||
private ComplexValue complexValue;
|
private ComplexValue complexValue;
|
||||||
|
|
||||||
|
@Builder.Default
|
||||||
|
private Boolean bool = true;
|
||||||
|
|
||||||
private Optional<String> optionalString;
|
private Optional<String> optionalString;
|
||||||
private Optional<Integer> optionalInteger;
|
private Optional<Integer> optionalInteger;
|
||||||
private Optional<Long> optionalLong;
|
private Optional<Long> optionalLong;
|
||||||
|
|
@ -175,6 +188,10 @@ public class ConfigurationSettableUtilsTest {
|
||||||
@ConfigurationSettable(configurationClass = ConfigResult.class)
|
@ConfigurationSettable(configurationClass = ConfigResult.class)
|
||||||
private int rawInt;
|
private int rawInt;
|
||||||
|
|
||||||
|
@ConfigurationSettable(configurationClass = ConfigResult.class)
|
||||||
|
@Builder.Default
|
||||||
|
private Boolean bool = true;
|
||||||
|
|
||||||
@ConfigurationSettable(configurationClass = ConfigResult.class)
|
@ConfigurationSettable(configurationClass = ConfigResult.class)
|
||||||
private Integer boxedInt;
|
private Integer boxedInt;
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -20,19 +20,21 @@ import java.net.URI;
|
||||||
import java.util.Arrays;
|
import java.util.Arrays;
|
||||||
import java.util.Date;
|
import java.util.Date;
|
||||||
import java.util.HashSet;
|
import java.util.HashSet;
|
||||||
|
import java.util.NoSuchElementException;
|
||||||
import java.util.Set;
|
import java.util.Set;
|
||||||
|
|
||||||
import com.amazonaws.auth.AWSCredentials;
|
|
||||||
import com.amazonaws.auth.AWSCredentialsProvider;
|
|
||||||
import com.amazonaws.auth.BasicAWSCredentials;
|
|
||||||
import com.google.common.collect.ImmutableSet;
|
import com.google.common.collect.ImmutableSet;
|
||||||
import org.apache.commons.lang3.StringUtils;
|
import org.apache.commons.lang3.StringUtils;
|
||||||
import org.apache.commons.lang3.exception.ExceptionUtils;
|
import org.apache.commons.lang3.exception.ExceptionUtils;
|
||||||
import org.junit.Test;
|
import org.junit.Test;
|
||||||
import org.junit.runner.RunWith;
|
import org.junit.runner.RunWith;
|
||||||
import org.mockito.runners.MockitoJUnitRunner;
|
import org.mockito.runners.MockitoJUnitRunner;
|
||||||
|
import software.amazon.awssdk.auth.credentials.AwsBasicCredentials;
|
||||||
|
import software.amazon.awssdk.auth.credentials.AwsCredentials;
|
||||||
import software.amazon.awssdk.auth.credentials.AwsCredentialsProvider;
|
import software.amazon.awssdk.auth.credentials.AwsCredentialsProvider;
|
||||||
|
import software.amazon.awssdk.services.dynamodb.model.BillingMode;
|
||||||
import software.amazon.kinesis.common.InitialPositionInStream;
|
import software.amazon.kinesis.common.InitialPositionInStream;
|
||||||
|
import software.amazon.kinesis.coordinator.CoordinatorConfig;
|
||||||
import software.amazon.kinesis.metrics.MetricsLevel;
|
import software.amazon.kinesis.metrics.MetricsLevel;
|
||||||
|
|
||||||
import static org.hamcrest.CoreMatchers.equalTo;
|
import static org.hamcrest.CoreMatchers.equalTo;
|
||||||
|
|
@ -40,6 +42,7 @@ import static org.hamcrest.CoreMatchers.nullValue;
|
||||||
import static org.junit.Assert.assertEquals;
|
import static org.junit.Assert.assertEquals;
|
||||||
import static org.junit.Assert.assertFalse;
|
import static org.junit.Assert.assertFalse;
|
||||||
import static org.junit.Assert.assertNotNull;
|
import static org.junit.Assert.assertNotNull;
|
||||||
|
import static org.junit.Assert.assertNull;
|
||||||
import static org.junit.Assert.assertThat;
|
import static org.junit.Assert.assertThat;
|
||||||
import static org.junit.Assert.assertTrue;
|
import static org.junit.Assert.assertTrue;
|
||||||
import static org.junit.Assert.fail;
|
import static org.junit.Assert.fail;
|
||||||
|
|
@ -60,7 +63,7 @@ public class KinesisClientLibConfiguratorTest {
|
||||||
new String[] {
|
new String[] {
|
||||||
"streamName = a",
|
"streamName = a",
|
||||||
"applicationName = b",
|
"applicationName = b",
|
||||||
"AWSCredentialsProvider = " + credentialName1,
|
"AwsCredentialsProvider = " + credentialName1,
|
||||||
"workerId = 123"
|
"workerId = 123"
|
||||||
},
|
},
|
||||||
'\n'));
|
'\n'));
|
||||||
|
|
@ -69,6 +72,8 @@ public class KinesisClientLibConfiguratorTest {
|
||||||
assertEquals(config.getWorkerIdentifier(), "123");
|
assertEquals(config.getWorkerIdentifier(), "123");
|
||||||
assertThat(config.getMaxGetRecordsThreadPool(), nullValue());
|
assertThat(config.getMaxGetRecordsThreadPool(), nullValue());
|
||||||
assertThat(config.getRetryGetRecordsInSeconds(), nullValue());
|
assertThat(config.getRetryGetRecordsInSeconds(), nullValue());
|
||||||
|
assertNull(config.getGracefulLeaseHandoffTimeoutMillis());
|
||||||
|
assertNull(config.getIsGracefulLeaseHandoffEnabled());
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
|
|
@ -77,7 +82,7 @@ public class KinesisClientLibConfiguratorTest {
|
||||||
new String[] {
|
new String[] {
|
||||||
"applicationName = app",
|
"applicationName = app",
|
||||||
"streamName = 123",
|
"streamName = 123",
|
||||||
"AWSCredentialsProvider = " + credentialName1 + ", " + credentialName2,
|
"AwsCredentialsProvider = " + credentialName1 + ", " + credentialName2,
|
||||||
"workerId = 123",
|
"workerId = 123",
|
||||||
"failoverTimeMillis = 100",
|
"failoverTimeMillis = 100",
|
||||||
"shardSyncIntervalMillis = 500"
|
"shardSyncIntervalMillis = 500"
|
||||||
|
|
@ -98,7 +103,7 @@ public class KinesisClientLibConfiguratorTest {
|
||||||
new String[] {
|
new String[] {
|
||||||
"applicationName = app",
|
"applicationName = app",
|
||||||
"streamName = 123",
|
"streamName = 123",
|
||||||
"AWSCredentialsProvider = " + credentialName1 + ", " + credentialName2,
|
"AwsCredentialsProvider = " + credentialName1 + ", " + credentialName2,
|
||||||
"initialPositionInStreamExtended = " + epochTimeInSeconds
|
"initialPositionInStreamExtended = " + epochTimeInSeconds
|
||||||
},
|
},
|
||||||
'\n'));
|
'\n'));
|
||||||
|
|
@ -116,7 +121,7 @@ public class KinesisClientLibConfiguratorTest {
|
||||||
new String[] {
|
new String[] {
|
||||||
"applicationName = app",
|
"applicationName = app",
|
||||||
"streamName = 123",
|
"streamName = 123",
|
||||||
"AWSCredentialsProvider = " + credentialName1 + ", " + credentialName2,
|
"AwsCredentialsProvider = " + credentialName1 + ", " + credentialName2,
|
||||||
"initialPositionInStream = AT_TIMESTAMP"
|
"initialPositionInStream = AT_TIMESTAMP"
|
||||||
},
|
},
|
||||||
'\n'));
|
'\n'));
|
||||||
|
|
@ -136,7 +141,7 @@ public class KinesisClientLibConfiguratorTest {
|
||||||
new String[] {
|
new String[] {
|
||||||
"applicationName = app",
|
"applicationName = app",
|
||||||
"streamName = 123",
|
"streamName = 123",
|
||||||
"AWSCredentialsProvider = " + credentialName1 + ", " + credentialName2,
|
"AwsCredentialsProvider = " + credentialName1 + ", " + credentialName2,
|
||||||
"initialPositionInStreamExtended = null"
|
"initialPositionInStreamExtended = null"
|
||||||
},
|
},
|
||||||
'\n'));
|
'\n'));
|
||||||
|
|
@ -147,11 +152,156 @@ public class KinesisClientLibConfiguratorTest {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testGracefulLeaseHandoffConfig() {
|
||||||
|
final Long testGracefulLeaseHandoffTimeoutMillis = 12345L;
|
||||||
|
final boolean testGracefulLeaseHandoffEnabled = true;
|
||||||
|
|
||||||
|
final MultiLangDaemonConfiguration config = getConfiguration(StringUtils.join(
|
||||||
|
new String[] {
|
||||||
|
"applicationName = dummyApplicationName",
|
||||||
|
"streamName = dummyStreamName",
|
||||||
|
"AWSCredentialsProvider = " + credentialName1 + ", " + credentialName2,
|
||||||
|
"gracefulLeaseHandoffTimeoutMillis = " + testGracefulLeaseHandoffTimeoutMillis,
|
||||||
|
"isGracefulLeaseHandoffEnabled = " + testGracefulLeaseHandoffEnabled
|
||||||
|
},
|
||||||
|
'\n'));
|
||||||
|
|
||||||
|
assertEquals(testGracefulLeaseHandoffTimeoutMillis, config.getGracefulLeaseHandoffTimeoutMillis());
|
||||||
|
assertEquals(testGracefulLeaseHandoffEnabled, config.getIsGracefulLeaseHandoffEnabled());
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testClientVersionConfig() {
|
||||||
|
final CoordinatorConfig.ClientVersionConfig testClientVersionConfig = Arrays.stream(
|
||||||
|
CoordinatorConfig.ClientVersionConfig.values())
|
||||||
|
.findAny()
|
||||||
|
.orElseThrow(NoSuchElementException::new);
|
||||||
|
|
||||||
|
final MultiLangDaemonConfiguration config = getConfiguration(StringUtils.join(
|
||||||
|
new String[] {
|
||||||
|
"applicationName = dummyApplicationName",
|
||||||
|
"streamName = dummyStreamName",
|
||||||
|
"AWSCredentialsProvider = " + credentialName1 + ", " + credentialName2,
|
||||||
|
"clientVersionConfig = " + testClientVersionConfig.name()
|
||||||
|
},
|
||||||
|
'\n'));
|
||||||
|
|
||||||
|
assertEquals(testClientVersionConfig, config.getClientVersionConfig());
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testCoordinatorStateConfig() {
|
||||||
|
final String testCoordinatorStateTableName = "CoordState";
|
||||||
|
final BillingMode testCoordinatorStateBillingMode = BillingMode.PAY_PER_REQUEST;
|
||||||
|
final long testCoordinatorStateReadCapacity = 123;
|
||||||
|
final long testCoordinatorStateWriteCapacity = 123;
|
||||||
|
|
||||||
|
final MultiLangDaemonConfiguration config = getConfiguration(StringUtils.join(
|
||||||
|
new String[] {
|
||||||
|
"applicationName = dummyApplicationName",
|
||||||
|
"streamName = dummyStreamName",
|
||||||
|
"AWSCredentialsProvider = " + credentialName1 + ", " + credentialName2,
|
||||||
|
"coordinatorStateTableName = " + testCoordinatorStateTableName,
|
||||||
|
"coordinatorStateBillingMode = " + testCoordinatorStateBillingMode.name(),
|
||||||
|
"coordinatorStateReadCapacity = " + testCoordinatorStateReadCapacity,
|
||||||
|
"coordinatorStateWriteCapacity = " + testCoordinatorStateWriteCapacity
|
||||||
|
},
|
||||||
|
'\n'));
|
||||||
|
|
||||||
|
assertEquals(testCoordinatorStateTableName, config.getCoordinatorStateTableName());
|
||||||
|
assertEquals(testCoordinatorStateBillingMode, config.getCoordinatorStateBillingMode());
|
||||||
|
assertEquals(testCoordinatorStateReadCapacity, config.getCoordinatorStateReadCapacity());
|
||||||
|
assertEquals(testCoordinatorStateWriteCapacity, config.getCoordinatorStateWriteCapacity());
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testWorkerUtilizationAwareAssignmentConfig() {
|
||||||
|
final long testInMemoryWorkerMetricsCaptureFrequencyMillis = 123;
|
||||||
|
final long testWorkerMetricsReporterFreqInMillis = 123;
|
||||||
|
final long testNoOfPersistedMetricsPerWorkerMetrics = 123;
|
||||||
|
final Boolean testDisableWorkerMetrics = true;
|
||||||
|
final double testMaxThroughputPerHostKBps = 123;
|
||||||
|
final long testDampeningPercentage = 12;
|
||||||
|
final long testReBalanceThresholdPercentage = 12;
|
||||||
|
final Boolean testAllowThroughputOvershoot = false;
|
||||||
|
final long testVarianceBalancingFrequency = 12;
|
||||||
|
final double testWorkerMetricsEMAAlpha = .123;
|
||||||
|
|
||||||
|
final MultiLangDaemonConfiguration config = getConfiguration(StringUtils.join(
|
||||||
|
new String[] {
|
||||||
|
"applicationName = dummyApplicationName",
|
||||||
|
"streamName = dummyStreamName",
|
||||||
|
"AWSCredentialsProvider = " + credentialName1 + ", " + credentialName2,
|
||||||
|
"inMemoryWorkerMetricsCaptureFrequencyMillis = " + testInMemoryWorkerMetricsCaptureFrequencyMillis,
|
||||||
|
"workerMetricsReporterFreqInMillis = " + testWorkerMetricsReporterFreqInMillis,
|
||||||
|
"noOfPersistedMetricsPerWorkerMetrics = " + testNoOfPersistedMetricsPerWorkerMetrics,
|
||||||
|
"disableWorkerMetrics = " + testDisableWorkerMetrics,
|
||||||
|
"maxThroughputPerHostKBps = " + testMaxThroughputPerHostKBps,
|
||||||
|
"dampeningPercentage = " + testDampeningPercentage,
|
||||||
|
"reBalanceThresholdPercentage = " + testReBalanceThresholdPercentage,
|
||||||
|
"allowThroughputOvershoot = " + testAllowThroughputOvershoot,
|
||||||
|
"varianceBalancingFrequency = " + testVarianceBalancingFrequency,
|
||||||
|
"workerMetricsEMAAlpha = " + testWorkerMetricsEMAAlpha
|
||||||
|
},
|
||||||
|
'\n'));
|
||||||
|
|
||||||
|
assertEquals(
|
||||||
|
testInMemoryWorkerMetricsCaptureFrequencyMillis,
|
||||||
|
config.getInMemoryWorkerMetricsCaptureFrequencyMillis());
|
||||||
|
assertEquals(testWorkerMetricsReporterFreqInMillis, config.getWorkerMetricsReporterFreqInMillis());
|
||||||
|
assertEquals(testNoOfPersistedMetricsPerWorkerMetrics, config.getNoOfPersistedMetricsPerWorkerMetrics());
|
||||||
|
assertEquals(testDisableWorkerMetrics, config.getDisableWorkerMetrics());
|
||||||
|
assertEquals(testMaxThroughputPerHostKBps, config.getMaxThroughputPerHostKBps(), 0.0001);
|
||||||
|
assertEquals(testDampeningPercentage, config.getDampeningPercentage());
|
||||||
|
assertEquals(testReBalanceThresholdPercentage, config.getReBalanceThresholdPercentage());
|
||||||
|
assertEquals(testAllowThroughputOvershoot, config.getAllowThroughputOvershoot());
|
||||||
|
assertEquals(testVarianceBalancingFrequency, config.getVarianceBalancingFrequency());
|
||||||
|
assertEquals(testWorkerMetricsEMAAlpha, config.getWorkerMetricsEMAAlpha(), 0.0001);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testWorkerMetricsConfig() {
|
||||||
|
final String testWorkerMetricsTableName = "CoordState";
|
||||||
|
final BillingMode testWorkerMetricsBillingMode = BillingMode.PROVISIONED;
|
||||||
|
final long testWorkerMetricsReadCapacity = 123;
|
||||||
|
final long testWorkerMetricsWriteCapacity = 123;
|
||||||
|
|
||||||
|
final MultiLangDaemonConfiguration config = getConfiguration(StringUtils.join(
|
||||||
|
new String[] {
|
||||||
|
"applicationName = dummyApplicationName",
|
||||||
|
"streamName = dummyStreamName",
|
||||||
|
"AWSCredentialsProvider = " + credentialName1 + ", " + credentialName2,
|
||||||
|
"workerMetricsTableName = " + testWorkerMetricsTableName,
|
||||||
|
"workerMetricsBillingMode = " + testWorkerMetricsBillingMode.name(),
|
||||||
|
"workerMetricsReadCapacity = " + testWorkerMetricsReadCapacity,
|
||||||
|
"workerMetricsWriteCapacity = " + testWorkerMetricsWriteCapacity
|
||||||
|
},
|
||||||
|
'\n'));
|
||||||
|
|
||||||
|
assertEquals(testWorkerMetricsTableName, config.getWorkerMetricsTableName());
|
||||||
|
assertEquals(testWorkerMetricsBillingMode, config.getWorkerMetricsBillingMode());
|
||||||
|
assertEquals(testWorkerMetricsReadCapacity, config.getWorkerMetricsReadCapacity());
|
||||||
|
assertEquals(testWorkerMetricsWriteCapacity, config.getWorkerMetricsWriteCapacity());
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test(expected = IllegalArgumentException.class)
|
||||||
|
public void testInvalidClientVersionConfig() {
|
||||||
|
getConfiguration(StringUtils.join(
|
||||||
|
new String[] {
|
||||||
|
"applicationName = dummyApplicationName",
|
||||||
|
"streamName = dummyStreamName",
|
||||||
|
"AWSCredentialsProvider = " + credentialName1 + ", " + credentialName2,
|
||||||
|
"clientVersionConfig = " + "invalid_client_version_config"
|
||||||
|
},
|
||||||
|
'\n'));
|
||||||
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testWithUnsupportedClientConfigurationVariables() {
|
public void testWithUnsupportedClientConfigurationVariables() {
|
||||||
MultiLangDaemonConfiguration config = getConfiguration(StringUtils.join(
|
MultiLangDaemonConfiguration config = getConfiguration(StringUtils.join(
|
||||||
new String[] {
|
new String[] {
|
||||||
"AWSCredentialsProvider = " + credentialName1 + ", " + credentialName2,
|
"AwsCredentialsProvider = " + credentialName1 + ", " + credentialName2,
|
||||||
"workerId = id",
|
"workerId = id",
|
||||||
"kinesisClientConfig = {}",
|
"kinesisClientConfig = {}",
|
||||||
"streamName = stream",
|
"streamName = stream",
|
||||||
|
|
@ -170,7 +320,7 @@ public class KinesisClientLibConfiguratorTest {
|
||||||
MultiLangDaemonConfiguration config = getConfiguration(StringUtils.join(
|
MultiLangDaemonConfiguration config = getConfiguration(StringUtils.join(
|
||||||
new String[] {
|
new String[] {
|
||||||
"streamName = kinesis",
|
"streamName = kinesis",
|
||||||
"AWSCredentialsProvider = " + credentialName2 + ", " + credentialName1,
|
"AwsCredentialsProvider = " + credentialName2 + ", " + credentialName1,
|
||||||
"workerId = w123",
|
"workerId = w123",
|
||||||
"maxRecords = 10",
|
"maxRecords = 10",
|
||||||
"metricsMaxQueueSize = 20",
|
"metricsMaxQueueSize = 20",
|
||||||
|
|
@ -195,7 +345,7 @@ public class KinesisClientLibConfiguratorTest {
|
||||||
new String[] {
|
new String[] {
|
||||||
"streamName = a",
|
"streamName = a",
|
||||||
"applicationName = b",
|
"applicationName = b",
|
||||||
"AWSCredentialsProvider = ABCD, " + credentialName1,
|
"AwsCredentialsProvider = ABCD, " + credentialName1,
|
||||||
"workerId = 0",
|
"workerId = 0",
|
||||||
"cleanupLeasesUponShardCompletion = false",
|
"cleanupLeasesUponShardCompletion = false",
|
||||||
"validateSequenceNumberBeforeCheckpointing = true"
|
"validateSequenceNumberBeforeCheckpointing = true"
|
||||||
|
|
@ -215,7 +365,7 @@ public class KinesisClientLibConfiguratorTest {
|
||||||
new String[] {
|
new String[] {
|
||||||
"streamName = a",
|
"streamName = a",
|
||||||
"applicationName = b",
|
"applicationName = b",
|
||||||
"AWSCredentialsProvider = ABCD," + credentialName1,
|
"AwsCredentialsProvider = ABCD," + credentialName1,
|
||||||
"workerId = 1",
|
"workerId = 1",
|
||||||
"kinesisEndpoint = https://kinesis",
|
"kinesisEndpoint = https://kinesis",
|
||||||
"metricsLevel = SUMMARY"
|
"metricsLevel = SUMMARY"
|
||||||
|
|
@ -233,7 +383,7 @@ public class KinesisClientLibConfiguratorTest {
|
||||||
new String[] {
|
new String[] {
|
||||||
"streamName = a",
|
"streamName = a",
|
||||||
"applicationName = b",
|
"applicationName = b",
|
||||||
"AWSCredentialsProvider = ABCD," + credentialName1,
|
"AwsCredentialsProvider = ABCD," + credentialName1,
|
||||||
"workerId = 1",
|
"workerId = 1",
|
||||||
"metricsEnabledDimensions = ShardId, WorkerIdentifier"
|
"metricsEnabledDimensions = ShardId, WorkerIdentifier"
|
||||||
},
|
},
|
||||||
|
|
@ -253,7 +403,7 @@ public class KinesisClientLibConfiguratorTest {
|
||||||
new String[] {
|
new String[] {
|
||||||
"streamName = a",
|
"streamName = a",
|
||||||
"applicationName = b",
|
"applicationName = b",
|
||||||
"AWSCredentialsProvider = ABCD," + credentialName1,
|
"AwsCredentialsProvider = ABCD," + credentialName1,
|
||||||
"workerId = 123",
|
"workerId = 123",
|
||||||
"initialPositionInStream = TriM_Horizon"
|
"initialPositionInStream = TriM_Horizon"
|
||||||
},
|
},
|
||||||
|
|
@ -268,7 +418,7 @@ public class KinesisClientLibConfiguratorTest {
|
||||||
new String[] {
|
new String[] {
|
||||||
"streamName = a",
|
"streamName = a",
|
||||||
"applicationName = b",
|
"applicationName = b",
|
||||||
"AWSCredentialsProvider = ABCD," + credentialName1,
|
"AwsCredentialsProvider = ABCD," + credentialName1,
|
||||||
"workerId = 123",
|
"workerId = 123",
|
||||||
"initialPositionInStream = LateSt"
|
"initialPositionInStream = LateSt"
|
||||||
},
|
},
|
||||||
|
|
@ -283,7 +433,7 @@ public class KinesisClientLibConfiguratorTest {
|
||||||
new String[] {
|
new String[] {
|
||||||
"streamName = a",
|
"streamName = a",
|
||||||
"applicationName = b",
|
"applicationName = b",
|
||||||
"AWSCredentialsProvider = ABCD," + credentialName1,
|
"AwsCredentialsProvider = ABCD," + credentialName1,
|
||||||
"workerId = 123",
|
"workerId = 123",
|
||||||
"initialPositionInStream = TriM_Horizon",
|
"initialPositionInStream = TriM_Horizon",
|
||||||
"abc = 1"
|
"abc = 1"
|
||||||
|
|
@ -302,7 +452,7 @@ public class KinesisClientLibConfiguratorTest {
|
||||||
new String[] {
|
new String[] {
|
||||||
"streamName = a",
|
"streamName = a",
|
||||||
"applicationName = b",
|
"applicationName = b",
|
||||||
"AWSCredentialsProvider = ABCD," + credentialName1,
|
"AwsCredentialsProvider = ABCD," + credentialName1,
|
||||||
"workerId = 123",
|
"workerId = 123",
|
||||||
"initialPositionInStream = TriM_Horizon",
|
"initialPositionInStream = TriM_Horizon",
|
||||||
"maxGetRecordsThreadPool = 1"
|
"maxGetRecordsThreadPool = 1"
|
||||||
|
|
@ -318,7 +468,7 @@ public class KinesisClientLibConfiguratorTest {
|
||||||
new String[] {
|
new String[] {
|
||||||
"streamName = a",
|
"streamName = a",
|
||||||
"applicationName = b",
|
"applicationName = b",
|
||||||
"AWSCredentialsProvider = ABCD," + credentialName1,
|
"AwsCredentialsProvider = ABCD," + credentialName1,
|
||||||
"workerId = 123",
|
"workerId = 123",
|
||||||
"initialPositionInStream = TriM_Horizon",
|
"initialPositionInStream = TriM_Horizon",
|
||||||
"maxGetRecordsThreadPool = 0",
|
"maxGetRecordsThreadPool = 0",
|
||||||
|
|
@ -334,7 +484,7 @@ public class KinesisClientLibConfiguratorTest {
|
||||||
new String[] {
|
new String[] {
|
||||||
"streamName = a",
|
"streamName = a",
|
||||||
"applicationName = b",
|
"applicationName = b",
|
||||||
"AWSCredentialsProvider = " + credentialName1,
|
"AwsCredentialsProvider = " + credentialName1,
|
||||||
"workerId = 123",
|
"workerId = 123",
|
||||||
"failoverTimeMillis = 100nf"
|
"failoverTimeMillis = 100nf"
|
||||||
},
|
},
|
||||||
|
|
@ -348,7 +498,7 @@ public class KinesisClientLibConfiguratorTest {
|
||||||
new String[] {
|
new String[] {
|
||||||
"streamName = a",
|
"streamName = a",
|
||||||
"applicationName = b",
|
"applicationName = b",
|
||||||
"AWSCredentialsProvider = " + credentialName1,
|
"AwsCredentialsProvider = " + credentialName1,
|
||||||
"workerId = 123",
|
"workerId = 123",
|
||||||
"failoverTimeMillis = -12"
|
"failoverTimeMillis = -12"
|
||||||
},
|
},
|
||||||
|
|
@ -380,7 +530,7 @@ public class KinesisClientLibConfiguratorTest {
|
||||||
new String[] {
|
new String[] {
|
||||||
"streamName = a",
|
"streamName = a",
|
||||||
"applicationName = b",
|
"applicationName = b",
|
||||||
"AWSCredentialsProvider = " + credentialName1,
|
"AwsCredentialsProvider = " + credentialName1,
|
||||||
"failoverTimeMillis = 100",
|
"failoverTimeMillis = 100",
|
||||||
"shardSyncIntervalMillis = 500"
|
"shardSyncIntervalMillis = 500"
|
||||||
},
|
},
|
||||||
|
|
@ -397,7 +547,7 @@ public class KinesisClientLibConfiguratorTest {
|
||||||
String test = StringUtils.join(
|
String test = StringUtils.join(
|
||||||
new String[] {
|
new String[] {
|
||||||
"applicationName = b",
|
"applicationName = b",
|
||||||
"AWSCredentialsProvider = " + credentialName1,
|
"AwsCredentialsProvider = " + credentialName1,
|
||||||
"workerId = 123",
|
"workerId = 123",
|
||||||
"failoverTimeMillis = 100"
|
"failoverTimeMillis = 100"
|
||||||
},
|
},
|
||||||
|
|
@ -410,7 +560,7 @@ public class KinesisClientLibConfiguratorTest {
|
||||||
String test = StringUtils.join(
|
String test = StringUtils.join(
|
||||||
new String[] {
|
new String[] {
|
||||||
"applicationName = b",
|
"applicationName = b",
|
||||||
"AWSCredentialsProvider = " + credentialName1,
|
"AwsCredentialsProvider = " + credentialName1,
|
||||||
"workerId = 123",
|
"workerId = 123",
|
||||||
"failoverTimeMillis = 100",
|
"failoverTimeMillis = 100",
|
||||||
"streamName = ",
|
"streamName = ",
|
||||||
|
|
@ -425,7 +575,7 @@ public class KinesisClientLibConfiguratorTest {
|
||||||
String test = StringUtils.join(
|
String test = StringUtils.join(
|
||||||
new String[] {
|
new String[] {
|
||||||
"streamName = a",
|
"streamName = a",
|
||||||
"AWSCredentialsProvider = " + credentialName1,
|
"AwsCredentialsProvider = " + credentialName1,
|
||||||
"workerId = 123",
|
"workerId = 123",
|
||||||
"failoverTimeMillis = 100"
|
"failoverTimeMillis = 100"
|
||||||
},
|
},
|
||||||
|
|
@ -434,12 +584,12 @@ public class KinesisClientLibConfiguratorTest {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testWithAWSCredentialsFailed() {
|
public void testWithAwsCredentialsFailed() {
|
||||||
String test = StringUtils.join(
|
String test = StringUtils.join(
|
||||||
new String[] {
|
new String[] {
|
||||||
"streamName = a",
|
"streamName = a",
|
||||||
"applicationName = b",
|
"applicationName = b",
|
||||||
"AWSCredentialsProvider = " + credentialName2,
|
"AwsCredentialsProvider = " + credentialName2,
|
||||||
"failoverTimeMillis = 100",
|
"failoverTimeMillis = 100",
|
||||||
"shardSyncIntervalMillis = 500"
|
"shardSyncIntervalMillis = 500"
|
||||||
},
|
},
|
||||||
|
|
@ -457,16 +607,44 @@ public class KinesisClientLibConfiguratorTest {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testProcessKeyWithExpectedCasing() {
|
||||||
|
String key = "AwsCredentialsProvider";
|
||||||
|
String result = configurator.processKey(key);
|
||||||
|
assertEquals("awsCredentialsProvider", result);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testProcessKeyWithOldCasing() {
|
||||||
|
String key = "AWSCredentialsProvider";
|
||||||
|
String result = configurator.processKey(key);
|
||||||
|
assertEquals("awsCredentialsProvider", result);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testProcessKeyWithMixedCasing() {
|
||||||
|
String key = "AwScReDeNtIaLsPrOvIdEr";
|
||||||
|
String result = configurator.processKey(key);
|
||||||
|
assertEquals("awsCredentialsProvider", result);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testProcessKeyWithSuffix() {
|
||||||
|
String key = "awscredentialsproviderDynamoDB";
|
||||||
|
String result = configurator.processKey(key);
|
||||||
|
assertEquals("awsCredentialsProviderDynamoDB", result);
|
||||||
|
}
|
||||||
|
|
||||||
// TODO: fix this test
|
// TODO: fix this test
|
||||||
@Test
|
@Test
|
||||||
public void testWithDifferentAWSCredentialsForDynamoDBAndCloudWatch() {
|
public void testWithDifferentAwsCredentialsForDynamoDBAndCloudWatch() {
|
||||||
String test = StringUtils.join(
|
String test = StringUtils.join(
|
||||||
new String[] {
|
new String[] {
|
||||||
"streamName = a",
|
"streamName = a",
|
||||||
"applicationName = b",
|
"applicationName = b",
|
||||||
"AWSCredentialsProvider = " + credentialNameKinesis,
|
"AwsCredentialsProvider = " + credentialNameKinesis,
|
||||||
"AWSCredentialsProviderDynamoDB = " + credentialNameDynamoDB,
|
"AwsCredentialsProviderDynamoDB = " + credentialNameDynamoDB,
|
||||||
"AWSCredentialsProviderCloudWatch = " + credentialNameCloudWatch,
|
"AwsCredentialsProviderCloudWatch = " + credentialNameCloudWatch,
|
||||||
"failoverTimeMillis = 100",
|
"failoverTimeMillis = 100",
|
||||||
"shardSyncIntervalMillis = 500"
|
"shardSyncIntervalMillis = 500"
|
||||||
},
|
},
|
||||||
|
|
@ -487,14 +665,14 @@ public class KinesisClientLibConfiguratorTest {
|
||||||
|
|
||||||
// TODO: fix this test
|
// TODO: fix this test
|
||||||
@Test
|
@Test
|
||||||
public void testWithDifferentAWSCredentialsForDynamoDBAndCloudWatchFailed() {
|
public void testWithDifferentAwsCredentialsForDynamoDBAndCloudWatchFailed() {
|
||||||
String test = StringUtils.join(
|
String test = StringUtils.join(
|
||||||
new String[] {
|
new String[] {
|
||||||
"streamName = a",
|
"streamName = a",
|
||||||
"applicationName = b",
|
"applicationName = b",
|
||||||
"AWSCredentialsProvider = " + credentialNameKinesis,
|
"AwsCredentialsProvider = " + credentialNameKinesis,
|
||||||
"AWSCredentialsProviderDynamoDB = " + credentialName2,
|
"AwsCredentialsProviderDynamoDB = " + credentialName2,
|
||||||
"AWSCredentialsProviderCloudWatch = " + credentialName2,
|
"AwsCredentialsProviderCloudWatch = " + credentialName2,
|
||||||
"failoverTimeMillis = 100",
|
"failoverTimeMillis = 100",
|
||||||
"shardSyncIntervalMillis = 500"
|
"shardSyncIntervalMillis = 500"
|
||||||
},
|
},
|
||||||
|
|
@ -526,71 +704,52 @@ public class KinesisClientLibConfiguratorTest {
|
||||||
/**
|
/**
|
||||||
* This credentials provider will always succeed
|
* This credentials provider will always succeed
|
||||||
*/
|
*/
|
||||||
public static class AlwaysSucceedCredentialsProvider implements AWSCredentialsProvider {
|
public static class AlwaysSucceedCredentialsProvider implements AwsCredentialsProvider {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public AWSCredentials getCredentials() {
|
public AwsCredentials resolveCredentials() {
|
||||||
return new BasicAWSCredentials("a", "b");
|
return AwsBasicCredentials.create("a", "b");
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
|
||||||
public void refresh() {}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* This credentials provider will always succeed
|
* This credentials provider will always succeed
|
||||||
*/
|
*/
|
||||||
public static class AlwaysSucceedCredentialsProviderKinesis implements AWSCredentialsProvider {
|
public static class AlwaysSucceedCredentialsProviderKinesis implements AwsCredentialsProvider {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public AWSCredentials getCredentials() {
|
public AwsCredentials resolveCredentials() {
|
||||||
return new BasicAWSCredentials("", "");
|
return AwsBasicCredentials.create("DUMMY_ACCESS_KEY_ID", "DUMMY_SECRET_ACCESS_KEY");
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
|
||||||
public void refresh() {}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* This credentials provider will always succeed
|
* This credentials provider will always succeed
|
||||||
*/
|
*/
|
||||||
public static class AlwaysSucceedCredentialsProviderDynamoDB implements AWSCredentialsProvider {
|
public static class AlwaysSucceedCredentialsProviderDynamoDB implements AwsCredentialsProvider {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public AWSCredentials getCredentials() {
|
public AwsCredentials resolveCredentials() {
|
||||||
return new BasicAWSCredentials("", "");
|
return AwsBasicCredentials.create("DUMMY_ACCESS_KEY_ID", "DUMMY_SECRET_ACCESS_KEY");
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
|
||||||
public void refresh() {}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* This credentials provider will always succeed
|
* This credentials provider will always succeed
|
||||||
*/
|
*/
|
||||||
public static class AlwaysSucceedCredentialsProviderCloudWatch implements AWSCredentialsProvider {
|
public static class AlwaysSucceedCredentialsProviderCloudWatch implements AwsCredentialsProvider {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public AWSCredentials getCredentials() {
|
public AwsCredentials resolveCredentials() {
|
||||||
return new BasicAWSCredentials("", "");
|
return AwsBasicCredentials.create("DUMMY_ACCESS_KEY_ID", "DUMMY_SECRET_ACCESS_KEY");
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
|
||||||
public void refresh() {}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* This credentials provider will always fail
|
* This credentials provider will always fail
|
||||||
*/
|
*/
|
||||||
public static class AlwaysFailCredentialsProvider implements AWSCredentialsProvider {
|
public static class AlwaysFailCredentialsProvider implements AwsCredentialsProvider {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public AWSCredentials getCredentials() {
|
public AwsCredentials resolveCredentials() {
|
||||||
throw new IllegalArgumentException();
|
throw new IllegalArgumentException();
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
|
||||||
public void refresh() {}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
private MultiLangDaemonConfiguration getConfiguration(String configString) {
|
private MultiLangDaemonConfiguration getConfiguration(String configString) {
|
||||||
|
|
|
||||||
|
|
@ -15,6 +15,9 @@
|
||||||
|
|
||||||
package software.amazon.kinesis.multilang.config;
|
package software.amazon.kinesis.multilang.config;
|
||||||
|
|
||||||
|
import java.util.Arrays;
|
||||||
|
import java.util.NoSuchElementException;
|
||||||
|
|
||||||
import org.apache.commons.beanutils.BeanUtilsBean;
|
import org.apache.commons.beanutils.BeanUtilsBean;
|
||||||
import org.apache.commons.beanutils.ConvertUtilsBean;
|
import org.apache.commons.beanutils.ConvertUtilsBean;
|
||||||
import org.junit.After;
|
import org.junit.After;
|
||||||
|
|
@ -24,8 +27,16 @@ import org.junit.Test;
|
||||||
import org.junit.rules.ExpectedException;
|
import org.junit.rules.ExpectedException;
|
||||||
import org.junit.runner.RunWith;
|
import org.junit.runner.RunWith;
|
||||||
import org.mockito.Mock;
|
import org.mockito.Mock;
|
||||||
|
import org.mockito.Mockito;
|
||||||
import org.mockito.runners.MockitoJUnitRunner;
|
import org.mockito.runners.MockitoJUnitRunner;
|
||||||
import software.amazon.awssdk.auth.credentials.DefaultCredentialsProvider;
|
import software.amazon.awssdk.auth.credentials.DefaultCredentialsProvider;
|
||||||
|
import software.amazon.awssdk.services.cloudwatch.CloudWatchAsyncClient;
|
||||||
|
import software.amazon.awssdk.services.dynamodb.DynamoDbAsyncClient;
|
||||||
|
import software.amazon.awssdk.services.dynamodb.model.BillingMode;
|
||||||
|
import software.amazon.awssdk.services.kinesis.KinesisAsyncClient;
|
||||||
|
import software.amazon.kinesis.common.ConfigsBuilder;
|
||||||
|
import software.amazon.kinesis.coordinator.CoordinatorConfig;
|
||||||
|
import software.amazon.kinesis.leases.LeaseManagementConfig;
|
||||||
import software.amazon.kinesis.processor.ShardRecordProcessorFactory;
|
import software.amazon.kinesis.processor.ShardRecordProcessorFactory;
|
||||||
import software.amazon.kinesis.retrieval.fanout.FanOutConfig;
|
import software.amazon.kinesis.retrieval.fanout.FanOutConfig;
|
||||||
import software.amazon.kinesis.retrieval.polling.PollingConfig;
|
import software.amazon.kinesis.retrieval.polling.PollingConfig;
|
||||||
|
|
@ -34,6 +45,7 @@ import static org.hamcrest.CoreMatchers.equalTo;
|
||||||
import static org.hamcrest.CoreMatchers.instanceOf;
|
import static org.hamcrest.CoreMatchers.instanceOf;
|
||||||
import static org.junit.Assert.assertEquals;
|
import static org.junit.Assert.assertEquals;
|
||||||
import static org.junit.Assert.assertFalse;
|
import static org.junit.Assert.assertFalse;
|
||||||
|
import static org.junit.Assert.assertNotEquals;
|
||||||
import static org.junit.Assert.assertThat;
|
import static org.junit.Assert.assertThat;
|
||||||
import static org.junit.Assert.assertTrue;
|
import static org.junit.Assert.assertTrue;
|
||||||
|
|
||||||
|
|
@ -41,6 +53,8 @@ import static org.junit.Assert.assertTrue;
|
||||||
public class MultiLangDaemonConfigurationTest {
|
public class MultiLangDaemonConfigurationTest {
|
||||||
|
|
||||||
private static final String AWS_REGION_PROPERTY_NAME = "aws.region";
|
private static final String AWS_REGION_PROPERTY_NAME = "aws.region";
|
||||||
|
private static final String DUMMY_APPLICATION_NAME = "dummyApplicationName";
|
||||||
|
private static final String DUMMY_STREAM_NAME = "dummyStreamName";
|
||||||
|
|
||||||
private BeanUtilsBean utilsBean;
|
private BeanUtilsBean utilsBean;
|
||||||
private ConvertUtilsBean convertUtilsBean;
|
private ConvertUtilsBean convertUtilsBean;
|
||||||
|
|
@ -71,8 +85,8 @@ public class MultiLangDaemonConfigurationTest {
|
||||||
|
|
||||||
public MultiLangDaemonConfiguration baseConfiguration() {
|
public MultiLangDaemonConfiguration baseConfiguration() {
|
||||||
MultiLangDaemonConfiguration configuration = new MultiLangDaemonConfiguration(utilsBean, convertUtilsBean);
|
MultiLangDaemonConfiguration configuration = new MultiLangDaemonConfiguration(utilsBean, convertUtilsBean);
|
||||||
configuration.setApplicationName("Test");
|
configuration.setApplicationName(DUMMY_APPLICATION_NAME);
|
||||||
configuration.setStreamName("Test");
|
configuration.setStreamName(DUMMY_STREAM_NAME);
|
||||||
configuration.getKinesisCredentialsProvider().set("class", DefaultCredentialsProvider.class.getName());
|
configuration.getKinesisCredentialsProvider().set("class", DefaultCredentialsProvider.class.getName());
|
||||||
|
|
||||||
return configuration;
|
return configuration;
|
||||||
|
|
@ -111,6 +125,197 @@ public class MultiLangDaemonConfigurationTest {
|
||||||
assertTrue(resolvedConfiguration.leaseManagementConfig.leaseTableDeletionProtectionEnabled());
|
assertTrue(resolvedConfiguration.leaseManagementConfig.leaseTableDeletionProtectionEnabled());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testGracefulLeaseHandoffConfig() {
|
||||||
|
final LeaseManagementConfig.GracefulLeaseHandoffConfig defaultGracefulLeaseHandoffConfig =
|
||||||
|
getTestConfigsBuilder().leaseManagementConfig().gracefulLeaseHandoffConfig();
|
||||||
|
|
||||||
|
final long testGracefulLeaseHandoffTimeoutMillis =
|
||||||
|
defaultGracefulLeaseHandoffConfig.gracefulLeaseHandoffTimeoutMillis() + 12345;
|
||||||
|
final boolean testGracefulLeaseHandoffEnabled =
|
||||||
|
!defaultGracefulLeaseHandoffConfig.isGracefulLeaseHandoffEnabled();
|
||||||
|
|
||||||
|
final MultiLangDaemonConfiguration configuration = baseConfiguration();
|
||||||
|
configuration.setGracefulLeaseHandoffTimeoutMillis(testGracefulLeaseHandoffTimeoutMillis);
|
||||||
|
configuration.setIsGracefulLeaseHandoffEnabled(testGracefulLeaseHandoffEnabled);
|
||||||
|
|
||||||
|
final MultiLangDaemonConfiguration.ResolvedConfiguration resolvedConfiguration =
|
||||||
|
configuration.resolvedConfiguration(shardRecordProcessorFactory);
|
||||||
|
|
||||||
|
final LeaseManagementConfig.GracefulLeaseHandoffConfig gracefulLeaseHandoffConfig =
|
||||||
|
resolvedConfiguration.leaseManagementConfig.gracefulLeaseHandoffConfig();
|
||||||
|
|
||||||
|
assertEquals(
|
||||||
|
testGracefulLeaseHandoffTimeoutMillis, gracefulLeaseHandoffConfig.gracefulLeaseHandoffTimeoutMillis());
|
||||||
|
assertEquals(testGracefulLeaseHandoffEnabled, gracefulLeaseHandoffConfig.isGracefulLeaseHandoffEnabled());
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testGracefulLeaseHandoffUsesDefaults() {
|
||||||
|
final MultiLangDaemonConfiguration.ResolvedConfiguration resolvedConfiguration =
|
||||||
|
baseConfiguration().resolvedConfiguration(shardRecordProcessorFactory);
|
||||||
|
|
||||||
|
final LeaseManagementConfig.GracefulLeaseHandoffConfig gracefulLeaseHandoffConfig =
|
||||||
|
resolvedConfiguration.leaseManagementConfig.gracefulLeaseHandoffConfig();
|
||||||
|
|
||||||
|
final LeaseManagementConfig.GracefulLeaseHandoffConfig defaultGracefulLeaseHandoffConfig =
|
||||||
|
getTestConfigsBuilder().leaseManagementConfig().gracefulLeaseHandoffConfig();
|
||||||
|
|
||||||
|
assertEquals(defaultGracefulLeaseHandoffConfig, gracefulLeaseHandoffConfig);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testWorkerUtilizationAwareAssignmentConfig() {
|
||||||
|
MultiLangDaemonConfiguration configuration = baseConfiguration();
|
||||||
|
|
||||||
|
configuration.setInMemoryWorkerMetricsCaptureFrequencyMillis(123);
|
||||||
|
configuration.setWorkerMetricsReporterFreqInMillis(123);
|
||||||
|
configuration.setNoOfPersistedMetricsPerWorkerMetrics(123);
|
||||||
|
configuration.setDisableWorkerMetrics(true);
|
||||||
|
configuration.setMaxThroughputPerHostKBps(.123);
|
||||||
|
configuration.setDampeningPercentage(12);
|
||||||
|
configuration.setReBalanceThresholdPercentage(12);
|
||||||
|
configuration.setAllowThroughputOvershoot(false);
|
||||||
|
configuration.setVarianceBalancingFrequency(12);
|
||||||
|
configuration.setWorkerMetricsEMAAlpha(.123);
|
||||||
|
|
||||||
|
MultiLangDaemonConfiguration.ResolvedConfiguration resolvedConfiguration =
|
||||||
|
configuration.resolvedConfiguration(shardRecordProcessorFactory);
|
||||||
|
LeaseManagementConfig leaseManagementConfig = resolvedConfiguration.leaseManagementConfig;
|
||||||
|
LeaseManagementConfig.WorkerUtilizationAwareAssignmentConfig config =
|
||||||
|
leaseManagementConfig.workerUtilizationAwareAssignmentConfig();
|
||||||
|
|
||||||
|
assertEquals(config.inMemoryWorkerMetricsCaptureFrequencyMillis(), 123);
|
||||||
|
assertEquals(config.workerMetricsReporterFreqInMillis(), 123);
|
||||||
|
assertEquals(config.noOfPersistedMetricsPerWorkerMetrics(), 123);
|
||||||
|
assertTrue(config.disableWorkerMetrics());
|
||||||
|
assertEquals(config.maxThroughputPerHostKBps(), .123, .25);
|
||||||
|
assertEquals(config.dampeningPercentage(), 12);
|
||||||
|
assertEquals(config.reBalanceThresholdPercentage(), 12);
|
||||||
|
assertFalse(config.allowThroughputOvershoot());
|
||||||
|
assertEquals(config.varianceBalancingFrequency(), 12);
|
||||||
|
assertEquals(config.workerMetricsEMAAlpha(), .123, .25);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testWorkerUtilizationAwareAssignmentConfigUsesDefaults() {
|
||||||
|
final LeaseManagementConfig.WorkerUtilizationAwareAssignmentConfig defaultWorkerUtilAwareAssignmentConfig =
|
||||||
|
getTestConfigsBuilder().leaseManagementConfig().workerUtilizationAwareAssignmentConfig();
|
||||||
|
|
||||||
|
final MultiLangDaemonConfiguration configuration = baseConfiguration();
|
||||||
|
configuration.setVarianceBalancingFrequency(
|
||||||
|
defaultWorkerUtilAwareAssignmentConfig.varianceBalancingFrequency() + 12345);
|
||||||
|
|
||||||
|
final MultiLangDaemonConfiguration.ResolvedConfiguration resolvedConfiguration =
|
||||||
|
configuration.resolvedConfiguration(shardRecordProcessorFactory);
|
||||||
|
|
||||||
|
final LeaseManagementConfig.WorkerUtilizationAwareAssignmentConfig resolvedWorkerUtilAwareAssignmentConfig =
|
||||||
|
resolvedConfiguration.leaseManagementConfig.workerUtilizationAwareAssignmentConfig();
|
||||||
|
|
||||||
|
assertNotEquals(defaultWorkerUtilAwareAssignmentConfig, resolvedWorkerUtilAwareAssignmentConfig);
|
||||||
|
|
||||||
|
// apart from the single updated configuration, all other config values should be equal to the default
|
||||||
|
resolvedWorkerUtilAwareAssignmentConfig.varianceBalancingFrequency(
|
||||||
|
defaultWorkerUtilAwareAssignmentConfig.varianceBalancingFrequency());
|
||||||
|
assertEquals(defaultWorkerUtilAwareAssignmentConfig, resolvedWorkerUtilAwareAssignmentConfig);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testWorkerMetricsTableConfigBean() {
|
||||||
|
final BillingMode testWorkerMetricsTableBillingMode = BillingMode.PROVISIONED;
|
||||||
|
|
||||||
|
MultiLangDaemonConfiguration configuration = baseConfiguration();
|
||||||
|
|
||||||
|
configuration.setWorkerMetricsTableName("testTable");
|
||||||
|
configuration.setWorkerMetricsBillingMode(testWorkerMetricsTableBillingMode);
|
||||||
|
configuration.setWorkerMetricsReadCapacity(123);
|
||||||
|
configuration.setWorkerMetricsWriteCapacity(123);
|
||||||
|
|
||||||
|
MultiLangDaemonConfiguration.ResolvedConfiguration resolvedConfiguration =
|
||||||
|
configuration.resolvedConfiguration(shardRecordProcessorFactory);
|
||||||
|
LeaseManagementConfig leaseManagementConfig = resolvedConfiguration.leaseManagementConfig;
|
||||||
|
LeaseManagementConfig.WorkerUtilizationAwareAssignmentConfig workerUtilizationConfig =
|
||||||
|
leaseManagementConfig.workerUtilizationAwareAssignmentConfig();
|
||||||
|
LeaseManagementConfig.WorkerMetricsTableConfig workerMetricsConfig =
|
||||||
|
workerUtilizationConfig.workerMetricsTableConfig();
|
||||||
|
|
||||||
|
assertEquals(workerMetricsConfig.tableName(), "testTable");
|
||||||
|
assertEquals(workerMetricsConfig.billingMode(), testWorkerMetricsTableBillingMode);
|
||||||
|
assertEquals(workerMetricsConfig.readCapacity(), 123);
|
||||||
|
assertEquals(workerMetricsConfig.writeCapacity(), 123);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testWorkerMetricsTableConfigUsesDefaults() {
|
||||||
|
final LeaseManagementConfig.WorkerMetricsTableConfig defaultWorkerMetricsTableConfig = getTestConfigsBuilder()
|
||||||
|
.leaseManagementConfig()
|
||||||
|
.workerUtilizationAwareAssignmentConfig()
|
||||||
|
.workerMetricsTableConfig();
|
||||||
|
|
||||||
|
final MultiLangDaemonConfiguration configuration = baseConfiguration();
|
||||||
|
configuration.setWorkerMetricsBillingMode(Arrays.stream(BillingMode.values())
|
||||||
|
.filter(billingMode -> billingMode != defaultWorkerMetricsTableConfig.billingMode())
|
||||||
|
.findFirst()
|
||||||
|
.orElseThrow(NoSuchElementException::new));
|
||||||
|
|
||||||
|
final MultiLangDaemonConfiguration.ResolvedConfiguration resolvedConfiguration =
|
||||||
|
configuration.resolvedConfiguration(shardRecordProcessorFactory);
|
||||||
|
|
||||||
|
final LeaseManagementConfig.WorkerMetricsTableConfig resolvedWorkerMetricsTableConfig = resolvedConfiguration
|
||||||
|
.leaseManagementConfig
|
||||||
|
.workerUtilizationAwareAssignmentConfig()
|
||||||
|
.workerMetricsTableConfig();
|
||||||
|
|
||||||
|
assertNotEquals(defaultWorkerMetricsTableConfig, resolvedWorkerMetricsTableConfig);
|
||||||
|
|
||||||
|
// apart from the single updated configuration, all other config values should be equal to the default
|
||||||
|
resolvedWorkerMetricsTableConfig.billingMode(defaultWorkerMetricsTableConfig.billingMode());
|
||||||
|
assertEquals(defaultWorkerMetricsTableConfig, resolvedWorkerMetricsTableConfig);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testCoordinatorStateTableConfigBean() {
|
||||||
|
final BillingMode testWorkerMetricsTableBillingMode = BillingMode.PAY_PER_REQUEST;
|
||||||
|
|
||||||
|
MultiLangDaemonConfiguration configuration = baseConfiguration();
|
||||||
|
|
||||||
|
configuration.setCoordinatorStateTableName("testTable");
|
||||||
|
configuration.setCoordinatorStateBillingMode(testWorkerMetricsTableBillingMode);
|
||||||
|
configuration.setCoordinatorStateReadCapacity(123);
|
||||||
|
configuration.setCoordinatorStateWriteCapacity(123);
|
||||||
|
|
||||||
|
MultiLangDaemonConfiguration.ResolvedConfiguration resolvedConfiguration =
|
||||||
|
configuration.resolvedConfiguration(shardRecordProcessorFactory);
|
||||||
|
CoordinatorConfig coordinatorConfig = resolvedConfiguration.getCoordinatorConfig();
|
||||||
|
CoordinatorConfig.CoordinatorStateTableConfig coordinatorStateConfig =
|
||||||
|
coordinatorConfig.coordinatorStateTableConfig();
|
||||||
|
assertEquals(coordinatorStateConfig.tableName(), "testTable");
|
||||||
|
assertEquals(coordinatorStateConfig.billingMode(), testWorkerMetricsTableBillingMode);
|
||||||
|
assertEquals(coordinatorStateConfig.readCapacity(), 123);
|
||||||
|
assertEquals(coordinatorStateConfig.writeCapacity(), 123);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testCoordinatorStateTableConfigUsesDefaults() {
|
||||||
|
final CoordinatorConfig.CoordinatorStateTableConfig defaultCoordinatorStateTableConfig =
|
||||||
|
getTestConfigsBuilder().coordinatorConfig().coordinatorStateTableConfig();
|
||||||
|
|
||||||
|
final MultiLangDaemonConfiguration configuration = baseConfiguration();
|
||||||
|
configuration.setCoordinatorStateWriteCapacity(defaultCoordinatorStateTableConfig.writeCapacity() + 12345);
|
||||||
|
|
||||||
|
final MultiLangDaemonConfiguration.ResolvedConfiguration resolvedConfiguration =
|
||||||
|
configuration.resolvedConfiguration(shardRecordProcessorFactory);
|
||||||
|
|
||||||
|
final CoordinatorConfig.CoordinatorStateTableConfig resolvedCoordinatorStateTableConfig =
|
||||||
|
resolvedConfiguration.coordinatorConfig.coordinatorStateTableConfig();
|
||||||
|
|
||||||
|
assertNotEquals(defaultCoordinatorStateTableConfig, resolvedCoordinatorStateTableConfig);
|
||||||
|
|
||||||
|
// apart from the single updated configuration, all other config values should be equal to the default
|
||||||
|
resolvedCoordinatorStateTableConfig.writeCapacity(defaultCoordinatorStateTableConfig.writeCapacity());
|
||||||
|
assertEquals(defaultCoordinatorStateTableConfig, resolvedCoordinatorStateTableConfig);
|
||||||
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testSetLeaseTablePitrEnabledToTrue() {
|
public void testSetLeaseTablePitrEnabledToTrue() {
|
||||||
MultiLangDaemonConfiguration configuration = baseConfiguration();
|
MultiLangDaemonConfiguration configuration = baseConfiguration();
|
||||||
|
|
@ -266,4 +471,43 @@ public class MultiLangDaemonConfigurationTest {
|
||||||
|
|
||||||
assertThat(fanOutConfig.consumerArn(), equalTo(consumerArn));
|
assertThat(fanOutConfig.consumerArn(), equalTo(consumerArn));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testClientVersionConfig() {
|
||||||
|
final CoordinatorConfig.ClientVersionConfig testClientVersionConfig =
|
||||||
|
CoordinatorConfig.ClientVersionConfig.CLIENT_VERSION_CONFIG_COMPATIBLE_WITH_2X;
|
||||||
|
|
||||||
|
final MultiLangDaemonConfiguration configuration = baseConfiguration();
|
||||||
|
configuration.setClientVersionConfig(testClientVersionConfig);
|
||||||
|
|
||||||
|
final MultiLangDaemonConfiguration.ResolvedConfiguration resolvedConfiguration =
|
||||||
|
configuration.resolvedConfiguration(shardRecordProcessorFactory);
|
||||||
|
|
||||||
|
final CoordinatorConfig coordinatorConfig = resolvedConfiguration.coordinatorConfig;
|
||||||
|
|
||||||
|
assertEquals(testClientVersionConfig, coordinatorConfig.clientVersionConfig());
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testClientVersionConfigUsesDefault() {
|
||||||
|
final MultiLangDaemonConfiguration.ResolvedConfiguration resolvedConfiguration =
|
||||||
|
baseConfiguration().resolvedConfiguration(shardRecordProcessorFactory);
|
||||||
|
|
||||||
|
final CoordinatorConfig coordinatorConfig = resolvedConfiguration.coordinatorConfig;
|
||||||
|
|
||||||
|
assertEquals(
|
||||||
|
getTestConfigsBuilder().coordinatorConfig().clientVersionConfig(),
|
||||||
|
coordinatorConfig.clientVersionConfig());
|
||||||
|
}
|
||||||
|
|
||||||
|
private ConfigsBuilder getTestConfigsBuilder() {
|
||||||
|
return new ConfigsBuilder(
|
||||||
|
DUMMY_STREAM_NAME,
|
||||||
|
DUMMY_APPLICATION_NAME,
|
||||||
|
Mockito.mock(KinesisAsyncClient.class),
|
||||||
|
Mockito.mock(DynamoDbAsyncClient.class),
|
||||||
|
Mockito.mock(CloudWatchAsyncClient.class),
|
||||||
|
"dummyWorkerIdentifier",
|
||||||
|
shardRecordProcessorFactory);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,251 @@
|
||||||
|
package software.amazon.kinesis.multilang.config;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.time.Duration;
|
||||||
|
|
||||||
|
import org.junit.jupiter.api.Test;
|
||||||
|
import software.amazon.awssdk.services.dynamodb.model.BillingMode;
|
||||||
|
import software.amazon.kinesis.coordinator.CoordinatorConfig.ClientVersionConfig;
|
||||||
|
import software.amazon.kinesis.multilang.MultiLangDaemonConfig;
|
||||||
|
import software.amazon.kinesis.multilang.config.MultiLangDaemonConfiguration.ResolvedConfiguration;
|
||||||
|
import software.amazon.kinesis.processor.ShardRecordProcessor;
|
||||||
|
import software.amazon.kinesis.processor.ShardRecordProcessorFactory;
|
||||||
|
|
||||||
|
import static org.junit.jupiter.api.Assertions.assertEquals;
|
||||||
|
import static org.junit.jupiter.api.Assertions.assertFalse;
|
||||||
|
import static org.junit.jupiter.api.Assertions.assertTrue;
|
||||||
|
|
||||||
|
public class PropertiesMappingE2ETest {
|
||||||
|
private static final String PROPERTIES_FILE = "multilang.properties";
|
||||||
|
private static final String PROPERTIES_FILE_V3 = "multilangv3.properties";
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testKclV3PropertiesMapping() throws IOException {
|
||||||
|
final MultiLangDaemonConfig config = new MultiLangDaemonConfig(PROPERTIES_FILE);
|
||||||
|
|
||||||
|
final ResolvedConfiguration kclV3Config =
|
||||||
|
config.getMultiLangDaemonConfiguration().resolvedConfiguration(new TestRecordProcessorFactory());
|
||||||
|
|
||||||
|
assertEquals(
|
||||||
|
ClientVersionConfig.CLIENT_VERSION_CONFIG_COMPATIBLE_WITH_2X,
|
||||||
|
kclV3Config.coordinatorConfig.clientVersionConfig());
|
||||||
|
|
||||||
|
assertEquals(
|
||||||
|
"MultiLangTest-CoordinatorState-CustomName",
|
||||||
|
kclV3Config.coordinatorConfig.coordinatorStateTableConfig().tableName());
|
||||||
|
assertEquals(
|
||||||
|
BillingMode.PROVISIONED,
|
||||||
|
kclV3Config.coordinatorConfig.coordinatorStateTableConfig().billingMode());
|
||||||
|
assertEquals(
|
||||||
|
1000,
|
||||||
|
kclV3Config.coordinatorConfig.coordinatorStateTableConfig().readCapacity());
|
||||||
|
assertEquals(
|
||||||
|
500, kclV3Config.coordinatorConfig.coordinatorStateTableConfig().writeCapacity());
|
||||||
|
|
||||||
|
assertEquals(
|
||||||
|
10000L,
|
||||||
|
kclV3Config.leaseManagementConfig.gracefulLeaseHandoffConfig().gracefulLeaseHandoffTimeoutMillis());
|
||||||
|
assertFalse(
|
||||||
|
kclV3Config.leaseManagementConfig.gracefulLeaseHandoffConfig().isGracefulLeaseHandoffEnabled());
|
||||||
|
|
||||||
|
assertEquals(
|
||||||
|
5000L,
|
||||||
|
kclV3Config
|
||||||
|
.leaseManagementConfig
|
||||||
|
.workerUtilizationAwareAssignmentConfig()
|
||||||
|
.inMemoryWorkerMetricsCaptureFrequencyMillis());
|
||||||
|
assertEquals(
|
||||||
|
60000L,
|
||||||
|
kclV3Config
|
||||||
|
.leaseManagementConfig
|
||||||
|
.workerUtilizationAwareAssignmentConfig()
|
||||||
|
.workerMetricsReporterFreqInMillis());
|
||||||
|
assertEquals(
|
||||||
|
50,
|
||||||
|
kclV3Config
|
||||||
|
.leaseManagementConfig
|
||||||
|
.workerUtilizationAwareAssignmentConfig()
|
||||||
|
.noOfPersistedMetricsPerWorkerMetrics());
|
||||||
|
assertTrue(kclV3Config
|
||||||
|
.leaseManagementConfig
|
||||||
|
.workerUtilizationAwareAssignmentConfig()
|
||||||
|
.disableWorkerMetrics());
|
||||||
|
assertEquals(
|
||||||
|
10000,
|
||||||
|
kclV3Config
|
||||||
|
.leaseManagementConfig
|
||||||
|
.workerUtilizationAwareAssignmentConfig()
|
||||||
|
.maxThroughputPerHostKBps());
|
||||||
|
assertEquals(
|
||||||
|
90,
|
||||||
|
kclV3Config
|
||||||
|
.leaseManagementConfig
|
||||||
|
.workerUtilizationAwareAssignmentConfig()
|
||||||
|
.dampeningPercentage());
|
||||||
|
assertEquals(
|
||||||
|
5,
|
||||||
|
kclV3Config
|
||||||
|
.leaseManagementConfig
|
||||||
|
.workerUtilizationAwareAssignmentConfig()
|
||||||
|
.reBalanceThresholdPercentage());
|
||||||
|
assertFalse(kclV3Config
|
||||||
|
.leaseManagementConfig
|
||||||
|
.workerUtilizationAwareAssignmentConfig()
|
||||||
|
.allowThroughputOvershoot());
|
||||||
|
assertEquals(
|
||||||
|
Duration.ofHours(12),
|
||||||
|
kclV3Config
|
||||||
|
.leaseManagementConfig
|
||||||
|
.workerUtilizationAwareAssignmentConfig()
|
||||||
|
.staleWorkerMetricsEntryCleanupDuration());
|
||||||
|
assertEquals(
|
||||||
|
5,
|
||||||
|
kclV3Config
|
||||||
|
.leaseManagementConfig
|
||||||
|
.workerUtilizationAwareAssignmentConfig()
|
||||||
|
.varianceBalancingFrequency());
|
||||||
|
assertEquals(
|
||||||
|
0.18D,
|
||||||
|
kclV3Config
|
||||||
|
.leaseManagementConfig
|
||||||
|
.workerUtilizationAwareAssignmentConfig()
|
||||||
|
.workerMetricsEMAAlpha());
|
||||||
|
|
||||||
|
assertEquals(
|
||||||
|
"MultiLangTest-WorkerMetrics-CustomName",
|
||||||
|
kclV3Config
|
||||||
|
.leaseManagementConfig
|
||||||
|
.workerUtilizationAwareAssignmentConfig()
|
||||||
|
.workerMetricsTableConfig()
|
||||||
|
.tableName());
|
||||||
|
assertEquals(
|
||||||
|
BillingMode.PROVISIONED,
|
||||||
|
kclV3Config
|
||||||
|
.leaseManagementConfig
|
||||||
|
.workerUtilizationAwareAssignmentConfig()
|
||||||
|
.workerMetricsTableConfig()
|
||||||
|
.billingMode());
|
||||||
|
assertEquals(
|
||||||
|
250,
|
||||||
|
kclV3Config
|
||||||
|
.leaseManagementConfig
|
||||||
|
.workerUtilizationAwareAssignmentConfig()
|
||||||
|
.workerMetricsTableConfig()
|
||||||
|
.readCapacity());
|
||||||
|
assertEquals(
|
||||||
|
90,
|
||||||
|
kclV3Config
|
||||||
|
.leaseManagementConfig
|
||||||
|
.workerUtilizationAwareAssignmentConfig()
|
||||||
|
.workerMetricsTableConfig()
|
||||||
|
.writeCapacity());
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testKclV3PropertiesMappingForDefaultValues() throws IOException {
|
||||||
|
final MultiLangDaemonConfig config = new MultiLangDaemonConfig(PROPERTIES_FILE_V3);
|
||||||
|
|
||||||
|
final ResolvedConfiguration kclV3Config =
|
||||||
|
config.getMultiLangDaemonConfiguration().resolvedConfiguration(new TestRecordProcessorFactory());
|
||||||
|
|
||||||
|
assertEquals(ClientVersionConfig.CLIENT_VERSION_CONFIG_3X, kclV3Config.coordinatorConfig.clientVersionConfig());
|
||||||
|
|
||||||
|
assertEquals(
|
||||||
|
"MultiLangTest-CoordinatorState",
|
||||||
|
kclV3Config.coordinatorConfig.coordinatorStateTableConfig().tableName());
|
||||||
|
assertEquals(
|
||||||
|
BillingMode.PAY_PER_REQUEST,
|
||||||
|
kclV3Config.coordinatorConfig.coordinatorStateTableConfig().billingMode());
|
||||||
|
|
||||||
|
assertEquals(
|
||||||
|
30_000L,
|
||||||
|
kclV3Config.leaseManagementConfig.gracefulLeaseHandoffConfig().gracefulLeaseHandoffTimeoutMillis());
|
||||||
|
assertTrue(
|
||||||
|
kclV3Config.leaseManagementConfig.gracefulLeaseHandoffConfig().isGracefulLeaseHandoffEnabled());
|
||||||
|
|
||||||
|
assertEquals(
|
||||||
|
1000L,
|
||||||
|
kclV3Config
|
||||||
|
.leaseManagementConfig
|
||||||
|
.workerUtilizationAwareAssignmentConfig()
|
||||||
|
.inMemoryWorkerMetricsCaptureFrequencyMillis());
|
||||||
|
assertEquals(
|
||||||
|
30000L,
|
||||||
|
kclV3Config
|
||||||
|
.leaseManagementConfig
|
||||||
|
.workerUtilizationAwareAssignmentConfig()
|
||||||
|
.workerMetricsReporterFreqInMillis());
|
||||||
|
assertEquals(
|
||||||
|
10,
|
||||||
|
kclV3Config
|
||||||
|
.leaseManagementConfig
|
||||||
|
.workerUtilizationAwareAssignmentConfig()
|
||||||
|
.noOfPersistedMetricsPerWorkerMetrics());
|
||||||
|
assertFalse(kclV3Config
|
||||||
|
.leaseManagementConfig
|
||||||
|
.workerUtilizationAwareAssignmentConfig()
|
||||||
|
.disableWorkerMetrics());
|
||||||
|
assertEquals(
|
||||||
|
Double.MAX_VALUE,
|
||||||
|
kclV3Config
|
||||||
|
.leaseManagementConfig
|
||||||
|
.workerUtilizationAwareAssignmentConfig()
|
||||||
|
.maxThroughputPerHostKBps());
|
||||||
|
assertEquals(
|
||||||
|
60,
|
||||||
|
kclV3Config
|
||||||
|
.leaseManagementConfig
|
||||||
|
.workerUtilizationAwareAssignmentConfig()
|
||||||
|
.dampeningPercentage());
|
||||||
|
assertEquals(
|
||||||
|
10,
|
||||||
|
kclV3Config
|
||||||
|
.leaseManagementConfig
|
||||||
|
.workerUtilizationAwareAssignmentConfig()
|
||||||
|
.reBalanceThresholdPercentage());
|
||||||
|
assertTrue(kclV3Config
|
||||||
|
.leaseManagementConfig
|
||||||
|
.workerUtilizationAwareAssignmentConfig()
|
||||||
|
.allowThroughputOvershoot());
|
||||||
|
assertEquals(
|
||||||
|
Duration.ofDays(1),
|
||||||
|
kclV3Config
|
||||||
|
.leaseManagementConfig
|
||||||
|
.workerUtilizationAwareAssignmentConfig()
|
||||||
|
.staleWorkerMetricsEntryCleanupDuration());
|
||||||
|
assertEquals(
|
||||||
|
3,
|
||||||
|
kclV3Config
|
||||||
|
.leaseManagementConfig
|
||||||
|
.workerUtilizationAwareAssignmentConfig()
|
||||||
|
.varianceBalancingFrequency());
|
||||||
|
assertEquals(
|
||||||
|
0.5D,
|
||||||
|
kclV3Config
|
||||||
|
.leaseManagementConfig
|
||||||
|
.workerUtilizationAwareAssignmentConfig()
|
||||||
|
.workerMetricsEMAAlpha());
|
||||||
|
|
||||||
|
assertEquals(
|
||||||
|
"MultiLangTest-WorkerMetricStats",
|
||||||
|
kclV3Config
|
||||||
|
.leaseManagementConfig
|
||||||
|
.workerUtilizationAwareAssignmentConfig()
|
||||||
|
.workerMetricsTableConfig()
|
||||||
|
.tableName());
|
||||||
|
assertEquals(
|
||||||
|
BillingMode.PAY_PER_REQUEST,
|
||||||
|
kclV3Config
|
||||||
|
.leaseManagementConfig
|
||||||
|
.workerUtilizationAwareAssignmentConfig()
|
||||||
|
.workerMetricsTableConfig()
|
||||||
|
.billingMode());
|
||||||
|
}
|
||||||
|
|
||||||
|
private static class TestRecordProcessorFactory implements ShardRecordProcessorFactory {
|
||||||
|
@Override
|
||||||
|
public ShardRecordProcessor shardRecordProcessor() {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,68 @@
|
||||||
|
/*
|
||||||
|
* Copyright 2024 Amazon.com, Inc. or its affiliates.
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the
|
||||||
|
* "License"); you may not use this file except in compliance
|
||||||
|
* with the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package software.amazon.kinesis.multilang.config;
|
||||||
|
|
||||||
|
import java.util.Optional;
|
||||||
|
|
||||||
|
import org.apache.commons.beanutils.BeanUtilsBean;
|
||||||
|
import org.apache.commons.beanutils.ConvertUtilsBean;
|
||||||
|
import org.junit.Test;
|
||||||
|
import org.junit.runner.RunWith;
|
||||||
|
import org.mockito.Mock;
|
||||||
|
import org.mockito.runners.MockitoJUnitRunner;
|
||||||
|
import software.amazon.awssdk.services.kinesis.KinesisAsyncClient;
|
||||||
|
import software.amazon.kinesis.retrieval.polling.PollingConfig;
|
||||||
|
|
||||||
|
import static org.hamcrest.CoreMatchers.equalTo;
|
||||||
|
import static org.junit.Assert.assertThat;
|
||||||
|
|
||||||
|
@RunWith(MockitoJUnitRunner.class)
|
||||||
|
public class WorkerUtilizationAwareAssignmentConfigBeanTest {
|
||||||
|
|
||||||
|
@Mock
|
||||||
|
private KinesisAsyncClient kinesisAsyncClient;
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testAllPropertiesTransit() {
|
||||||
|
PollingConfigBean pollingConfigBean = new PollingConfigBean();
|
||||||
|
pollingConfigBean.setIdleTimeBetweenReadsInMillis(1000);
|
||||||
|
pollingConfigBean.setMaxGetRecordsThreadPool(20);
|
||||||
|
pollingConfigBean.setMaxRecords(5000);
|
||||||
|
pollingConfigBean.setRetryGetRecordsInSeconds(30);
|
||||||
|
|
||||||
|
ConvertUtilsBean convertUtilsBean = new ConvertUtilsBean();
|
||||||
|
BeanUtilsBean utilsBean = new BeanUtilsBean(convertUtilsBean);
|
||||||
|
|
||||||
|
MultiLangDaemonConfiguration multiLangDaemonConfiguration =
|
||||||
|
new MultiLangDaemonConfiguration(utilsBean, convertUtilsBean);
|
||||||
|
multiLangDaemonConfiguration.setStreamName("test-stream");
|
||||||
|
|
||||||
|
PollingConfig pollingConfig = pollingConfigBean.build(kinesisAsyncClient, multiLangDaemonConfiguration);
|
||||||
|
|
||||||
|
assertThat(pollingConfig.kinesisClient(), equalTo(kinesisAsyncClient));
|
||||||
|
assertThat(pollingConfig.streamName(), equalTo(multiLangDaemonConfiguration.getStreamName()));
|
||||||
|
assertThat(
|
||||||
|
pollingConfig.idleTimeBetweenReadsInMillis(),
|
||||||
|
equalTo(pollingConfigBean.getIdleTimeBetweenReadsInMillis()));
|
||||||
|
assertThat(
|
||||||
|
pollingConfig.maxGetRecordsThreadPool(),
|
||||||
|
equalTo(Optional.of(pollingConfigBean.getMaxGetRecordsThreadPool())));
|
||||||
|
assertThat(pollingConfig.maxRecords(), equalTo(pollingConfigBean.getMaxRecords()));
|
||||||
|
assertThat(
|
||||||
|
pollingConfig.retryGetRecordsInSeconds(),
|
||||||
|
equalTo(Optional.of(pollingConfigBean.getRetryGetRecordsInSeconds())));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -17,10 +17,12 @@ streamName = kclpysample
|
||||||
applicationName = MultiLangTest
|
applicationName = MultiLangTest
|
||||||
|
|
||||||
# Users can change the credentials provider the KCL will use to retrieve credentials.
|
# Users can change the credentials provider the KCL will use to retrieve credentials.
|
||||||
# The DefaultAWSCredentialsProviderChain checks several other providers, which is
|
# Expected key name (case-sensitive):
|
||||||
|
# AwsCredentialsProvider / AwsCredentialsProviderDynamoDB / AwsCredentialsProviderCloudWatch
|
||||||
|
# The DefaultCredentialsProvider checks several other providers, which is
|
||||||
# described here:
|
# described here:
|
||||||
# http://docs.aws.amazon.com/AWSJavaSDK/latest/javadoc/com/amazonaws/auth/DefaultAWSCredentialsProviderChain.html
|
# https://sdk.amazonaws.com/java/api/latest/software/amazon/awssdk/auth/credentials/DefaultCredentialsProvider.html
|
||||||
AWSCredentialsProvider = DefaultAWSCredentialsProviderChain
|
AwsCredentialsProvider = DefaultCredentialsProvider
|
||||||
|
|
||||||
# Appended to the user agent of the KCL. Does not impact the functionality of the
|
# Appended to the user agent of the KCL. Does not impact the functionality of the
|
||||||
# KCL in any other way.
|
# KCL in any other way.
|
||||||
|
|
@ -91,3 +93,73 @@ validateSequenceNumberBeforeCheckpointing = true
|
||||||
# active threads set to the provided value. If a non-positive integer or no
|
# active threads set to the provided value. If a non-positive integer or no
|
||||||
# value is provided a CachedThreadPool is used.
|
# value is provided a CachedThreadPool is used.
|
||||||
maxActiveThreads = -1
|
maxActiveThreads = -1
|
||||||
|
|
||||||
|
################### KclV3 configurations ###################
|
||||||
|
# Coordinator config
|
||||||
|
# Version the KCL needs to operate in. For more details check the KCLv3 migration
|
||||||
|
# documentation. Default is CLIENT_VERSION_CONFIG_3X
|
||||||
|
clientVersionConfig = CLIENT_VERSION_CONFIG_COMPATIBLE_WITH_2x
|
||||||
|
# TODO: include table deletion protection and pitr config once its added
|
||||||
|
# Configurations to control how the CoordinatorState DDB table is created
|
||||||
|
# Default name is applicationName-CoordinatorState in PAY_PER_REQUEST
|
||||||
|
coordinatorStateTableName = MultiLangTest-CoordinatorState-CustomName
|
||||||
|
coordinatorStateBillingMode = PROVISIONED
|
||||||
|
coordinatorStateReadCapacity = 1000
|
||||||
|
coordinatorStateWriteCapacity = 500
|
||||||
|
|
||||||
|
# Graceful handoff config - tuning of the shutdown behavior during lease transfers
|
||||||
|
# default values are 30000 and true respectively
|
||||||
|
gracefulLeaseHandoffTimeoutMillis = 10000
|
||||||
|
isGracefulLeaseHandoffEnabled = false
|
||||||
|
|
||||||
|
# WorkerMetricStats table config - control how the DDB table is created
|
||||||
|
## Default name is applicationName-WorkerMetricStats in PAY_PER_REQUEST
|
||||||
|
# TODO: include table deletion protection and pitr config once its added
|
||||||
|
workerMetricsTableName = MultiLangTest-WorkerMetrics-CustomName
|
||||||
|
workerMetricsBillingMode = PROVISIONED
|
||||||
|
workerMetricsReadCapacity = 250
|
||||||
|
workerMetricsWriteCapacity = 90
|
||||||
|
|
||||||
|
# WorkerUtilizationAwareAssignment config - tune the new KCLv3 Lease balancing algorithm
|
||||||
|
#
|
||||||
|
# frequency of capturing worker metrics in memory. Default is 1s
|
||||||
|
inMemoryWorkerMetricsCaptureFrequencyMillis = 5000
|
||||||
|
# frequency of reporting worker metric stats to storage. Default is 30s
|
||||||
|
workerMetricsReporterFreqInMillis = 60000
|
||||||
|
# No. of metricStats that are persisted in WorkerMetricStats ddb table, default is 10
|
||||||
|
noOfPersistedMetricsPerWorkerMetrics = 50
|
||||||
|
# Disable use of worker metrics to balance lease, default is false.
|
||||||
|
# If it is true, the algorithm balances lease based on worker's processing throughput.
|
||||||
|
disableWorkerMetrics = true
|
||||||
|
# Max throughput per host 10 MBps, to limit processing to the given value
|
||||||
|
# Default is unlimited.
|
||||||
|
maxThroughputPerHostKBps = 10000
|
||||||
|
# Dampen the load that is rebalanced during lease re-balancing, default is 60%
|
||||||
|
dampeningPercentage = 90
|
||||||
|
# Configures the allowed variance range for worker utilization. The upper
|
||||||
|
# limit is calculated as average * (1 + reBalanceThresholdPercentage/100).
|
||||||
|
# The lower limit is average * (1 - reBalanceThresholdPercentage/100). If
|
||||||
|
# any worker's utilization falls outside this range, lease re-balancing is
|
||||||
|
# triggered. The re-balancing algorithm aims to bring variance within the
|
||||||
|
# specified range. It also avoids thrashing by ensuring the utilization of
|
||||||
|
# the worker receiving the load after re-balancing doesn't exceed the fleet
|
||||||
|
# average. This might cause no re-balancing action even the utilization is
|
||||||
|
# out of the variance range. The default value is 10, representing +/-10%
|
||||||
|
# variance from the average value.
|
||||||
|
reBalanceThresholdPercentage = 5
|
||||||
|
# Whether at-least one lease must be taken from a high utilization worker
|
||||||
|
# during re-balancing when there is no lease assigned to that worker which has
|
||||||
|
# throughput is less than or equal to the minimum throughput that needs to be
|
||||||
|
# moved away from that worker to bring the worker back into the allowed variance.
|
||||||
|
# Default is true.
|
||||||
|
allowThroughputOvershoot = false
|
||||||
|
# Lease assignment is performed every failoverTimeMillis but re-balance will
|
||||||
|
# be attempted only once in 5 times based on the below config. Default is 3.
|
||||||
|
varianceBalancingFrequency = 5
|
||||||
|
# Alpha value used for calculating exponential moving average of worker's metricStats.
|
||||||
|
workerMetricsEMAAlpha = 0.18
|
||||||
|
# Duration after which workerMetricStats entry from WorkerMetricStats table will
|
||||||
|
# be cleaned up.
|
||||||
|
# Duration format examples: PT15M (15 mins) PT10H (10 hours) P2D (2 days)
|
||||||
|
# Refer to Duration.parse javadocs for more details
|
||||||
|
staleWorkerMetricsEntryCleanupDuration = PT12H
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,169 @@
|
||||||
|
# The script that abides by the multi-language protocol. This script will
|
||||||
|
# be executed by the MultiLangDaemon, which will communicate with this script
|
||||||
|
# over STDIN and STDOUT according to the multi-language protocol.
|
||||||
|
executableName = sample_kclpy_app.py
|
||||||
|
|
||||||
|
# The Stream arn: arn:aws:kinesis:<region>:<account id>:stream/<stream name>
|
||||||
|
# Important: streamArn takes precedence over streamName if both are set
|
||||||
|
streamArn = arn:aws:kinesis:us-east-5:000000000000:stream/kclpysample
|
||||||
|
|
||||||
|
# The name of an Amazon Kinesis stream to process.
|
||||||
|
# Important: streamArn takes precedence over streamName if both are set
|
||||||
|
streamName = kclpysample
|
||||||
|
|
||||||
|
# Used by the KCL as the name of this application. Will be used as the name
|
||||||
|
# of an Amazon DynamoDB table which will store the lease and checkpoint
|
||||||
|
# information for workers with this application name
|
||||||
|
applicationName = MultiLangTest
|
||||||
|
|
||||||
|
# Users can change the credentials provider the KCL will use to retrieve credentials.
|
||||||
|
# Expected key name (case-sensitive):
|
||||||
|
# AwsCredentialsProvider / AwsCredentialsProviderDynamoDB / AwsCredentialsProviderCloudWatch
|
||||||
|
# The DefaultCredentialsProvider checks several other providers, which is
|
||||||
|
# described here:
|
||||||
|
# https://sdk.amazonaws.com/java/api/latest/software/amazon/awssdk/auth/credentials/DefaultCredentialsProvider.html
|
||||||
|
AwsCredentialsProvider = DefaultCredentialsProvider
|
||||||
|
|
||||||
|
# Appended to the user agent of the KCL. Does not impact the functionality of the
|
||||||
|
# KCL in any other way.
|
||||||
|
processingLanguage = python/3.8
|
||||||
|
|
||||||
|
# Valid options at TRIM_HORIZON or LATEST.
|
||||||
|
# See http://docs.aws.amazon.com/kinesis/latest/APIReference/API_GetShardIterator.html#API_GetShardIterator_RequestSyntax
|
||||||
|
initialPositionInStream = TRIM_HORIZON
|
||||||
|
|
||||||
|
# To specify an initial timestamp from which to start processing records, please specify timestamp value for 'initiatPositionInStreamExtended',
|
||||||
|
# and uncomment below line with right timestamp value.
|
||||||
|
# See more from 'Timestamp' under http://docs.aws.amazon.com/kinesis/latest/APIReference/API_GetShardIterator.html#API_GetShardIterator_RequestSyntax
|
||||||
|
#initialPositionInStreamExtended = 1636609142
|
||||||
|
|
||||||
|
# The following properties are also available for configuring the KCL Worker that is created
|
||||||
|
# by the MultiLangDaemon.
|
||||||
|
|
||||||
|
# The KCL defaults to us-east-1
|
||||||
|
regionName = us-east-1
|
||||||
|
|
||||||
|
# Fail over time in milliseconds. A worker which does not renew it's lease within this time interval
|
||||||
|
# will be regarded as having problems and it's shards will be assigned to other workers.
|
||||||
|
# For applications that have a large number of shards, this msy be set to a higher number to reduce
|
||||||
|
# the number of DynamoDB IOPS required for tracking leases
|
||||||
|
failoverTimeMillis = 10000
|
||||||
|
|
||||||
|
# A worker id that uniquely identifies this worker among all workers using the same applicationName
|
||||||
|
# If this isn't provided a MultiLangDaemon instance will assign a unique workerId to itself.
|
||||||
|
workerId = "workerId"
|
||||||
|
|
||||||
|
# Shard sync interval in milliseconds - e.g. wait for this long between shard sync tasks.
|
||||||
|
shardSyncIntervalMillis = 60000
|
||||||
|
|
||||||
|
# Max records to fetch from Kinesis in a single GetRecords call.
|
||||||
|
maxRecords = 10000
|
||||||
|
|
||||||
|
# Idle time between record reads in milliseconds.
|
||||||
|
idleTimeBetweenReadsInMillis = 1000
|
||||||
|
|
||||||
|
# Enables applications flush/checkpoint (if they have some data "in progress", but don't get new data for while)
|
||||||
|
callProcessRecordsEvenForEmptyRecordList = false
|
||||||
|
|
||||||
|
# Interval in milliseconds between polling to check for parent shard completion.
|
||||||
|
# Polling frequently will take up more DynamoDB IOPS (when there are leases for shards waiting on
|
||||||
|
# completion of parent shards).
|
||||||
|
parentShardPollIntervalMillis = 10000
|
||||||
|
|
||||||
|
# Cleanup leases upon shards completion (don't wait until they expire in Kinesis).
|
||||||
|
# Keeping leases takes some tracking/resources (e.g. they need to be renewed, assigned), so by default we try
|
||||||
|
# to delete the ones we don't need any longer.
|
||||||
|
cleanupLeasesUponShardCompletion = true
|
||||||
|
|
||||||
|
# Backoff time in milliseconds for Amazon Kinesis Client Library tasks (in the event of failures).
|
||||||
|
taskBackoffTimeMillis = 500
|
||||||
|
|
||||||
|
# Buffer metrics for at most this long before publishing to CloudWatch.
|
||||||
|
metricsBufferTimeMillis = 10000
|
||||||
|
|
||||||
|
# Buffer at most this many metrics before publishing to CloudWatch.
|
||||||
|
metricsMaxQueueSize = 10000
|
||||||
|
|
||||||
|
# KCL will validate client provided sequence numbers with a call to Amazon Kinesis before checkpointing for calls
|
||||||
|
# to RecordProcessorCheckpointer#checkpoint(String) by default.
|
||||||
|
validateSequenceNumberBeforeCheckpointing = true
|
||||||
|
|
||||||
|
# The maximum number of active threads for the MultiLangDaemon to permit.
|
||||||
|
# If a value is provided then a FixedThreadPool is used with the maximum
|
||||||
|
# active threads set to the provided value. If a non-positive integer or no
|
||||||
|
# value is provided a CachedThreadPool is used.
|
||||||
|
maxActiveThreads = -1
|
||||||
|
|
||||||
|
################### KclV3 configurations ###################
|
||||||
|
# Coordinator config
|
||||||
|
clientVersionConfig = CLIENT_VERSION_CONFIG_3x
|
||||||
|
|
||||||
|
## Let all other config be defaults
|
||||||
|
## TODO: include table deletion protection and pitr config once its added
|
||||||
|
## Configurations to control how the CoordinatorState DDB table is created
|
||||||
|
## Default name is applicationName-CoordinatorState in PAY_PER_REQUEST
|
||||||
|
#coordinatorStateTableName = MultiLangTest-CoordinatorState-CustomName
|
||||||
|
#coordinatorStateBillingMode = PROVISIONED
|
||||||
|
#coordinatorStateReadCapacity = 1000
|
||||||
|
#coordinatorStateWriteCapacity = 500
|
||||||
|
#
|
||||||
|
## Graceful handoff config - tuning of the shutdown behavior during lease transfers
|
||||||
|
## default values are 30000 and true respectively
|
||||||
|
#gracefulLeaseHandoffTimeoutMillis = 10000
|
||||||
|
#isGracefulLeaseHandoffEnabled = false
|
||||||
|
#
|
||||||
|
## WorkerMetricStats table config - control how the DDB table is created
|
||||||
|
### Default name is applicationName-WorkerMetricStats in PAY_PER_REQUEST
|
||||||
|
## TODO: include table deletion protection and pitr config once its added
|
||||||
|
#workerMetricsTableName = MultiLangTest-WorkerMetrics-CustomName
|
||||||
|
#workerMetricsBillingMode = PROVISIONED
|
||||||
|
#workerMetricsReadCapacity = 250
|
||||||
|
#workerMetricsWriteCapacity = 90
|
||||||
|
#
|
||||||
|
## WorkerUtilizationAwareAssignment config - tune the new KCLv3 Lease balancing algorithm
|
||||||
|
##
|
||||||
|
## frequency of capturing worker metrics in memory. Default is 1s
|
||||||
|
#inMemoryWorkerMetricsCaptureFrequencyMillis = 5000
|
||||||
|
## frequency of reporting worker metric stats to storage. Default is 30s
|
||||||
|
#workerMetricsReporterFreqInMillis = 60000
|
||||||
|
## No. of metricStats that are persisted in WorkerMetricStats ddb table, default is 10.
|
||||||
|
## This provides historic values that are used to compute the workers current
|
||||||
|
## utilization using an exponential-moving-average.
|
||||||
|
#noOfPersistedMetricsPerWorkerMetrics = 50
|
||||||
|
## Disable use of worker metrics to balance lease, default is false.
|
||||||
|
## If it is true, the algorithm balances lease based on worker's processing throughput.
|
||||||
|
#disableWorkerMetrics = true
|
||||||
|
## Max throughput per host 10 MBps, to limit processing to the given value
|
||||||
|
## Default is unlimited.
|
||||||
|
#maxThroughputPerHostKBps = 10000
|
||||||
|
## Dampen the load that is rebalanced during lease re-balancing, default is 60%
|
||||||
|
#dampeningPercentage = 90
|
||||||
|
## Configures the allowed variance range for worker utilization. The upper
|
||||||
|
## limit is calculated as average * (1 + reBalanceThresholdPercentage/100).
|
||||||
|
## The lower limit is average * (1 - reBalanceThresholdPercentage/100). If
|
||||||
|
## any worker's utilization falls outside this range, lease re-balancing is
|
||||||
|
## triggered. The re-balancing algorithm aims to bring variance within the
|
||||||
|
## specified range. It also avoids thrashing by ensuring the utilization of
|
||||||
|
## the worker receiving the load after re-balancing doesn't exceed the fleet
|
||||||
|
## average. This might cause no re-balancing action even the utilization is
|
||||||
|
## out of the variance range. The default value is 10, representing +/-10%
|
||||||
|
## variance from the average value.
|
||||||
|
#reBalanceThresholdPercentage = 5
|
||||||
|
## Whether at-least one lease must be taken from a high utilization worker
|
||||||
|
## during re-balancing when there is no lease assigned to that worker which has
|
||||||
|
## throughput is less than or equal to the minimum throughput that needs to be
|
||||||
|
## moved away from that worker to bring the worker back into the allowed variance.
|
||||||
|
## Default is true.
|
||||||
|
#allowThroughputOvershoot = false
|
||||||
|
## Lease assignment is performed every failoverTimeMillis but re-balance will
|
||||||
|
## be attempted only once in 5 times based on the below config. Default is 3.
|
||||||
|
#varianceBalancingFrequency = 5
|
||||||
|
## Alpha value used for calculating exponential moving average of worker's metricStats.
|
||||||
|
## Default is 0.5, a higher alpha value will make re-balancing more sensitive
|
||||||
|
## to recent metricStats.
|
||||||
|
#workerMetricsEMAAlpha = 0.18
|
||||||
|
## Duration after which workerMetricStats entry from WorkerMetricStats table will
|
||||||
|
## be cleaned up. Default is 1 day.
|
||||||
|
## Duration format examples: PT15M (15 mins) PT10H (10 hours) P2D (2 days)
|
||||||
|
## Refer to Duration.parse javadocs for more details
|
||||||
|
#staleWorkerMetricsEntryCleanupDuration = PT12H
|
||||||
|
|
@ -23,7 +23,7 @@
|
||||||
<parent>
|
<parent>
|
||||||
<groupId>software.amazon.kinesis</groupId>
|
<groupId>software.amazon.kinesis</groupId>
|
||||||
<artifactId>amazon-kinesis-client-pom</artifactId>
|
<artifactId>amazon-kinesis-client-pom</artifactId>
|
||||||
<version>2.6.1-SNAPSHOT</version>
|
<version>3.0.0</version>
|
||||||
</parent>
|
</parent>
|
||||||
|
|
||||||
<artifactId>amazon-kinesis-client</artifactId>
|
<artifactId>amazon-kinesis-client</artifactId>
|
||||||
|
|
@ -68,6 +68,18 @@
|
||||||
<artifactId>dynamodb</artifactId>
|
<artifactId>dynamodb</artifactId>
|
||||||
<version>${awssdk.version}</version>
|
<version>${awssdk.version}</version>
|
||||||
</dependency>
|
</dependency>
|
||||||
|
<!-- https://mvnrepository.com/artifact/software.amazon.awssdk/dynamodb-enhanced -->
|
||||||
|
<dependency>
|
||||||
|
<groupId>software.amazon.awssdk</groupId>
|
||||||
|
<artifactId>dynamodb-enhanced</artifactId>
|
||||||
|
<version>${awssdk.version}</version>
|
||||||
|
</dependency>
|
||||||
|
<!-- https://mvnrepository.com/artifact/com.amazonaws/dynamodb-lock-client -->
|
||||||
|
<dependency>
|
||||||
|
<groupId>com.amazonaws</groupId>
|
||||||
|
<artifactId>dynamodb-lock-client</artifactId>
|
||||||
|
<version>1.3.0</version>
|
||||||
|
</dependency>
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>software.amazon.awssdk</groupId>
|
<groupId>software.amazon.awssdk</groupId>
|
||||||
<artifactId>cloudwatch</artifactId>
|
<artifactId>cloudwatch</artifactId>
|
||||||
|
|
@ -82,6 +94,12 @@
|
||||||
<groupId>software.amazon.glue</groupId>
|
<groupId>software.amazon.glue</groupId>
|
||||||
<artifactId>schema-registry-serde</artifactId>
|
<artifactId>schema-registry-serde</artifactId>
|
||||||
<version>${gsr.version}</version>
|
<version>${gsr.version}</version>
|
||||||
|
<exclusions>
|
||||||
|
<exclusion>
|
||||||
|
<groupId>com.amazonaws</groupId>
|
||||||
|
<artifactId>aws-java-sdk-sts</artifactId>
|
||||||
|
</exclusion>
|
||||||
|
</exclusions>
|
||||||
</dependency>
|
</dependency>
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>software.amazon.glue</groupId>
|
<groupId>software.amazon.glue</groupId>
|
||||||
|
|
@ -103,11 +121,23 @@
|
||||||
<artifactId>commons-lang3</artifactId>
|
<artifactId>commons-lang3</artifactId>
|
||||||
<version>3.14.0</version>
|
<version>3.14.0</version>
|
||||||
</dependency>
|
</dependency>
|
||||||
|
<!-- https://mvnrepository.com/artifact/commons-collections/commons-collections -->
|
||||||
|
<dependency>
|
||||||
|
<groupId>commons-collections</groupId>
|
||||||
|
<artifactId>commons-collections</artifactId>
|
||||||
|
<version>3.2.2</version>
|
||||||
|
</dependency>
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>org.slf4j</groupId>
|
<groupId>org.slf4j</groupId>
|
||||||
<artifactId>slf4j-api</artifactId>
|
<artifactId>slf4j-api</artifactId>
|
||||||
<version>${slf4j.version}</version>
|
<version>${slf4j.version}</version>
|
||||||
</dependency>
|
</dependency>
|
||||||
|
<!-- https://mvnrepository.com/artifact/org.jetbrains/annotations -->
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.jetbrains</groupId>
|
||||||
|
<artifactId>annotations</artifactId>
|
||||||
|
<version>26.0.1</version>
|
||||||
|
</dependency>
|
||||||
|
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>io.reactivex.rxjava3</groupId>
|
<groupId>io.reactivex.rxjava3</groupId>
|
||||||
|
|
@ -123,35 +153,47 @@
|
||||||
</dependency>
|
</dependency>
|
||||||
|
|
||||||
<!-- Test -->
|
<!-- Test -->
|
||||||
|
<!-- TODO: Migrate all tests to Junit5 -->
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.junit.jupiter</groupId>
|
||||||
|
<artifactId>junit-jupiter-api</artifactId>
|
||||||
|
<version>5.11.3</version>
|
||||||
|
<scope>test</scope>
|
||||||
|
</dependency>
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>junit</groupId>
|
<groupId>junit</groupId>
|
||||||
<artifactId>junit</artifactId>
|
<artifactId>junit</artifactId>
|
||||||
<version>4.13.2</version>
|
<version>4.13.2</version>
|
||||||
<scope>test</scope>
|
<scope>test</scope>
|
||||||
</dependency>
|
</dependency>
|
||||||
|
<!-- https://mvnrepository.com/artifact/org.junit.jupiter/junit-jupiter-params -->
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>org.mockito</groupId>
|
<groupId>org.junit.jupiter</groupId>
|
||||||
<artifactId>mockito-all</artifactId>
|
<artifactId>junit-jupiter-params</artifactId>
|
||||||
<version>1.10.19</version>
|
<version>5.11.3</version>
|
||||||
|
<scope>test</scope>
|
||||||
|
</dependency>
|
||||||
|
<!-- Using older version to be compatible with Java 8 -->
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.mockito</groupId>
|
||||||
|
<artifactId>mockito-junit-jupiter</artifactId>
|
||||||
|
<version>3.12.4</version>
|
||||||
<scope>test</scope>
|
<scope>test</scope>
|
||||||
</dependency>
|
</dependency>
|
||||||
|
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>org.hamcrest</groupId>
|
<groupId>org.hamcrest</groupId>
|
||||||
<artifactId>hamcrest-all</artifactId>
|
<artifactId>hamcrest-all</artifactId>
|
||||||
<version>1.3</version>
|
<version>1.3</version>
|
||||||
<scope>test</scope>
|
<scope>test</scope>
|
||||||
</dependency>
|
</dependency>
|
||||||
|
<!-- Using older version to be compatible with Java 8 -->
|
||||||
|
<!-- https://mvnrepository.com/artifact/com.amazonaws/DynamoDBLocal -->
|
||||||
<!--<dependency>-->
|
<dependency>
|
||||||
<!--<groupId>com.amazonaws</groupId>-->
|
<groupId>com.amazonaws</groupId>
|
||||||
<!--<artifactId>DynamoDBLocal</artifactId>-->
|
<artifactId>DynamoDBLocal</artifactId>
|
||||||
<!--<version>1.11.86</version>-->
|
<version>1.25.0</version>
|
||||||
<!--<scope>test</scope>-->
|
<scope>test</scope>
|
||||||
<!--</dependency>-->
|
</dependency>
|
||||||
|
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>ch.qos.logback</groupId>
|
<groupId>ch.qos.logback</groupId>
|
||||||
<artifactId>logback-classic</artifactId>
|
<artifactId>logback-classic</artifactId>
|
||||||
|
|
|
||||||
610
amazon-kinesis-client/scripts/KclMigrationTool.py
Normal file
610
amazon-kinesis-client/scripts/KclMigrationTool.py
Normal file
|
|
@ -0,0 +1,610 @@
|
||||||
|
"""
|
||||||
|
Copyright 2024 Amazon.com, Inc. or its affiliates.
|
||||||
|
Licensed under the Apache License, Version 2.0 (the
|
||||||
|
"License"); you may not use this file except in compliance
|
||||||
|
with the License. You may obtain a copy of the License at
|
||||||
|
|
||||||
|
http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
|
||||||
|
Unless required by applicable law or agreed to in writing, software
|
||||||
|
distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
See the License for the specific language governing permissions and
|
||||||
|
limitations under the License.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import argparse
|
||||||
|
import time
|
||||||
|
|
||||||
|
from enum import Enum
|
||||||
|
import boto3
|
||||||
|
from botocore.config import Config
|
||||||
|
from botocore.exceptions import ClientError
|
||||||
|
|
||||||
|
# DynamoDB table suffixes
|
||||||
|
DEFAULT_COORDINATOR_STATE_TABLE_SUFFIX = "-CoordinatorState"
|
||||||
|
DEFAULT_WORKER_METRICS_TABLE_SUFFIX = "-WorkerMetricStats"
|
||||||
|
|
||||||
|
# DynamoDB attribute names and values
|
||||||
|
CLIENT_VERSION_ATTR = 'cv'
|
||||||
|
TIMESTAMP_ATTR = 'mts'
|
||||||
|
MODIFIED_BY_ATTR = 'mb'
|
||||||
|
HISTORY_ATTR = 'h'
|
||||||
|
MIGRATION_KEY = "Migration3.0"
|
||||||
|
|
||||||
|
# GSI constants
|
||||||
|
GSI_NAME = 'LeaseOwnerToLeaseKeyIndex'
|
||||||
|
GSI_DELETION_WAIT_TIME_SECONDS = 120
|
||||||
|
|
||||||
|
config = Config(
|
||||||
|
# TODO: parameterize
|
||||||
|
region_name = 'us-east-1',
|
||||||
|
retries = {
|
||||||
|
'max_attempts': 10,
|
||||||
|
'mode': 'standard'
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
|
# TODO: validate where these values came from. None of the originals seem to work.
|
||||||
|
class KclClientVersion(Enum):
|
||||||
|
VERSION_2X = "CLIENT_VERSION_2x"
|
||||||
|
UPGRADE_FROM_2X = "CLIENT_VERSION_UPGRADE_FROM_2x"
|
||||||
|
VERSION_3X_WITH_ROLLBACK = "CLIENT_VERSION_3x_WITH_ROLLBACK"
|
||||||
|
VERSION_3X = "CLIENT_VERSION_3x"
|
||||||
|
|
||||||
|
def __str__(self):
|
||||||
|
return self.value
|
||||||
|
|
||||||
|
|
||||||
|
def get_time_in_millis():
|
||||||
|
return str(round(time.time() * 1000))
|
||||||
|
|
||||||
|
|
||||||
|
def is_valid_version(version, mode):
|
||||||
|
"""
|
||||||
|
Validate if the given version is valid for the specified mode
|
||||||
|
|
||||||
|
:param version: The KCL client version to validate
|
||||||
|
:param mode: Either 'rollback' or 'rollforward'
|
||||||
|
:return: True if the version is valid for the given mode, False otherwise
|
||||||
|
"""
|
||||||
|
if mode == 'rollback':
|
||||||
|
if version == KclClientVersion.VERSION_2X.value:
|
||||||
|
print("Your KCL application already runs in a mode compatible with KCL 2.x. You can deploy the code with the previous KCL version if you still experience an issue.")
|
||||||
|
return True
|
||||||
|
if version in [KclClientVersion.UPGRADE_FROM_2X.value,
|
||||||
|
KclClientVersion.VERSION_3X_WITH_ROLLBACK.value]:
|
||||||
|
return True
|
||||||
|
if version == KclClientVersion.VERSION_3X.value:
|
||||||
|
print("Cannot roll back the KCL application."
|
||||||
|
" It is not in a state that supports rollback.")
|
||||||
|
return False
|
||||||
|
print("Migration to KCL 3.0 not in progress or application_name / coordinator_state_table_name is incorrect."
|
||||||
|
" Please double check and run again with correct arguments.")
|
||||||
|
return False
|
||||||
|
|
||||||
|
if mode == 'rollforward':
|
||||||
|
if version == KclClientVersion.VERSION_2X.value:
|
||||||
|
return True
|
||||||
|
if version in [KclClientVersion.UPGRADE_FROM_2X.value,
|
||||||
|
KclClientVersion.VERSION_3X_WITH_ROLLBACK.value]:
|
||||||
|
print("Cannot roll-forward application. It is not in a rolled back state.")
|
||||||
|
return False
|
||||||
|
if version == KclClientVersion.VERSION_3X.value:
|
||||||
|
print("Cannot roll-forward the KCL application."
|
||||||
|
" Application has already migrated.")
|
||||||
|
return False
|
||||||
|
print("Cannot roll-forward because migration to KCL 3.0 is not in progress or application_name"
|
||||||
|
" / coordinator_state_table_name is incorrect. Please double check and run again with correct arguments.")
|
||||||
|
return False
|
||||||
|
print(f"Invalid mode: {mode}. Mode must be either 'rollback' or 'rollforward'.")
|
||||||
|
return False
|
||||||
|
|
||||||
|
|
||||||
|
def handle_get_item_client_error(e, operation, table_name):
|
||||||
|
"""
|
||||||
|
Handle ClientError exceptions raised by get_item on given DynamoDB table
|
||||||
|
|
||||||
|
:param e: The ClientError exception object
|
||||||
|
:param operation: Rollback or Roll-forward for logging the errors
|
||||||
|
:param table_name: The name of the DynamoDB table where the error occurred
|
||||||
|
"""
|
||||||
|
error_code = e.response['Error']['Code']
|
||||||
|
error_message = e.response['Error']['Message']
|
||||||
|
print(f"{operation} could not be performed.")
|
||||||
|
if error_code == 'ProvisionedThroughputExceededException':
|
||||||
|
print(f"Throughput exceeded even after retries: {error_message}")
|
||||||
|
else:
|
||||||
|
print(f"Unexpected client error occurred: {error_code} - {error_message}")
|
||||||
|
print("Please resolve the issue and run the KclMigrationTool again.")
|
||||||
|
|
||||||
|
|
||||||
|
def table_exists(dynamodb_client, table_name):
|
||||||
|
"""
|
||||||
|
Check if a DynamoDB table exists.
|
||||||
|
|
||||||
|
:param dynamodb_client: Boto3 DynamoDB client
|
||||||
|
:param table_name: Name of the DynamoDB table to check
|
||||||
|
:return: True if the table exists, False otherwise
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
dynamodb_client.describe_table(TableName=table_name)
|
||||||
|
return True
|
||||||
|
except ClientError as e:
|
||||||
|
if e.response['Error']['Code'] == 'ResourceNotFoundException':
|
||||||
|
print(f"Table '{table_name}' does not exist.")
|
||||||
|
return False
|
||||||
|
print(f"An error occurred while checking table '{table_name}': {e}.")
|
||||||
|
return False
|
||||||
|
|
||||||
|
|
||||||
|
def validate_tables(dynamodb_client, operation, coordinator_state_table_name, lease_table_name=None):
|
||||||
|
"""
|
||||||
|
Validate the existence of DynamoDB tables required for KCL operations
|
||||||
|
|
||||||
|
:param dynamodb_client: A boto3 DynamoDB client object
|
||||||
|
:param operation: Rollback or Roll-forward for logging
|
||||||
|
:param coordinator_state_table_name: Name of the coordinator state table
|
||||||
|
:param lease_table_name: Name of the DynamoDB KCL lease table (optional)
|
||||||
|
:return: True if all required tables exist, False otherwise
|
||||||
|
"""
|
||||||
|
if lease_table_name and not table_exists(dynamodb_client, lease_table_name):
|
||||||
|
print(
|
||||||
|
f"{operation} failed. Could not find a KCL Application DDB lease table "
|
||||||
|
f"with name {lease_table_name}. Please pass in the correct application_name "
|
||||||
|
"and/or lease_table_name that matches your KCL application configuration."
|
||||||
|
)
|
||||||
|
return False
|
||||||
|
|
||||||
|
if not table_exists(dynamodb_client, coordinator_state_table_name):
|
||||||
|
print(
|
||||||
|
f"{operation} failed. Could not find a coordinator state table "
|
||||||
|
f"{coordinator_state_table_name}. Please pass in the correct application_name or"
|
||||||
|
f" coordinator_state_table_name that matches your KCL application configuration."
|
||||||
|
)
|
||||||
|
return False
|
||||||
|
|
||||||
|
return True
|
||||||
|
|
||||||
|
|
||||||
|
def add_current_state_to_history(item, max_history=10):
|
||||||
|
"""
|
||||||
|
Adds the current state of a DynamoDB item to its history attribute.
|
||||||
|
Creates a new history entry from the current value and maintains a capped history list.
|
||||||
|
|
||||||
|
:param item: DynamoDB item to add history to
|
||||||
|
:param max_history: Maximum number of history entries to maintain (default: 10)
|
||||||
|
:return: Updated history attribute as a DynamoDB-formatted dictionary
|
||||||
|
"""
|
||||||
|
# Extract current values
|
||||||
|
current_version = item.get(CLIENT_VERSION_ATTR, {}).get('S', 'Unknown')
|
||||||
|
current_modified_by = item.get(MODIFIED_BY_ATTR, {}).get('S', 'Unknown')
|
||||||
|
current_time_in_millis = (
|
||||||
|
item.get(TIMESTAMP_ATTR, {}).get('N', get_time_in_millis())
|
||||||
|
)
|
||||||
|
|
||||||
|
# Create new history entry
|
||||||
|
new_entry = {
|
||||||
|
'M': {
|
||||||
|
CLIENT_VERSION_ATTR: {'S': current_version},
|
||||||
|
MODIFIED_BY_ATTR: {'S': current_modified_by},
|
||||||
|
TIMESTAMP_ATTR: {'N': current_time_in_millis}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
# Get existing history or create new if doesn't exist
|
||||||
|
history_dict = item.get(f'{HISTORY_ATTR}', {'L': []})
|
||||||
|
history_list = history_dict['L']
|
||||||
|
|
||||||
|
# Add new entry to the beginning of the list, capping at max_history
|
||||||
|
history_list.insert(0, new_entry)
|
||||||
|
history_list = history_list[:max_history]
|
||||||
|
|
||||||
|
return history_dict
|
||||||
|
|
||||||
|
|
||||||
|
def get_current_state(dynamodb_client, table_name):
|
||||||
|
"""
|
||||||
|
Retrieve the current state from the DynamoDB table and prepare history update.
|
||||||
|
Fetches the current item from the specified DynamoDB table,
|
||||||
|
extracts the initial client version, and creates a new history entry.
|
||||||
|
|
||||||
|
:param dynamodb_client: Boto3 DynamoDB client
|
||||||
|
:param table_name: Name of the DynamoDB table to query
|
||||||
|
:return: A tuple containing:
|
||||||
|
- initial_version (str): The current client version, or 'Unknown' if not found
|
||||||
|
- new_history (dict): Updated history including the current state
|
||||||
|
"""
|
||||||
|
response = dynamodb_client.get_item(
|
||||||
|
TableName=table_name,
|
||||||
|
Key={'key': {'S': MIGRATION_KEY}}
|
||||||
|
)
|
||||||
|
item = response.get('Item', {})
|
||||||
|
initial_version = item.get(CLIENT_VERSION_ATTR, {}).get('S', 'Unknown')
|
||||||
|
new_history = add_current_state_to_history(item)
|
||||||
|
return initial_version, new_history
|
||||||
|
|
||||||
|
|
||||||
|
def rollback_client_version(dynamodb_client, table_name, history):
|
||||||
|
"""
|
||||||
|
Update the client version in the coordinator state table to initiate rollback.
|
||||||
|
|
||||||
|
:param dynamodb_client: Boto3 DynamoDB client
|
||||||
|
:param table_name: Name of the coordinator state DDB table
|
||||||
|
:param history: Updated history attribute as a DynamoDB-formatted dictionary
|
||||||
|
:return: A tuple containing:
|
||||||
|
- success (bool): True if client version was successfully updated, False otherwise
|
||||||
|
- previous_version (str): The version that was replaced, or None if update failed
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
print(f"Rolling back client version in table '{table_name}'...")
|
||||||
|
update_response = dynamodb_client.update_item(
|
||||||
|
TableName=table_name,
|
||||||
|
Key={'key': {'S': MIGRATION_KEY}},
|
||||||
|
UpdateExpression=(
|
||||||
|
f"SET {CLIENT_VERSION_ATTR} = :rollback_client_version, "
|
||||||
|
f"{TIMESTAMP_ATTR} = :updated_at, "
|
||||||
|
f"{MODIFIED_BY_ATTR} = :modifier, "
|
||||||
|
f"{HISTORY_ATTR} = :history"
|
||||||
|
),
|
||||||
|
ConditionExpression=(
|
||||||
|
f"{CLIENT_VERSION_ATTR} IN ("
|
||||||
|
":upgrade_from_2x_client_version, "
|
||||||
|
":3x_with_rollback_client_version)"
|
||||||
|
),
|
||||||
|
ExpressionAttributeValues={
|
||||||
|
':rollback_client_version': {'S': KclClientVersion.VERSION_2X.value},
|
||||||
|
':updated_at': {'N': get_time_in_millis()},
|
||||||
|
':modifier': {'S': 'KclMigrationTool-rollback'},
|
||||||
|
':history': history,
|
||||||
|
':upgrade_from_2x_client_version': (
|
||||||
|
{'S': KclClientVersion.UPGRADE_FROM_2X.value}
|
||||||
|
),
|
||||||
|
':3x_with_rollback_client_version': (
|
||||||
|
{'S': KclClientVersion.VERSION_3X_WITH_ROLLBACK.value}
|
||||||
|
),
|
||||||
|
},
|
||||||
|
ReturnValues='UPDATED_OLD'
|
||||||
|
)
|
||||||
|
replaced_item = update_response.get('Attributes', {})
|
||||||
|
replaced_version = replaced_item.get('cv', {}).get('S', '')
|
||||||
|
return True, replaced_version
|
||||||
|
except ClientError as e:
|
||||||
|
if e.response['Error']['Code'] == 'ConditionalCheckFailedException':
|
||||||
|
print("Unable to rollback, as application is not in a state that allows rollback."
|
||||||
|
"Ensure that the given application_name or coordinator_state_table_name is correct and"
|
||||||
|
" you have followed all prior migration steps.")
|
||||||
|
else:
|
||||||
|
print(f"An unexpected error occurred while rolling back: {str(e)}"
|
||||||
|
"Please resolve and run this migration script again.")
|
||||||
|
return False, None
|
||||||
|
|
||||||
|
|
||||||
|
def rollfoward_client_version(dynamodb_client, table_name, history):
|
||||||
|
"""
|
||||||
|
Update the client version in the coordinator state table to initiate roll-forward
|
||||||
|
conditionally if application is currently in rolled back state.
|
||||||
|
|
||||||
|
:param dynamodb_client: Boto3 DynamoDB client
|
||||||
|
:param table_name: Name of the coordinator state DDB table
|
||||||
|
:param history: Updated history attribute as a DynamoDB-formatted dictionary
|
||||||
|
:return: True if client version was successfully updated, False otherwise
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
# Conditionally update client version
|
||||||
|
dynamodb_client.update_item(
|
||||||
|
TableName=table_name,
|
||||||
|
Key={'key': {'S': MIGRATION_KEY}},
|
||||||
|
UpdateExpression= (
|
||||||
|
f"SET {CLIENT_VERSION_ATTR} = :rollforward_version, "
|
||||||
|
f"{TIMESTAMP_ATTR} = :updated_at, "
|
||||||
|
f"{MODIFIED_BY_ATTR} = :modifier, "
|
||||||
|
f"{HISTORY_ATTR} = :new_history"
|
||||||
|
),
|
||||||
|
ConditionExpression=f"{CLIENT_VERSION_ATTR} = :kcl_2x_version",
|
||||||
|
ExpressionAttributeValues={
|
||||||
|
':rollforward_version': {'S': KclClientVersion.UPGRADE_FROM_2X.value},
|
||||||
|
':updated_at': {'N': get_time_in_millis()},
|
||||||
|
':modifier': {'S': 'KclMigrationTool-rollforward'},
|
||||||
|
':new_history': history,
|
||||||
|
':kcl_2x_version': {'S': KclClientVersion.VERSION_2X.value},
|
||||||
|
}
|
||||||
|
)
|
||||||
|
print("Roll-forward has been initiated. KCL application will monitor for 3.0 readiness and"
|
||||||
|
" automatically switch to 3.0 functionality when readiness criteria have been met.")
|
||||||
|
except ClientError as e:
|
||||||
|
if e.response['Error']['Code'] == 'ConditionalCheckFailedException':
|
||||||
|
print("Unable to roll-forward because application is not in rolled back state."
|
||||||
|
" Ensure that the given application_name or coordinator_state_table_name is correct"
|
||||||
|
" and you have followed all prior migration steps.")
|
||||||
|
else:
|
||||||
|
print(f"Unable to roll-forward due to error: {str(e)}. "
|
||||||
|
"Please resolve and run this migration script again.")
|
||||||
|
except Exception as e:
|
||||||
|
print(f"Unable to roll-forward due to error: {str(e)}. "
|
||||||
|
"Please resolve and run this migration script again.")
|
||||||
|
|
||||||
|
|
||||||
|
def delete_gsi_if_exists(dynamodb_client, table_name):
    """
    Deletes GSI on given lease table if it exists.

    Looks up the table description first; if the GSI is absent (or the table
    itself is missing) the function reports and returns without attempting a
    delete. All failures are printed for the operator instead of being raised.

    :param dynamodb_client: Boto3 DynamoDB client
    :param table_name: Name of lease table to remove GSI from
    """
    try:
        response = dynamodb_client.describe_table(TableName=table_name)
        # A table with no GSIs at all is treated the same as "our GSI absent".
        gsi_present = any(
            gsi['IndexName'] == GSI_NAME
            for gsi in response['Table'].get('GlobalSecondaryIndexes', []))

        if not gsi_present:
            print(f"GSI {GSI_NAME} is not present on lease table {table_name}. It may already be successfully"
                  " deleted. Or if lease table name is incorrect, please re-run the KclMigrationTool with correct"
                  " application_name or lease_table_name.")
            return
    except ClientError as e:
        if e.response['Error']['Code'] == 'ResourceNotFoundException':
            print(f"Lease table {table_name} does not exist, please check application_name or lease_table_name"
                  " configuration and try again.")
        else:
            print(f"An unexpected error occurred while checking if GSI {GSI_NAME} exists"
                  f" on lease table {table_name}: {str(e)}. Please rectify the error and try again.")
        return

    print(f"Deleting GSI '{GSI_NAME}' from table '{table_name}'...")
    try:
        dynamodb_client.update_table(
            TableName=table_name,
            GlobalSecondaryIndexUpdates=[
                {
                    'Delete': {
                        'IndexName': GSI_NAME
                    }
                }
            ]
        )
    except ClientError as e:
        error_code = e.response['Error']['Code']
        if error_code == 'ResourceNotFoundException':
            print(f"{GSI_NAME} not found or table '{table_name}' not found.")
        elif error_code == 'ResourceInUseException':
            print(f"Unable to delete GSI: '{table_name}' is currently being modified.")
        else:
            # BUGFIX: any other ClientError code (throttling, access denied, ...)
            # was previously swallowed silently -- the later `except Exception`
            # never sees a ClientError -- leaving the operator unaware the GSI
            # may still exist. Surface it with remediation guidance.
            print(f"An unexpected error occurred while deleting GSI {GSI_NAME} on lease table {table_name}: {str(e)}."
                  " Please manually confirm the GSI is removed from the lease table, or"
                  " resolve the error and rerun the migration script.")
    except Exception as e:
        print(f"An unexpected error occurred while deleting GSI {GSI_NAME} on lease table {table_name}: {str(e)}."
              " Please manually confirm the GSI is removed from the lease table, or"
              " resolve the error and rerun the migration script.")
|
||||||
|
|
||||||
|
|
||||||
|
def delete_worker_metrics_table_if_exists(dynamodb_client, worker_metrics_table_name):
    """
    Delete the worker metrics table for the application, if present.

    First confirms the table exists via describe_table, then issues the delete.
    Every failure is reported to the operator rather than raised.

    :param dynamodb_client: Boto3 DynamoDB client
    :param worker_metrics_table_name: Name of the DynamoDB worker metrics table
    """
    # Existence check: bail out early (with guidance) when the table is absent
    # or cannot be described.
    try:
        dynamodb_client.describe_table(TableName=worker_metrics_table_name)
    except ClientError as describe_error:
        code = describe_error.response['Error']['Code']
        if code == 'ResourceNotFoundException':
            print(f"Worker metrics table {worker_metrics_table_name} does not exist."
                  " It may already be successfully deleted. Please check that the application_name"
                  " or worker_metrics_table_name is correct. If not, correct this and rerun the migration script.")
        else:
            print(f"An unexpected error occurred when checking if {worker_metrics_table_name} table exists: {str(describe_error)}."
                  " Please manually confirm the table is deleted, or resolve the error"
                  " and rerun the migration script.")
        return

    print(f"Deleting worker metrics table {worker_metrics_table_name}...")
    try:
        dynamodb_client.delete_table(TableName=worker_metrics_table_name)
    except ClientError as delete_error:
        if delete_error.response['Error']['Code'] == 'AccessDeniedException':
            print(f"No permissions to delete table {worker_metrics_table_name}. Please manually delete it if you"
                  " want to avoid any charges until you are ready to rollforward with migration.")
        else:
            print(f"An unexpected client error occurred while deleting worker metrics table: {str(delete_error)}."
                  " Please manually confirm the table is deleted, or resolve the error"
                  " and rerun the migration script.")
    except Exception as delete_error:
        print(f"An unexpected error occurred while deleting worker metrics table: {str(delete_error)}."
              " Please manually confirm the table is deleted, or resolve the error"
              " and rerun the migration script.")
|
||||||
|
|
||||||
|
|
||||||
|
def perform_rollback(dynamodb_client, lease_table_name, coordinator_state_table_name, worker_metrics_table_name):
    """
    Perform KCL 3.0 migration rollback by updating MigrationState for the KCL application.
    Rolls client version back, removes GSI from lease table, deletes worker metrics table.

    :param dynamodb_client: Boto3 DynamoDB client
    :param lease_table_name: Name of the DynamoDB KCL lease table
    :param coordinator_state_table_name: Name of the DynamoDB coordinator state table
    :param worker_metrics_table_name: Name of the DynamoDB worker metrics table
    """
    if not validate_tables(dynamodb_client, "Rollback", coordinator_state_table_name, lease_table_name):
        return

    try:
        initial_version, new_history = get_current_state(dynamodb_client,
                                                         coordinator_state_table_name)
    except ClientError as e:
        handle_get_item_client_error(e, "Rollback", coordinator_state_table_name)
        return

    if not is_valid_version(version=initial_version, mode='rollback'):
        return

    # 1. Rollback client version (skipped when already fully rolled back to 2.x)
    if initial_version != KclClientVersion.VERSION_2X.value:
        rollback_succeeded, initial_version = rollback_client_version(
            dynamodb_client, coordinator_state_table_name, new_history
        )
        if not rollback_succeeded:
            return

    print(f"Waiting for {GSI_DELETION_WAIT_TIME_SECONDS} seconds before cleaning up KCL 3.0 resources after rollback...")
    time.sleep(GSI_DELETION_WAIT_TIME_SECONDS)

    # 2. Delete the GSI
    delete_gsi_if_exists(dynamodb_client, lease_table_name)

    # 3. Delete worker metrics table
    delete_worker_metrics_table_if_exists(dynamodb_client, worker_metrics_table_name)

    # Log success, with guidance depending on the state the application was in.
    if initial_version == KclClientVersion.UPGRADE_FROM_2X.value:
        print("\nRollback completed. Your application was running 2x compatible functionality.")
        print("Please rollback to your previous application binaries by deploying the code with your previous KCL version.")
    elif initial_version == KclClientVersion.VERSION_3X_WITH_ROLLBACK.value:
        print("\nRollback completed. Your KCL Application was running 3x functionality and will rollback to 2x compatible functionality.")
        print("If you don't see mitigation after a short period of time,"
              " please rollback to your previous application binaries by deploying the code with your previous KCL version.")
    elif initial_version == KclClientVersion.VERSION_2X.value:
        print("\nApplication was already rolled back. Any KCLv3 resources that could be deleted were cleaned up"
              " to avoid charges until the application can be rolled forward with migration.")
|
||||||
|
|
||||||
|
|
||||||
|
def perform_rollforward(dynamodb_client, coordinator_state_table_name):
    """
    Perform KCL 3.0 migration roll-forward by updating MigrationState for the KCL application.

    :param dynamodb_client: Boto3 DynamoDB client
    :param coordinator_state_table_name: Name of the DynamoDB coordinator state table
    """
    # Guard: the coordinator state table must be reachable before we touch it.
    if not validate_tables(dynamodb_client, "Roll-forward", coordinator_state_table_name):
        return

    # Read the current migration state (client version + history).
    try:
        current_version, updated_history = get_current_state(
            dynamodb_client, coordinator_state_table_name)
    except ClientError as get_error:
        handle_get_item_client_error(get_error, "Roll-forward", coordinator_state_table_name)
        return

    # Only an application in a rolled-back state may roll forward.
    if not is_valid_version(version=current_version, mode='rollforward'):
        return

    # NOTE: helper name keeps the original "rollfoward" spelling defined elsewhere in this file.
    rollfoward_client_version(dynamodb_client, coordinator_state_table_name, updated_history)
|
||||||
|
|
||||||
|
|
||||||
|
def run_kcl_migration(mode, lease_table_name, coordinator_state_table_name, worker_metrics_table_name):
    """
    Update the MigrationState in CoordinatorState DDB Table.

    Builds a DynamoDB client and dispatches to the rollback or roll-forward flow.

    :param mode: Either 'rollback' or 'rollforward'
    :param lease_table_name: Name of the DynamoDB KCL lease table
    :param coordinator_state_table_name: Name of the DynamoDB coordinator state table
    :param worker_metrics_table_name: Name of the DynamoDB worker metrics table
    """
    ddb_client = boto3.client('dynamodb', config=config)

    if mode == "rollforward":
        perform_rollforward(ddb_client, coordinator_state_table_name)
        return
    if mode == "rollback":
        perform_rollback(
            ddb_client,
            lease_table_name,
            coordinator_state_table_name,
            worker_metrics_table_name)
        return
    print(f"Invalid mode: {mode}. Please use 'rollback' or 'rollforward'.")
|
||||||
|
|
||||||
|
|
||||||
|
def validate_args(args):
    """
    Validate that the parsed CLI arguments identify the tables needed for the
    requested mode.

    rollforward needs an application name or a coordinator state table name;
    rollback needs an application name or all three explicit table names.

    :param args: argparse.Namespace with mode/application_name/table-name attributes
    :raises ValueError: when the provided arguments are insufficient for the mode
    """
    if args.mode == 'rollforward':
        if args.application_name or args.coordinator_state_table_name:
            return
        raise ValueError(
            "For rollforward mode, either application_name or "
            "coordinator_state_table_name must be provided."
        )

    # rollback: application name alone is enough (defaults derive from it).
    if args.application_name:
        return

    if all([args.lease_table_name,
            args.coordinator_state_table_name,
            args.worker_metrics_table_name]):
        return

    raise ValueError(
        "For rollback mode, either application_name or all three table names "
        "(lease_table_name, coordinator_state_table_name, and "
        "worker_metrics_table_name) must be provided."
    )
|
||||||
|
|
||||||
|
def process_table_names(args):
    """
    Resolve the effective table names from the parsed CLI arguments.

    Explicit table-name arguments win; otherwise names are derived from the
    application name plus the default suffixes. Lease and worker metrics table
    names are only resolved for rollback mode (roll-forward does not use them).

    :param args: Parsed command line arguments
    :return: tuple (mode, lease_table_name, coordinator_state_table_name,
             worker_metrics_table_name)
    """
    mode = args.mode
    app_name = args.application_name

    if args.coordinator_state_table_name:
        coordinator_table = args.coordinator_state_table_name
    else:
        coordinator_table = app_name + DEFAULT_COORDINATOR_STATE_TABLE_SUFFIX

    lease_table = None
    worker_metrics_table = None
    if mode == "rollback":
        lease_table = args.lease_table_name or app_name
        worker_metrics_table = (args.worker_metrics_table_name
                                or app_name + DEFAULT_WORKER_METRICS_TABLE_SUFFIX)

    return mode, lease_table, coordinator_table, worker_metrics_table
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Command-line entry point for the KCL migration tool.
    arg_parser = argparse.ArgumentParser(
        description="""
    KCL Migration Tool
    This tool facilitates the migration and rollback processes for Amazon KCLv3 applications.

    Before running this tool:
    1. Ensure you have the necessary AWS permissions configured to access and modify the following:
      - KCL application DynamoDB tables (lease table and coordinator state table)

    2. Verify that your AWS credentials are properly set up in your environment or AWS config file.

    3. Confirm that you have the correct KCL application name and lease table name (if configured in KCL).

    Usage:
    This tool supports two main operations: rollforward (upgrade) and rollback.
    For detailed usage instructions, use the -h or --help option.
    """,
        formatter_class=argparse.RawDescriptionHelpFormatter)
    arg_parser.add_argument("--mode", choices=['rollback', 'rollforward'], required=True,
                            help="Mode of operation: rollback or rollforward")
    arg_parser.add_argument("--application_name",
                            help="Name of the KCL application. This must match the application name "
                                 "used in the KCL Library configurations.")
    arg_parser.add_argument("--lease_table_name",
                            help="Name of the DynamoDB lease table (defaults to applicationName)."
                                 " If LeaseTable name was specified for the application as part of "
                                 "the KCL configurations, the same name must be passed here.")
    arg_parser.add_argument("--coordinator_state_table_name",
                            help="Name of the DynamoDB coordinator state table "
                                 "(defaults to applicationName-CoordinatorState)."
                                 " If coordinator state table name was specified for the application "
                                 "as part of the KCL configurations, the same name must be passed here.")
    arg_parser.add_argument("--worker_metrics_table_name",
                            help="Name of the DynamoDB worker metrics table "
                                 "(defaults to applicationName-WorkerMetricStats)."
                                 " If worker metrics table name was specified for the application "
                                 "as part of the KCL configurations, the same name must be passed here.")

    cli_args = arg_parser.parse_args()
    validate_args(cli_args)
    run_kcl_migration(*process_table_names(cli_args))
|
||||||
|
|
@ -256,7 +256,8 @@ public class ConfigsBuilder {
|
||||||
* @return LeaseManagementConfig
|
* @return LeaseManagementConfig
|
||||||
*/
|
*/
|
||||||
public LeaseManagementConfig leaseManagementConfig() {
|
public LeaseManagementConfig leaseManagementConfig() {
|
||||||
return new LeaseManagementConfig(tableName(), dynamoDBClient(), kinesisClient(), workerIdentifier());
|
return new LeaseManagementConfig(
|
||||||
|
tableName(), applicationName(), dynamoDBClient(), kinesisClient(), workerIdentifier());
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,76 @@
|
||||||
|
/*
|
||||||
|
* Copyright 2024 Amazon.com, Inc. or its affiliates.
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the
|
||||||
|
* "License"); you may not use this file except in compliance
|
||||||
|
* with the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
package software.amazon.kinesis.common;
|
||||||
|
|
||||||
|
import java.util.Collection;
|
||||||
|
import java.util.Collections;
|
||||||
|
|
||||||
|
import lombok.Data;
|
||||||
|
import lombok.NoArgsConstructor;
|
||||||
|
import lombok.experimental.Accessors;
|
||||||
|
import software.amazon.awssdk.services.dynamodb.model.BillingMode;
|
||||||
|
import software.amazon.awssdk.services.dynamodb.model.Tag;
|
||||||
|
|
||||||
|
/**
 * Configurations of a DDB table created by KCL for its internal operations.
 */
@Data
@Accessors(fluent = true)
@NoArgsConstructor
public class DdbTableConfig {

    /**
     * Seeds the default table name as {@code applicationName-tableSuffix}.
     * Invoked by subclasses (e.g. the coordinator state table config) that
     * represent a specific KCL-internal table.
     */
    protected DdbTableConfig(final String applicationName, final String tableSuffix) {
        this.tableName = applicationName + "-" + tableSuffix;
    }

    /**
     * name to use for the DDB table. If null, it will default to
     * applicationName-tableSuffix. If multiple KCL applications
     * run in the same account, a unique tableName must be provided.
     */
    private String tableName;

    /**
     * Billing mode used to create the DDB table. Defaults to on-demand
     * (PAY_PER_REQUEST).
     */
    private BillingMode billingMode = BillingMode.PAY_PER_REQUEST;

    /**
     * read capacity to provision during DDB table creation,
     * if billing mode is PROVISIONED.
     */
    private long readCapacity;

    /**
     * write capacity to provision during DDB table creation,
     * if billing mode is PROVISIONED.
     */
    private long writeCapacity;

    /**
     * Flag to enable Point in Time Recovery on the DDB table. Disabled by default.
     */
    private boolean pointInTimeRecoveryEnabled = false;

    /**
     * Flag to enable deletion protection on the DDB table. Disabled by default.
     */
    private boolean deletionProtectionEnabled = false;

    /**
     * Tags to add to the DDB table. Empty by default.
     */
    private Collection<Tag> tags = Collections.emptyList();
}
|
||||||
|
|
@ -15,10 +15,13 @@
|
||||||
package software.amazon.kinesis.common;
|
package software.amazon.kinesis.common;
|
||||||
|
|
||||||
import java.time.Duration;
|
import java.time.Duration;
|
||||||
|
import java.util.concurrent.CompletableFuture;
|
||||||
|
import java.util.concurrent.CompletionException;
|
||||||
import java.util.concurrent.ExecutionException;
|
import java.util.concurrent.ExecutionException;
|
||||||
import java.util.concurrent.Future;
|
import java.util.concurrent.Future;
|
||||||
import java.util.concurrent.TimeUnit;
|
import java.util.concurrent.TimeUnit;
|
||||||
import java.util.concurrent.TimeoutException;
|
import java.util.concurrent.TimeoutException;
|
||||||
|
import java.util.function.Supplier;
|
||||||
|
|
||||||
public class FutureUtils {
|
public class FutureUtils {
|
||||||
|
|
||||||
|
|
@ -31,4 +34,15 @@ public class FutureUtils {
|
||||||
throw te;
|
throw te;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
    /**
     * Invokes the supplied asynchronous call and blocks until it completes,
     * returning its value. When the future fails, the {@link CompletionException}
     * thrown by {@code join()} is unwrapped if its cause is a
     * {@link RuntimeException}, so callers see the original exception type;
     * otherwise the {@link CompletionException} itself is rethrown.
     *
     * @param supplier supplier of the {@link CompletableFuture} to execute
     * @param <T>      type of the future's result
     * @return the result of the completed future
     */
    public static <T> T unwrappingFuture(final Supplier<CompletableFuture<T>> supplier) {
        try {
            return supplier.get().join();
        } catch (CompletionException e) {
            if (e.getCause() instanceof RuntimeException) {
                // Rethrow the original failure rather than the wrapper.
                throw (RuntimeException) e.getCause();
            }
            throw e;
        }
    }
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -1,5 +1,5 @@
|
||||||
/*
|
/*
|
||||||
* Copyright 2019 Amazon.com, Inc. or its affiliates.
|
* Copyright 2024 Amazon.com, Inc. or its affiliates.
|
||||||
* Licensed under the Apache License, Version 2.0 (the
|
* Licensed under the Apache License, Version 2.0 (the
|
||||||
* "License"); you may not use this file except in compliance
|
* "License"); you may not use this file except in compliance
|
||||||
* with the License. You may obtain a copy of the License at
|
* with the License. You may obtain a copy of the License at
|
||||||
|
|
@ -12,18 +12,16 @@
|
||||||
* See the License for the specific language governing permissions and
|
* See the License for the specific language governing permissions and
|
||||||
* limitations under the License.
|
* limitations under the License.
|
||||||
*/
|
*/
|
||||||
|
package software.amazon.kinesis.common;
|
||||||
|
|
||||||
package software.amazon.kinesis.leases.dynamodb;
|
public class StackTraceUtils {
    /**
     * Renders an array of stack frames in a printable form, one
     * {@code "\tat <frame>"} line per frame (same shape as the trace lines
     * produced by {@link Throwable#printStackTrace()}).
     *
     * @param stackTrace frames to render, e.g. from {@link Thread#getStackTrace()}
     * @return printable representation; empty string for an empty array
     */
    public static String getPrintableStackTrace(final StackTraceElement[] stackTrace) {
        final StringBuilder rendered = new StringBuilder();
        for (int i = 0; i < stackTrace.length; i++) {
            rendered.append("\tat ").append(stackTrace[i]).append('\n');
        }
        return rendered.toString();
    }
}
|
||||||
|
|
@ -18,6 +18,7 @@ package software.amazon.kinesis.coordinator;
|
||||||
import lombok.Data;
|
import lombok.Data;
|
||||||
import lombok.NonNull;
|
import lombok.NonNull;
|
||||||
import lombok.experimental.Accessors;
|
import lombok.experimental.Accessors;
|
||||||
|
import software.amazon.kinesis.common.DdbTableConfig;
|
||||||
import software.amazon.kinesis.leases.NoOpShardPrioritization;
|
import software.amazon.kinesis.leases.NoOpShardPrioritization;
|
||||||
import software.amazon.kinesis.leases.ShardPrioritization;
|
import software.amazon.kinesis.leases.ShardPrioritization;
|
||||||
|
|
||||||
|
|
@ -27,6 +28,14 @@ import software.amazon.kinesis.leases.ShardPrioritization;
|
||||||
@Data
|
@Data
|
||||||
@Accessors(fluent = true)
|
@Accessors(fluent = true)
|
||||||
public class CoordinatorConfig {
|
public class CoordinatorConfig {
|
||||||
|
|
||||||
|
private static final int PERIODIC_SHARD_SYNC_MAX_WORKERS_DEFAULT = 1;
|
||||||
|
|
||||||
|
public CoordinatorConfig(final String applicationName) {
|
||||||
|
this.applicationName = applicationName;
|
||||||
|
this.coordinatorStateTableConfig = new CoordinatorStateTableConfig(applicationName);
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Application name used by checkpointer to checkpoint.
|
* Application name used by checkpointer to checkpoint.
|
||||||
*
|
*
|
||||||
|
|
@ -96,4 +105,53 @@ public class CoordinatorConfig {
|
||||||
* <p>Default value: 1000L</p>
|
* <p>Default value: 1000L</p>
|
||||||
*/
|
*/
|
||||||
private long schedulerInitializationBackoffTimeMillis = 1000L;
|
private long schedulerInitializationBackoffTimeMillis = 1000L;
|
||||||
|
|
||||||
|
    /**
     * Version the KCL needs to operate in. For more details check the KCLv3 migration
     * documentation.
     */
    public enum ClientVersionConfig {
        /**
         * For an application that was operating with previous KCLv2.x, during
         * upgrade to KCLv3.x, a migration process is needed due to the incompatible
         * changes between the 2 versions. During the migration process, application
         * must use ClientVersion=CLIENT_VERSION_CONFIG_COMPATIBLE_WITH_2X so that it runs in
         * a compatible mode until all workers in the cluster have upgraded to the version
         * running 3.x version (which is determined based on workers emitting WorkerMetricStats).
         * Once all known workers are in 3.x mode, the library auto-toggles to 3.x mode;
         * but prior to that it runs in a mode compatible with 2.x workers.
         * This version also allows rolling back to the compatible mode from the
         * auto-toggled 3.x mode.
         */
        CLIENT_VERSION_CONFIG_COMPATIBLE_WITH_2X,
        /**
         * A new application operating with KCLv3.x will use this value. Also, an application
         * that has successfully upgraded to 3.x version and no longer needs the ability
         * for a rollback to a 2.x compatible version, will use this value. In this version,
         * KCL will operate with new algorithms introduced in 3.x which is not compatible
         * with prior versions. And once in this version, rollback to 2.x is not supported.
         */
        CLIENT_VERSION_CONFIG_3X,
    }
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Client version KCL must operate in, by default it operates in 3.x version which is not
|
||||||
|
* compatible with prior versions.
|
||||||
|
*/
|
||||||
|
private ClientVersionConfig clientVersionConfig = ClientVersionConfig.CLIENT_VERSION_CONFIG_3X;
|
||||||
|
|
||||||
|
public static class CoordinatorStateTableConfig extends DdbTableConfig {
|
||||||
|
private CoordinatorStateTableConfig(final String applicationName) {
|
||||||
|
super(applicationName, "CoordinatorState");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Configuration to control how the CoordinatorState DDB table is created, such as table name,
|
||||||
|
* billing mode, provisioned capacity. If no table name is specified, the table name will
|
||||||
|
* default to applicationName-CoordinatorState. If no billing more is chosen, default is
|
||||||
|
* On-Demand.
|
||||||
|
*/
|
||||||
|
@NonNull
|
||||||
|
private final CoordinatorStateTableConfig coordinatorStateTableConfig;
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,52 @@
|
||||||
|
/*
|
||||||
|
* Copyright 2024 Amazon.com, Inc. or its affiliates.
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the
|
||||||
|
* "License"); you may not use this file except in compliance
|
||||||
|
* with the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
package software.amazon.kinesis.coordinator;
|
||||||
|
|
||||||
|
import java.util.Map;
|
||||||
|
|
||||||
|
import lombok.AccessLevel;
|
||||||
|
import lombok.AllArgsConstructor;
|
||||||
|
import lombok.Builder;
|
||||||
|
import lombok.Data;
|
||||||
|
import lombok.NoArgsConstructor;
|
||||||
|
import lombok.extern.slf4j.Slf4j;
|
||||||
|
import software.amazon.awssdk.services.dynamodb.model.AttributeValue;
|
||||||
|
import software.amazon.kinesis.annotations.KinesisClientInternalApi;
|
||||||
|
|
||||||
|
/**
 * DataModel for CoordinatorState, this data model is used to store various state information required
 * for coordination across the KCL worker fleet. Therefore, the model follows a flexible schema.
 */
@Data
@Builder
@NoArgsConstructor
@AllArgsConstructor(access = AccessLevel.PRIVATE)
@Slf4j
@KinesisClientInternalApi
public class CoordinatorState {
    /** Name of the hash-key attribute of the CoordinatorState DDB table. */
    public static final String COORDINATOR_STATE_TABLE_HASH_KEY_ATTRIBUTE_NAME = "key";

    /**
     * Key value for the item in the CoordinatorState table used for leader
     * election among the KCL workers. The attributes relevant to this item
     * is dictated by the DDB Lock client implementation that is used to
     * provide mutual exclusion.
     */
    public static final String LEADER_HASH_KEY = "Leader";

    /** Hash-key value identifying this state item within the table. */
    private String key;

    /**
     * Remaining attributes of the item; contents vary per state item
     * (flexible schema, see class Javadoc).
     */
    private Map<String, AttributeValue> attributes;
}
|
||||||
|
|
@ -0,0 +1,425 @@
|
||||||
|
/*
|
||||||
|
* Copyright 2024 Amazon.com, Inc. or its affiliates.
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the
|
||||||
|
* "License"); you may not use this file except in compliance
|
||||||
|
* with the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
package software.amazon.kinesis.coordinator;
|
||||||
|
|
||||||
|
import java.time.Duration;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.Collections;
|
||||||
|
import java.util.HashMap;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Map;
|
||||||
|
import java.util.Objects;
|
||||||
|
|
||||||
|
import com.amazonaws.services.dynamodbv2.AmazonDynamoDBLockClientOptions;
|
||||||
|
import com.amazonaws.services.dynamodbv2.AmazonDynamoDBLockClientOptions.AmazonDynamoDBLockClientOptionsBuilder;
|
||||||
|
import lombok.NonNull;
|
||||||
|
import lombok.extern.slf4j.Slf4j;
|
||||||
|
import org.apache.commons.collections4.MapUtils;
|
||||||
|
import software.amazon.awssdk.core.waiters.WaiterResponse;
|
||||||
|
import software.amazon.awssdk.services.dynamodb.DynamoDbAsyncClient;
|
||||||
|
import software.amazon.awssdk.services.dynamodb.DynamoDbClient;
|
||||||
|
import software.amazon.awssdk.services.dynamodb.model.AttributeAction;
|
||||||
|
import software.amazon.awssdk.services.dynamodb.model.AttributeDefinition;
|
||||||
|
import software.amazon.awssdk.services.dynamodb.model.AttributeValue;
|
||||||
|
import software.amazon.awssdk.services.dynamodb.model.AttributeValueUpdate;
|
||||||
|
import software.amazon.awssdk.services.dynamodb.model.BillingMode;
|
||||||
|
import software.amazon.awssdk.services.dynamodb.model.ConditionalCheckFailedException;
|
||||||
|
import software.amazon.awssdk.services.dynamodb.model.CreateTableRequest;
|
||||||
|
import software.amazon.awssdk.services.dynamodb.model.CreateTableResponse;
|
||||||
|
import software.amazon.awssdk.services.dynamodb.model.DescribeTableRequest;
|
||||||
|
import software.amazon.awssdk.services.dynamodb.model.DescribeTableResponse;
|
||||||
|
import software.amazon.awssdk.services.dynamodb.model.DynamoDbException;
|
||||||
|
import software.amazon.awssdk.services.dynamodb.model.ExpectedAttributeValue;
|
||||||
|
import software.amazon.awssdk.services.dynamodb.model.GetItemRequest;
|
||||||
|
import software.amazon.awssdk.services.dynamodb.model.GetItemResponse;
|
||||||
|
import software.amazon.awssdk.services.dynamodb.model.KeySchemaElement;
|
||||||
|
import software.amazon.awssdk.services.dynamodb.model.KeyType;
|
||||||
|
import software.amazon.awssdk.services.dynamodb.model.ProvisionedThroughput;
|
||||||
|
import software.amazon.awssdk.services.dynamodb.model.ProvisionedThroughputExceededException;
|
||||||
|
import software.amazon.awssdk.services.dynamodb.model.PutItemRequest;
|
||||||
|
import software.amazon.awssdk.services.dynamodb.model.ResourceNotFoundException;
|
||||||
|
import software.amazon.awssdk.services.dynamodb.model.ScalarAttributeType;
|
||||||
|
import software.amazon.awssdk.services.dynamodb.model.ScanRequest;
|
||||||
|
import software.amazon.awssdk.services.dynamodb.model.ScanResponse;
|
||||||
|
import software.amazon.awssdk.services.dynamodb.model.TableDescription;
|
||||||
|
import software.amazon.awssdk.services.dynamodb.model.TableStatus;
|
||||||
|
import software.amazon.awssdk.services.dynamodb.model.UpdateItemRequest;
|
||||||
|
import software.amazon.awssdk.services.dynamodb.waiters.DynamoDbAsyncWaiter;
|
||||||
|
import software.amazon.awssdk.utils.CollectionUtils;
|
||||||
|
import software.amazon.kinesis.common.FutureUtils;
|
||||||
|
import software.amazon.kinesis.coordinator.CoordinatorConfig.CoordinatorStateTableConfig;
|
||||||
|
import software.amazon.kinesis.coordinator.migration.MigrationState;
|
||||||
|
import software.amazon.kinesis.leases.DynamoUtils;
|
||||||
|
import software.amazon.kinesis.leases.exceptions.DependencyException;
|
||||||
|
import software.amazon.kinesis.leases.exceptions.InvalidStateException;
|
||||||
|
import software.amazon.kinesis.leases.exceptions.ProvisionedThroughputException;
|
||||||
|
import software.amazon.kinesis.utils.DdbUtil;
|
||||||
|
|
||||||
|
import static java.util.Objects.nonNull;
|
||||||
|
import static software.amazon.kinesis.common.FutureUtils.unwrappingFuture;
|
||||||
|
import static software.amazon.kinesis.coordinator.CoordinatorState.COORDINATOR_STATE_TABLE_HASH_KEY_ATTRIBUTE_NAME;
|
||||||
|
|
||||||
|
/**
 * Data Access Object to abstract accessing {@link CoordinatorState} from
 * the CoordinatorState DDB table.
 */
@Slf4j
public class CoordinatorStateDAO {
    // Async client performs every table operation; the sync client below is a
    // thin adapter over it, required by the DynamoDB lock-client API.
    private final DynamoDbAsyncClient dynamoDbAsyncClient;
    private final DynamoDbClient dynamoDbSyncClient;

    // Table configuration: name, billing mode, capacity, tags, deletion protection, PITR.
    private final CoordinatorStateTableConfig config;

    public CoordinatorStateDAO(
            final DynamoDbAsyncClient dynamoDbAsyncClient, final CoordinatorStateTableConfig config) {
        this.dynamoDbAsyncClient = dynamoDbAsyncClient;
        this.config = config;
        this.dynamoDbSyncClient = createDelegateClient();
    }

    /**
     * Creates the backing DDB table if it does not already exist and waits for
     * it to become ACTIVE.
     *
     * @throws DependencyException if table creation fails or times out
     */
    public void initialize() throws DependencyException {
        createTableIfNotExists();
    }

    /** Wraps the async client in a synchronous adapter for use by the DDB lock client. */
    private DynamoDbClient createDelegateClient() {
        return new DynamoDbAsyncToSyncClientAdapter(dynamoDbAsyncClient);
    }

    /**
     * Returns lock-client options pre-configured with this DAO's table name and
     * partition-key attribute, for callers building a DynamoDB-lock-based leader decider.
     */
    public AmazonDynamoDBLockClientOptionsBuilder getDDBLockClientOptionsBuilder() {
        return AmazonDynamoDBLockClientOptions.builder(dynamoDbSyncClient, config.tableName())
                .withPartitionKeyName(COORDINATOR_STATE_TABLE_HASH_KEY_ATTRIBUTE_NAME);
    }

    /**
     * List all the {@link CoordinatorState} from the DDB table synchronously
     *
     * @throws DependencyException if DynamoDB scan fails in an unexpected way
     * @throws InvalidStateException if ddb table does not exist
     * @throws ProvisionedThroughputException if DynamoDB scan fails due to lack of capacity
     *
     * @return list of state
     */
    public List<CoordinatorState> listCoordinatorState()
            throws ProvisionedThroughputException, DependencyException, InvalidStateException {
        log.debug("Listing coordinatorState");

        final ScanRequest request =
                ScanRequest.builder().tableName(config.tableName()).build();

        try {
            ScanResponse response = FutureUtils.unwrappingFuture(() -> dynamoDbAsyncClient.scan(request));
            final List<CoordinatorState> stateList = new ArrayList<>();
            // Follow lastEvaluatedKey to page through the entire table; a null
            // response terminates the loop once the final page has been consumed.
            while (Objects.nonNull(response)) {
                log.debug("Scan response {}", response);

                response.items().stream().map(this::fromDynamoRecord).forEach(stateList::add);
                if (!CollectionUtils.isNullOrEmpty(response.lastEvaluatedKey())) {
                    final ScanRequest continuationRequest = request.toBuilder()
                            .exclusiveStartKey(response.lastEvaluatedKey())
                            .build();
                    log.debug("Scan request {}", continuationRequest);
                    response = FutureUtils.unwrappingFuture(() -> dynamoDbAsyncClient.scan(continuationRequest));
                } else {
                    log.debug("Scan finished");
                    response = null;
                }
            }
            return stateList;
        } catch (final ProvisionedThroughputExceededException e) {
            log.warn(
                    "Provisioned throughput on {} has exceeded. It is recommended to increase the IOPs"
                            + " on the table.",
                    config.tableName());
            throw new ProvisionedThroughputException(e);
        } catch (final ResourceNotFoundException e) {
            throw new InvalidStateException(
                    String.format("Cannot list coordinatorState, because table %s does not exist", config.tableName()));
        } catch (final DynamoDbException e) {
            throw new DependencyException(e);
        }
    }

    /**
     * Create a new {@link CoordinatorState} if it does not exist.
     * @param state the state to create
     * @return true if state was created, false if it already exists
     *
     * @throws DependencyException if DynamoDB put fails in an unexpected way
     * @throws InvalidStateException if lease table does not exist
     * @throws ProvisionedThroughputException if DynamoDB put fails due to lack of capacity
     */
    public boolean createCoordinatorStateIfNotExists(final CoordinatorState state)
            throws DependencyException, InvalidStateException, ProvisionedThroughputException {
        log.debug("Creating coordinatorState {}", state);

        // Conditional put: succeeds only when no item with this hash key exists yet.
        final PutItemRequest request = PutItemRequest.builder()
                .tableName(config.tableName())
                .item(toDynamoRecord(state))
                .expected(getDynamoNonExistentExpectation())
                .build();

        try {
            FutureUtils.unwrappingFuture(() -> dynamoDbAsyncClient.putItem(request));
        } catch (final ConditionalCheckFailedException e) {
            // Expected race: another worker created the record first; not an error.
            log.info("Not creating coordinator state because the key already exists");
            return false;
        } catch (final ProvisionedThroughputExceededException e) {
            log.warn(
                    "Provisioned throughput on {} has exceeded. It is recommended to increase the IOPs"
                            + " on the table.",
                    config.tableName());
            throw new ProvisionedThroughputException(e);
        } catch (final ResourceNotFoundException e) {
            throw new InvalidStateException(String.format(
                    "Cannot create coordinatorState %s, because table %s does not exist", state, config.tableName()));
        } catch (final DynamoDbException e) {
            throw new DependencyException(e);
        }

        log.info("Created CoordinatorState: {}", state);
        return true;
    }

    /**
     * @param key Get the CoordinatorState for this key
     *
     * @throws InvalidStateException if ddb table does not exist
     * @throws ProvisionedThroughputException if DynamoDB get fails due to lack of capacity
     * @throws DependencyException if DynamoDB get fails in an unexpected way
     *
     * @return state for the specified key, or null if one doesn't exist
     */
    public CoordinatorState getCoordinatorState(@NonNull final String key)
            throws DependencyException, InvalidStateException, ProvisionedThroughputException {
        log.debug("Getting coordinatorState with key {}", key);

        // consistentRead: coordinator decisions are made from this value, so a
        // strongly consistent read is requested.
        final GetItemRequest request = GetItemRequest.builder()
                .tableName(config.tableName())
                .key(getCoordinatorStateKey(key))
                .consistentRead(true)
                .build();

        try {
            final GetItemResponse result = FutureUtils.unwrappingFuture(() -> dynamoDbAsyncClient.getItem(request));

            final Map<String, AttributeValue> dynamoRecord = result.item();
            if (CollectionUtils.isNullOrEmpty(dynamoRecord)) {
                log.debug("No coordinatorState found with key {}, returning null.", key);
                return null;
            }
            return fromDynamoRecord(dynamoRecord);
        } catch (final ProvisionedThroughputExceededException e) {
            log.warn(
                    "Provisioned throughput on {} has exceeded. It is recommended to increase the IOPs"
                            + " on the table.",
                    config.tableName());
            throw new ProvisionedThroughputException(e);
        } catch (final ResourceNotFoundException e) {
            throw new InvalidStateException(String.format(
                    "Cannot get coordinatorState for key %s, because table %s does not exist",
                    key, config.tableName()));
        } catch (final DynamoDbException e) {
            throw new DependencyException(e);
        }
    }

    /**
     * Update fields of the given coordinator state in DynamoDB. Conditional on the provided expectation.
     *
     * @return true if update succeeded, false otherwise when expectations are not met
     *
     * @throws InvalidStateException if table does not exist
     * @throws ProvisionedThroughputException if DynamoDB update fails due to lack of capacity
     * @throws DependencyException if DynamoDB update fails in an unexpected way
     */
    public boolean updateCoordinatorStateWithExpectation(
            @NonNull final CoordinatorState state, final Map<String, ExpectedAttributeValue> expectations)
            throws DependencyException, InvalidStateException, ProvisionedThroughputException {
        // Always require the item to exist with the expected key; caller-supplied
        // expectations are layered on top (and may override per attribute).
        final Map<String, ExpectedAttributeValue> expectationMap = getDynamoExistentExpectation(state.getKey());
        expectationMap.putAll(MapUtils.emptyIfNull(expectations));

        final Map<String, AttributeValueUpdate> updateMap = getDynamoCoordinatorStateUpdate(state);

        final UpdateItemRequest request = UpdateItemRequest.builder()
                .tableName(config.tableName())
                .key(getCoordinatorStateKey(state.getKey()))
                .expected(expectationMap)
                .attributeUpdates(updateMap)
                .build();

        try {
            FutureUtils.unwrappingFuture(() -> dynamoDbAsyncClient.updateItem(request));
        } catch (final ConditionalCheckFailedException e) {
            // Expectations not met — signalled to the caller via the boolean return.
            log.debug("CoordinatorState update {} failed because conditions were not met", state);
            return false;
        } catch (final ProvisionedThroughputExceededException e) {
            log.warn(
                    "Provisioned throughput on {} has exceeded. It is recommended to increase the IOPs"
                            + " on the table.",
                    config.tableName());
            throw new ProvisionedThroughputException(e);
        } catch (final ResourceNotFoundException e) {
            throw new InvalidStateException(String.format(
                    "Cannot update coordinatorState for key %s, because table %s does not exist",
                    state.getKey(), config.tableName()));
        } catch (final DynamoDbException e) {
            throw new DependencyException(e);
        }

        log.info("Coordinator state updated {}", state);
        return true;
    }

    /**
     * Creates the table when absent, then blocks (up to 10 minutes) until it is
     * ACTIVE, and finally enables point-in-time recovery per the table config.
     *
     * @throws DependencyException if the waiter times out before the table becomes active
     */
    private void createTableIfNotExists() throws DependencyException {
        TableDescription tableDescription = getTableDescription();
        if (tableDescription == null) {
            final CreateTableResponse response = unwrappingFuture(() -> dynamoDbAsyncClient.createTable(getRequest()));
            tableDescription = response.tableDescription();
            log.info("DDB Table: {} created", config.tableName());
        } else {
            log.info("Skipping DDB table {} creation as it already exists", config.tableName());
        }

        if (tableDescription.tableStatus() != TableStatus.ACTIVE) {
            log.info("Waiting for DDB Table: {} to become active", config.tableName());
            try (final DynamoDbAsyncWaiter waiter = dynamoDbAsyncClient.waiter()) {
                final WaiterResponse<DescribeTableResponse> response =
                        unwrappingFuture(() -> waiter.waitUntilTableExists(
                                r -> r.tableName(config.tableName()), o -> o.waitTimeout(Duration.ofMinutes(10))));
                // A matched exception (rather than a response) means the waiter gave
                // up before the table became active; surface it as a DependencyException.
                response.matched()
                        .response()
                        .orElseThrow(() -> new DependencyException(new IllegalStateException(
                                "Creating CoordinatorState table timed out",
                                response.matched().exception().orElse(null))));
            }
            unwrappingFuture(() -> DdbUtil.pitrEnabler(config, dynamoDbAsyncClient));
        }
    }

    /**
     * Builds the CreateTableRequest from the table config: single string hash key,
     * optional tags, deletion protection, and billing mode (with provisioned
     * throughput when PROVISIONED).
     */
    private CreateTableRequest getRequest() {
        final CreateTableRequest.Builder requestBuilder = CreateTableRequest.builder()
                .tableName(config.tableName())
                .keySchema(KeySchemaElement.builder()
                        .attributeName(COORDINATOR_STATE_TABLE_HASH_KEY_ATTRIBUTE_NAME)
                        .keyType(KeyType.HASH)
                        .build())
                .attributeDefinitions(AttributeDefinition.builder()
                        .attributeName(COORDINATOR_STATE_TABLE_HASH_KEY_ATTRIBUTE_NAME)
                        .attributeType(ScalarAttributeType.S)
                        .build())
                .deletionProtectionEnabled(config.deletionProtectionEnabled());

        if (nonNull(config.tags()) && !config.tags().isEmpty()) {
            requestBuilder.tags(config.tags());
        }

        switch (config.billingMode()) {
            case PAY_PER_REQUEST:
                requestBuilder.billingMode(BillingMode.PAY_PER_REQUEST);
                break;
            case PROVISIONED:
                requestBuilder.billingMode(BillingMode.PROVISIONED);

                final ProvisionedThroughput throughput = ProvisionedThroughput.builder()
                        .readCapacityUnits(config.readCapacity())
                        .writeCapacityUnits(config.writeCapacity())
                        .build();
                requestBuilder.provisionedThroughput(throughput);
                break;
        }
        return requestBuilder.build();
    }

    /** Builds the primary-key map ({hashKeyAttribute -> key}) for item operations. */
    private Map<String, AttributeValue> getCoordinatorStateKey(@NonNull final String key) {
        return Collections.singletonMap(
                COORDINATOR_STATE_TABLE_HASH_KEY_ATTRIBUTE_NAME, DynamoUtils.createAttributeValue(key));
    }

    /**
     * Converts a raw DDB item into a {@link CoordinatorState}. Migration records
     * are recognized first ({@code MigrationState.deserialize} returns null for
     * non-migration keys); everything else becomes a generic CoordinatorState
     * carrying the remaining attributes.
     */
    private CoordinatorState fromDynamoRecord(final Map<String, AttributeValue> dynamoRecord) {
        // Copy so the hash key can be removed, leaving only payload attributes.
        final HashMap<String, AttributeValue> attributes = new HashMap<>(dynamoRecord);
        final String keyValue =
                DynamoUtils.safeGetString(attributes.remove(COORDINATOR_STATE_TABLE_HASH_KEY_ATTRIBUTE_NAME));

        final MigrationState migrationState = MigrationState.deserialize(keyValue, attributes);
        if (migrationState != null) {
            log.debug("Retrieved MigrationState {}", migrationState);
            return migrationState;
        }

        final CoordinatorState c =
                CoordinatorState.builder().key(keyValue).attributes(attributes).build();
        log.debug("Retrieved coordinatorState {}", c);

        return c;
    }

    /**
     * Serializes a {@link CoordinatorState} into a DDB item: hash key, then
     * MigrationState-specific attributes (when applicable), then generic attributes.
     */
    private Map<String, AttributeValue> toDynamoRecord(final CoordinatorState state) {
        final Map<String, AttributeValue> result = new HashMap<>();
        result.put(COORDINATOR_STATE_TABLE_HASH_KEY_ATTRIBUTE_NAME, DynamoUtils.createAttributeValue(state.getKey()));
        if (state instanceof MigrationState) {
            result.putAll(((MigrationState) state).serialize());
        }
        if (!CollectionUtils.isNullOrEmpty(state.getAttributes())) {
            result.putAll(state.getAttributes());
        }
        return result;
    }

    /**
     * Expectation map asserting the hash key does NOT exist — used to make
     * {@link #createCoordinatorStateIfNotExists} a conditional (create-only) put.
     */
    private Map<String, ExpectedAttributeValue> getDynamoNonExistentExpectation() {
        final Map<String, ExpectedAttributeValue> result = new HashMap<>();

        final ExpectedAttributeValue expectedAV =
                ExpectedAttributeValue.builder().exists(false).build();
        result.put(COORDINATOR_STATE_TABLE_HASH_KEY_ATTRIBUTE_NAME, expectedAV);

        return result;
    }

    /**
     * Expectation map asserting the item exists with the given hash-key value —
     * guards updates against operating on a missing record.
     */
    private Map<String, ExpectedAttributeValue> getDynamoExistentExpectation(final String keyValue) {
        final Map<String, ExpectedAttributeValue> result = new HashMap<>();

        final ExpectedAttributeValue expectedAV = ExpectedAttributeValue.builder()
                .value(AttributeValue.fromS(keyValue))
                .build();
        result.put(COORDINATOR_STATE_TABLE_HASH_KEY_ATTRIBUTE_NAME, expectedAV);

        return result;
    }

    /**
     * Builds the attribute-update map for an UpdateItem call: MigrationState's own
     * updates (when applicable) plus a PUT action for every generic attribute.
     */
    private Map<String, AttributeValueUpdate> getDynamoCoordinatorStateUpdate(final CoordinatorState state) {
        final HashMap<String, AttributeValueUpdate> updates = new HashMap<>();
        if (state instanceof MigrationState) {
            updates.putAll(((MigrationState) state).getDynamoUpdate());
        }
        state.getAttributes()
                .forEach((attribute, value) -> updates.put(
                        attribute,
                        AttributeValueUpdate.builder()
                                .value(value)
                                .action(AttributeAction.PUT)
                                .build()));
        return updates;
    }

    /** Describes the table, returning null when it does not exist. */
    private TableDescription getTableDescription() {
        try {
            final DescribeTableResponse response = unwrappingFuture(() -> dynamoDbAsyncClient.describeTable(
                    DescribeTableRequest.builder().tableName(config.tableName()).build()));
            return response.table();
        } catch (final ResourceNotFoundException e) {
            return null;
        }
    }
}
|
||||||
|
|
@ -28,12 +28,17 @@ import java.util.function.BooleanSupplier;
|
||||||
import java.util.stream.Collectors;
|
import java.util.stream.Collectors;
|
||||||
|
|
||||||
import lombok.extern.slf4j.Slf4j;
|
import lombok.extern.slf4j.Slf4j;
|
||||||
|
import software.amazon.awssdk.services.cloudwatch.model.StandardUnit;
|
||||||
import software.amazon.awssdk.utils.CollectionUtils;
|
import software.amazon.awssdk.utils.CollectionUtils;
|
||||||
import software.amazon.kinesis.leases.Lease;
|
import software.amazon.kinesis.leases.Lease;
|
||||||
import software.amazon.kinesis.leases.LeaseRefresher;
|
import software.amazon.kinesis.leases.LeaseRefresher;
|
||||||
import software.amazon.kinesis.leases.exceptions.DependencyException;
|
import software.amazon.kinesis.leases.exceptions.DependencyException;
|
||||||
import software.amazon.kinesis.leases.exceptions.InvalidStateException;
|
import software.amazon.kinesis.leases.exceptions.InvalidStateException;
|
||||||
import software.amazon.kinesis.leases.exceptions.ProvisionedThroughputException;
|
import software.amazon.kinesis.leases.exceptions.ProvisionedThroughputException;
|
||||||
|
import software.amazon.kinesis.metrics.MetricsFactory;
|
||||||
|
import software.amazon.kinesis.metrics.MetricsLevel;
|
||||||
|
import software.amazon.kinesis.metrics.MetricsScope;
|
||||||
|
import software.amazon.kinesis.metrics.MetricsUtil;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* An implementation of the {@code LeaderDecider} to elect leader(s) based on workerId.
|
* An implementation of the {@code LeaderDecider} to elect leader(s) based on workerId.
|
||||||
|
|
@ -46,7 +51,7 @@ import software.amazon.kinesis.leases.exceptions.ProvisionedThroughputException;
|
||||||
* This ensures redundancy for shard-sync during host failures.
|
* This ensures redundancy for shard-sync during host failures.
|
||||||
*/
|
*/
|
||||||
@Slf4j
|
@Slf4j
|
||||||
class DeterministicShuffleShardSyncLeaderDecider implements LeaderDecider {
|
public class DeterministicShuffleShardSyncLeaderDecider implements LeaderDecider {
|
||||||
// Fixed seed so that the shuffle order is preserved across workers
|
// Fixed seed so that the shuffle order is preserved across workers
|
||||||
static final int DETERMINISTIC_SHUFFLE_SEED = 1947;
|
static final int DETERMINISTIC_SHUFFLE_SEED = 1947;
|
||||||
|
|
||||||
|
|
@ -59,6 +64,7 @@ class DeterministicShuffleShardSyncLeaderDecider implements LeaderDecider {
|
||||||
private final LeaseRefresher leaseRefresher;
|
private final LeaseRefresher leaseRefresher;
|
||||||
private final int numPeriodicShardSyncWorkers;
|
private final int numPeriodicShardSyncWorkers;
|
||||||
private final ScheduledExecutorService leaderElectionThreadPool;
|
private final ScheduledExecutorService leaderElectionThreadPool;
|
||||||
|
private final MetricsFactory metricsFactory;
|
||||||
|
|
||||||
private volatile Set<String> leaders;
|
private volatile Set<String> leaders;
|
||||||
|
|
||||||
|
|
@ -67,11 +73,17 @@ class DeterministicShuffleShardSyncLeaderDecider implements LeaderDecider {
|
||||||
* @param leaderElectionThreadPool Thread-pool to be used for leaderElection.
|
* @param leaderElectionThreadPool Thread-pool to be used for leaderElection.
|
||||||
* @param numPeriodicShardSyncWorkers Number of leaders that will be elected to perform periodic shard syncs.
|
* @param numPeriodicShardSyncWorkers Number of leaders that will be elected to perform periodic shard syncs.
|
||||||
*/
|
*/
|
||||||
DeterministicShuffleShardSyncLeaderDecider(
|
public DeterministicShuffleShardSyncLeaderDecider(
|
||||||
LeaseRefresher leaseRefresher,
|
LeaseRefresher leaseRefresher,
|
||||||
ScheduledExecutorService leaderElectionThreadPool,
|
ScheduledExecutorService leaderElectionThreadPool,
|
||||||
int numPeriodicShardSyncWorkers) {
|
int numPeriodicShardSyncWorkers,
|
||||||
this(leaseRefresher, leaderElectionThreadPool, numPeriodicShardSyncWorkers, new ReentrantReadWriteLock());
|
MetricsFactory metricsFactory) {
|
||||||
|
this(
|
||||||
|
leaseRefresher,
|
||||||
|
leaderElectionThreadPool,
|
||||||
|
numPeriodicShardSyncWorkers,
|
||||||
|
new ReentrantReadWriteLock(),
|
||||||
|
metricsFactory);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -84,11 +96,13 @@ class DeterministicShuffleShardSyncLeaderDecider implements LeaderDecider {
|
||||||
LeaseRefresher leaseRefresher,
|
LeaseRefresher leaseRefresher,
|
||||||
ScheduledExecutorService leaderElectionThreadPool,
|
ScheduledExecutorService leaderElectionThreadPool,
|
||||||
int numPeriodicShardSyncWorkers,
|
int numPeriodicShardSyncWorkers,
|
||||||
ReadWriteLock readWriteLock) {
|
ReadWriteLock readWriteLock,
|
||||||
|
MetricsFactory metricsFactory) {
|
||||||
this.leaseRefresher = leaseRefresher;
|
this.leaseRefresher = leaseRefresher;
|
||||||
this.leaderElectionThreadPool = leaderElectionThreadPool;
|
this.leaderElectionThreadPool = leaderElectionThreadPool;
|
||||||
this.numPeriodicShardSyncWorkers = numPeriodicShardSyncWorkers;
|
this.numPeriodicShardSyncWorkers = numPeriodicShardSyncWorkers;
|
||||||
this.readWriteLock = readWriteLock;
|
this.readWriteLock = readWriteLock;
|
||||||
|
this.metricsFactory = metricsFactory;
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
|
@ -146,8 +160,13 @@ class DeterministicShuffleShardSyncLeaderDecider implements LeaderDecider {
|
||||||
ELECTION_SCHEDULING_INTERVAL_MILLIS,
|
ELECTION_SCHEDULING_INTERVAL_MILLIS,
|
||||||
TimeUnit.MILLISECONDS);
|
TimeUnit.MILLISECONDS);
|
||||||
}
|
}
|
||||||
|
final boolean response = executeConditionCheckWithReadLock(() -> isWorkerLeaderForShardSync(workerId));
|
||||||
return executeConditionCheckWithReadLock(() -> isWorkerLeaderForShardSync(workerId));
|
final MetricsScope metricsScope =
|
||||||
|
MetricsUtil.createMetricsWithOperation(metricsFactory, METRIC_OPERATION_LEADER_DECIDER);
|
||||||
|
metricsScope.addData(
|
||||||
|
METRIC_OPERATION_LEADER_DECIDER_IS_LEADER, response ? 1 : 0, StandardUnit.COUNT, MetricsLevel.DETAILED);
|
||||||
|
MetricsUtil.endScope(metricsScope);
|
||||||
|
return response;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,403 @@
|
||||||
|
/*
|
||||||
|
* Copyright 2024 Amazon.com, Inc. or its affiliates.
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the
|
||||||
|
* "License"); you may not use this file except in compliance
|
||||||
|
* with the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
package software.amazon.kinesis.coordinator;
|
||||||
|
|
||||||
|
import java.time.Duration;
|
||||||
|
import java.util.Objects;
|
||||||
|
import java.util.concurrent.ScheduledExecutorService;
|
||||||
|
import java.util.concurrent.ScheduledFuture;
|
||||||
|
import java.util.concurrent.TimeUnit;
|
||||||
|
import java.util.function.BiFunction;
|
||||||
|
import java.util.function.Supplier;
|
||||||
|
|
||||||
|
import lombok.AccessLevel;
|
||||||
|
import lombok.Builder;
|
||||||
|
import lombok.Getter;
|
||||||
|
import lombok.experimental.Accessors;
|
||||||
|
import lombok.extern.slf4j.Slf4j;
|
||||||
|
import software.amazon.awssdk.annotations.ThreadSafe;
|
||||||
|
import software.amazon.kinesis.annotations.KinesisClientInternalApi;
|
||||||
|
import software.amazon.kinesis.coordinator.MigrationAdaptiveLeaseAssignmentModeProvider.LeaseAssignmentMode;
|
||||||
|
import software.amazon.kinesis.coordinator.assignment.LeaseAssignmentManager;
|
||||||
|
import software.amazon.kinesis.coordinator.migration.ClientVersion;
|
||||||
|
import software.amazon.kinesis.leader.DynamoDBLockBasedLeaderDecider;
|
||||||
|
import software.amazon.kinesis.leader.MigrationAdaptiveLeaderDecider;
|
||||||
|
import software.amazon.kinesis.leases.LeaseManagementConfig.WorkerUtilizationAwareAssignmentConfig;
|
||||||
|
import software.amazon.kinesis.leases.LeaseRefresher;
|
||||||
|
import software.amazon.kinesis.leases.exceptions.DependencyException;
|
||||||
|
import software.amazon.kinesis.metrics.MetricsFactory;
|
||||||
|
import software.amazon.kinesis.worker.metricstats.WorkerMetricStatsDAO;
|
||||||
|
import software.amazon.kinesis.worker.metricstats.WorkerMetricStatsManager;
|
||||||
|
import software.amazon.kinesis.worker.metricstats.WorkerMetricStatsReporter;
|
||||||
|
|
||||||
|
import static software.amazon.kinesis.coordinator.MigrationAdaptiveLeaseAssignmentModeProvider.LeaseAssignmentMode.DEFAULT_LEASE_COUNT_BASED_ASSIGNMENT;
|
||||||
|
import static software.amazon.kinesis.coordinator.MigrationAdaptiveLeaseAssignmentModeProvider.LeaseAssignmentMode.WORKER_UTILIZATION_AWARE_ASSIGNMENT;
|
||||||
|
import static software.amazon.kinesis.coordinator.assignment.LeaseAssignmentManager.DEFAULT_NO_OF_SKIP_STAT_FOR_DEAD_WORKER_THRESHOLD;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* This class is responsible for initializing the KCL components that supports
|
||||||
|
* seamless upgrade from v2.x to v3.x.
|
||||||
|
* During specific versions, it also dynamically switches the functionality
|
||||||
|
* to be either vanilla 3.x or 2.x compatible.
|
||||||
|
*
|
||||||
|
* It is responsible for creating:
|
||||||
|
* 1. LeaderDecider
|
||||||
|
* 2. LAM
|
||||||
|
* 3. WorkerMetricStatsReporter
|
||||||
|
*
|
||||||
|
* It manages initializing the following components at initialization time
|
||||||
|
* 1. workerMetricsDAO and workerMetricsManager
|
||||||
|
* 2. leaderDecider
|
||||||
|
* 3. MigrationAdaptiveLeaseAssignmentModeProvider
|
||||||
|
*
|
||||||
|
* It updates the following components dynamically:
|
||||||
|
* 1. starts/stops LAM
|
||||||
|
* 2. starts/stops WorkerMetricStatsReporter
|
||||||
|
* 3. updates LeaseAssignmentMode to either DEFAULT_LEASE_COUNT_BASED_ASSIGNMENT or WORKER_UTILIZATION_AWARE_ASSIGNMENT
|
||||||
|
* 4. creates GSI (deletion is done by KclMigrationTool)
|
||||||
|
* 5. creates WorkerMetricStats table (deletion is done by KclMigrationTool)
|
||||||
|
* 6. updates LeaderDecider to either DeterministicShuffleShardSyncLeaderDecider or DynamoDBLockBasedLeaderDecider
|
||||||
|
*/
|
||||||
|
@Slf4j
|
||||||
|
@KinesisClientInternalApi
|
||||||
|
@ThreadSafe
|
||||||
|
@Accessors(fluent = true)
|
||||||
|
public final class DynamicMigrationComponentsInitializer {
|
||||||
|
private static final long SCHEDULER_SHUTDOWN_TIMEOUT_SECONDS = 60L;
|
||||||
|
|
||||||
|
@Getter
|
||||||
|
private final MetricsFactory metricsFactory;
|
||||||
|
|
||||||
|
@Getter
|
||||||
|
private final LeaseRefresher leaseRefresher;
|
||||||
|
|
||||||
|
private final CoordinatorStateDAO coordinatorStateDAO;
|
||||||
|
private final ScheduledExecutorService workerMetricsThreadPool;
|
||||||
|
|
||||||
|
@Getter
|
||||||
|
private final WorkerMetricStatsDAO workerMetricsDAO;
|
||||||
|
|
||||||
|
private final WorkerMetricStatsManager workerMetricsManager;
|
||||||
|
private final ScheduledExecutorService lamThreadPool;
|
||||||
|
private final BiFunction<ScheduledExecutorService, LeaderDecider, LeaseAssignmentManager> lamCreator;
|
||||||
|
private final Supplier<MigrationAdaptiveLeaderDecider> adaptiveLeaderDeciderCreator;
|
||||||
|
private final Supplier<DeterministicShuffleShardSyncLeaderDecider> deterministicLeaderDeciderCreator;
|
||||||
|
private final Supplier<DynamoDBLockBasedLeaderDecider> ddbLockBasedLeaderDeciderCreator;
|
||||||
|
|
||||||
|
@Getter
|
||||||
|
private final String workerIdentifier;
|
||||||
|
|
||||||
|
private final WorkerUtilizationAwareAssignmentConfig workerUtilizationAwareAssignmentConfig;
|
||||||
|
|
||||||
|
@Getter
|
||||||
|
private final long workerMetricsExpirySeconds;
|
||||||
|
|
||||||
|
private final MigrationAdaptiveLeaseAssignmentModeProvider leaseModeChangeConsumer;
|
||||||
|
|
||||||
|
@Getter
|
||||||
|
private LeaderDecider leaderDecider;
|
||||||
|
|
||||||
|
private LeaseAssignmentManager leaseAssignmentManager;
|
||||||
|
private ScheduledFuture<?> workerMetricsReporterFuture;
|
||||||
|
private LeaseAssignmentMode currentAssignmentMode;
|
||||||
|
private boolean dualMode;
|
||||||
|
private boolean initialized;
|
||||||
|
|
||||||
|
@Builder(access = AccessLevel.PACKAGE)
|
||||||
|
DynamicMigrationComponentsInitializer(
|
||||||
|
final MetricsFactory metricsFactory,
|
||||||
|
final LeaseRefresher leaseRefresher,
|
||||||
|
final CoordinatorStateDAO coordinatorStateDAO,
|
||||||
|
final ScheduledExecutorService workerMetricsThreadPool,
|
||||||
|
final WorkerMetricStatsDAO workerMetricsDAO,
|
||||||
|
final WorkerMetricStatsManager workerMetricsManager,
|
||||||
|
final ScheduledExecutorService lamThreadPool,
|
||||||
|
final BiFunction<ScheduledExecutorService, LeaderDecider, LeaseAssignmentManager> lamCreator,
|
||||||
|
final Supplier<MigrationAdaptiveLeaderDecider> adaptiveLeaderDeciderCreator,
|
||||||
|
final Supplier<DeterministicShuffleShardSyncLeaderDecider> deterministicLeaderDeciderCreator,
|
||||||
|
final Supplier<DynamoDBLockBasedLeaderDecider> ddbLockBasedLeaderDeciderCreator,
|
||||||
|
final String workerIdentifier,
|
||||||
|
final WorkerUtilizationAwareAssignmentConfig workerUtilizationAwareAssignmentConfig,
|
||||||
|
final MigrationAdaptiveLeaseAssignmentModeProvider leaseAssignmentModeProvider) {
|
||||||
|
this.metricsFactory = metricsFactory;
|
||||||
|
this.leaseRefresher = leaseRefresher;
|
||||||
|
this.coordinatorStateDAO = coordinatorStateDAO;
|
||||||
|
this.workerIdentifier = workerIdentifier;
|
||||||
|
this.workerUtilizationAwareAssignmentConfig = workerUtilizationAwareAssignmentConfig;
|
||||||
|
this.workerMetricsExpirySeconds = Duration.ofMillis(DEFAULT_NO_OF_SKIP_STAT_FOR_DEAD_WORKER_THRESHOLD
|
||||||
|
* workerUtilizationAwareAssignmentConfig.workerMetricsReporterFreqInMillis())
|
||||||
|
.getSeconds();
|
||||||
|
this.workerMetricsManager = workerMetricsManager;
|
||||||
|
this.workerMetricsDAO = workerMetricsDAO;
|
||||||
|
this.workerMetricsThreadPool = workerMetricsThreadPool;
|
||||||
|
this.lamThreadPool = lamThreadPool;
|
||||||
|
this.lamCreator = lamCreator;
|
||||||
|
this.adaptiveLeaderDeciderCreator = adaptiveLeaderDeciderCreator;
|
||||||
|
this.deterministicLeaderDeciderCreator = deterministicLeaderDeciderCreator;
|
||||||
|
this.ddbLockBasedLeaderDeciderCreator = ddbLockBasedLeaderDeciderCreator;
|
||||||
|
this.leaseModeChangeConsumer = leaseAssignmentModeProvider;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void initialize(final ClientVersion migrationStateMachineStartingClientVersion) throws DependencyException {
|
||||||
|
if (initialized) {
|
||||||
|
log.info("Already initialized, nothing to do");
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
// always collect metrics so that when we flip to start reporting we will have accurate historical data.
|
||||||
|
log.info("Start collection of WorkerMetricStats");
|
||||||
|
workerMetricsManager.startManager();
|
||||||
|
if (migrationStateMachineStartingClientVersion == ClientVersion.CLIENT_VERSION_3X) {
|
||||||
|
initializeComponentsFor3x();
|
||||||
|
} else {
|
||||||
|
initializeComponentsForMigration(migrationStateMachineStartingClientVersion);
|
||||||
|
}
|
||||||
|
log.info("Initialized dual mode {} current assignment mode {}", dualMode, currentAssignmentMode);
|
||||||
|
|
||||||
|
log.info("Creating LAM");
|
||||||
|
leaseAssignmentManager = lamCreator.apply(lamThreadPool, leaderDecider);
|
||||||
|
log.info("Initializing {}", leaseModeChangeConsumer.getClass().getSimpleName());
|
||||||
|
leaseModeChangeConsumer.initialize(dualMode, currentAssignmentMode);
|
||||||
|
initialized = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
private void initializeComponentsFor3x() {
|
||||||
|
log.info("Initializing for 3x functionality");
|
||||||
|
dualMode = false;
|
||||||
|
currentAssignmentMode = WORKER_UTILIZATION_AWARE_ASSIGNMENT;
|
||||||
|
log.info("Initializing dualMode {} assignmentMode {}", dualMode, currentAssignmentMode);
|
||||||
|
leaderDecider = ddbLockBasedLeaderDeciderCreator.get();
|
||||||
|
log.info("Initializing {}", leaderDecider.getClass().getSimpleName());
|
||||||
|
leaderDecider.initialize();
|
||||||
|
}
|
||||||
|
|
||||||
|
private void initializeComponentsForMigration(final ClientVersion migrationStateMachineStartingClientVersion) {
|
||||||
|
log.info("Initializing for migration to 3x");
|
||||||
|
dualMode = true;
|
||||||
|
final LeaderDecider initialLeaderDecider;
|
||||||
|
if (migrationStateMachineStartingClientVersion == ClientVersion.CLIENT_VERSION_3X_WITH_ROLLBACK) {
|
||||||
|
currentAssignmentMode = WORKER_UTILIZATION_AWARE_ASSIGNMENT;
|
||||||
|
initialLeaderDecider = ddbLockBasedLeaderDeciderCreator.get();
|
||||||
|
} else {
|
||||||
|
currentAssignmentMode = DEFAULT_LEASE_COUNT_BASED_ASSIGNMENT;
|
||||||
|
initialLeaderDecider = deterministicLeaderDeciderCreator.get();
|
||||||
|
}
|
||||||
|
log.info("Initializing dualMode {} assignmentMode {}", dualMode, currentAssignmentMode);
|
||||||
|
|
||||||
|
final MigrationAdaptiveLeaderDecider adaptiveLeaderDecider = adaptiveLeaderDeciderCreator.get();
|
||||||
|
log.info(
|
||||||
|
"Initializing MigrationAdaptiveLeaderDecider with {}",
|
||||||
|
initialLeaderDecider.getClass().getSimpleName());
|
||||||
|
adaptiveLeaderDecider.updateLeaderDecider(initialLeaderDecider);
|
||||||
|
this.leaderDecider = adaptiveLeaderDecider;
|
||||||
|
}
|
||||||
|
|
||||||
|
void shutdown() {
|
||||||
|
log.info("Shutting down components");
|
||||||
|
if (initialized) {
|
||||||
|
log.info("Stopping LAM, LeaderDecider, workerMetrics reporting and collection");
|
||||||
|
leaseAssignmentManager.stop();
|
||||||
|
// leader decider is shut down later when scheduler is doing a final shutdown
|
||||||
|
// since scheduler still accesses the leader decider while shutting down
|
||||||
|
stopWorkerMetricsReporter();
|
||||||
|
workerMetricsManager.stopManager();
|
||||||
|
}
|
||||||
|
|
||||||
|
// lam does not manage lifecycle of its threadpool to easily stop/start dynamically.
|
||||||
|
// once migration code is obsolete (i.e. all 3x functionality is the baseline and no
|
||||||
|
// migration is needed), it can be moved inside lam
|
||||||
|
log.info("Shutting down lamThreadPool and workerMetrics reporter thread pool");
|
||||||
|
lamThreadPool.shutdown();
|
||||||
|
workerMetricsThreadPool.shutdown();
|
||||||
|
try {
|
||||||
|
if (!lamThreadPool.awaitTermination(SCHEDULER_SHUTDOWN_TIMEOUT_SECONDS, TimeUnit.SECONDS)) {
|
||||||
|
lamThreadPool.shutdownNow();
|
||||||
|
}
|
||||||
|
} catch (final InterruptedException e) {
|
||||||
|
log.warn("Interrupted while waiting for shutdown of LeaseAssignmentManager ThreadPool", e);
|
||||||
|
lamThreadPool.shutdownNow();
|
||||||
|
}
|
||||||
|
|
||||||
|
try {
|
||||||
|
if (!workerMetricsThreadPool.awaitTermination(SCHEDULER_SHUTDOWN_TIMEOUT_SECONDS, TimeUnit.SECONDS)) {
|
||||||
|
workerMetricsThreadPool.shutdownNow();
|
||||||
|
}
|
||||||
|
} catch (final InterruptedException e) {
|
||||||
|
Thread.currentThread().interrupt();
|
||||||
|
log.warn("Interrupted while waiting for shutdown of WorkerMetricStatsManager ThreadPool", e);
|
||||||
|
workerMetricsThreadPool.shutdownNow();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private void startWorkerMetricsReporting() throws DependencyException {
|
||||||
|
if (workerMetricsReporterFuture != null) {
|
||||||
|
log.info("Worker metrics reporting is already running...");
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
log.info("Initializing WorkerMetricStats");
|
||||||
|
this.workerMetricsDAO.initialize();
|
||||||
|
log.info("Starting worker metrics reporter");
|
||||||
|
// Start with a delay for workerStatsManager to capture some values and start reporting.
|
||||||
|
workerMetricsReporterFuture = workerMetricsThreadPool.scheduleAtFixedRate(
|
||||||
|
new WorkerMetricStatsReporter(metricsFactory, workerIdentifier, workerMetricsManager, workerMetricsDAO),
|
||||||
|
workerUtilizationAwareAssignmentConfig.inMemoryWorkerMetricsCaptureFrequencyMillis() * 2L,
|
||||||
|
workerUtilizationAwareAssignmentConfig.workerMetricsReporterFreqInMillis(),
|
||||||
|
TimeUnit.MILLISECONDS);
|
||||||
|
}
|
||||||
|
|
||||||
|
private void stopWorkerMetricsReporter() {
|
||||||
|
log.info("Stopping worker metrics reporter");
|
||||||
|
if (workerMetricsReporterFuture != null) {
|
||||||
|
workerMetricsReporterFuture.cancel(false);
|
||||||
|
workerMetricsReporterFuture = null;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Create LeaseOwnerToLeaseKey GSI for the lease table
|
||||||
|
* @param blockingWait whether to wait for the GSI creation or not, if false, the gsi creation will be initiated
|
||||||
|
* but this call will not block for its creation
|
||||||
|
* @throws DependencyException If DDB fails unexpectedly when creating the GSI
|
||||||
|
*/
|
||||||
|
private void createGsi(final boolean blockingWait) throws DependencyException {
|
||||||
|
log.info("Creating Lease table GSI if it does not exist");
|
||||||
|
// KCLv3.0 always starts with GSI available
|
||||||
|
leaseRefresher.createLeaseOwnerToLeaseKeyIndexIfNotExists();
|
||||||
|
|
||||||
|
if (blockingWait) {
|
||||||
|
log.info("Waiting for Lease table GSI creation");
|
||||||
|
final long secondsBetweenPolls = 10L;
|
||||||
|
final long timeoutSeconds = 600L;
|
||||||
|
final boolean isIndexActive =
|
||||||
|
leaseRefresher.waitUntilLeaseOwnerToLeaseKeyIndexExists(secondsBetweenPolls, timeoutSeconds);
|
||||||
|
|
||||||
|
if (!isIndexActive) {
|
||||||
|
throw new DependencyException(
|
||||||
|
new IllegalStateException("Creating LeaseOwnerToLeaseKeyIndex on Lease table timed out"));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Initialize KCL with components and configuration to support upgrade from 2x. This can happen
|
||||||
|
* at KCL Worker startup when MigrationStateMachine starts in ClientVersion.CLIENT_VERSION_UPGRADE_FROM_2X.
|
||||||
|
* Or Dynamically during roll-forward from ClientVersion.CLIENT_VERSION_2X.
|
||||||
|
*/
|
||||||
|
public synchronized void initializeClientVersionForUpgradeFrom2x(final ClientVersion fromClientVersion)
|
||||||
|
throws DependencyException {
|
||||||
|
log.info("Initializing KCL components for upgrade from 2x from {}", fromClientVersion);
|
||||||
|
|
||||||
|
createGsi(false);
|
||||||
|
startWorkerMetricsReporting();
|
||||||
|
// LAM is not started until the dynamic flip to 3xWithRollback
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Initialize KCL with components and configuration to run vanilla 3x functionality. This can happen
|
||||||
|
* at KCL Worker startup when MigrationStateMachine starts in ClientVersion.CLIENT_VERSION_3X, or dynamically
|
||||||
|
* during a new deployment when existing worker are in ClientVersion.CLIENT_VERSION_3X_WITH_ROLLBACK
|
||||||
|
*/
|
||||||
|
public synchronized void initializeClientVersionFor3x(final ClientVersion fromClientVersion)
|
||||||
|
throws DependencyException {
|
||||||
|
log.info("Initializing KCL components for 3x from {}", fromClientVersion);
|
||||||
|
|
||||||
|
log.info("Initializing LeaseAssignmentManager, DDB-lock-based leader decider, WorkerMetricStats manager"
|
||||||
|
+ " and creating the Lease table GSI if it does not exist");
|
||||||
|
if (fromClientVersion == ClientVersion.CLIENT_VERSION_INIT) {
|
||||||
|
// gsi may already exist and be active for migrated application.
|
||||||
|
createGsi(true);
|
||||||
|
startWorkerMetricsReporting();
|
||||||
|
log.info("Starting LAM");
|
||||||
|
leaseAssignmentManager.start();
|
||||||
|
}
|
||||||
|
// nothing to do when transitioning from CLIENT_VERSION_3X_WITH_ROLLBACK.
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Initialize KCL with components and configuration to run 2x compatible functionality
|
||||||
|
* while allowing roll-forward. This can happen at KCL Worker startup when MigrationStateMachine
|
||||||
|
* starts in ClientVersion.CLIENT_VERSION_2X (after a rollback)
|
||||||
|
* Or Dynamically during rollback from CLIENT_VERSION_UPGRADE_FROM_2X or CLIENT_VERSION_3X_WITH_ROLLBACK.
|
||||||
|
*/
|
||||||
|
public synchronized void initializeClientVersionFor2x(final ClientVersion fromClientVersion) {
|
||||||
|
log.info("Initializing KCL components for rollback to 2x from {}", fromClientVersion);
|
||||||
|
|
||||||
|
if (fromClientVersion != ClientVersion.CLIENT_VERSION_INIT) {
|
||||||
|
// dynamic rollback
|
||||||
|
stopWorkerMetricsReporter();
|
||||||
|
// Migration Tool will delete the lease table LeaseOwner GSI
|
||||||
|
// and WorkerMetricStats table
|
||||||
|
}
|
||||||
|
|
||||||
|
if (fromClientVersion == ClientVersion.CLIENT_VERSION_3X_WITH_ROLLBACK) {
|
||||||
|
// we are rolling back after flip
|
||||||
|
currentAssignmentMode = DEFAULT_LEASE_COUNT_BASED_ASSIGNMENT;
|
||||||
|
notifyLeaseAssignmentModeChange();
|
||||||
|
log.info("Stopping LAM");
|
||||||
|
leaseAssignmentManager.stop();
|
||||||
|
final LeaderDecider leaderDecider = deterministicLeaderDeciderCreator.get();
|
||||||
|
if (this.leaderDecider instanceof MigrationAdaptiveLeaderDecider) {
|
||||||
|
log.info(
|
||||||
|
"Updating LeaderDecider to {}", leaderDecider.getClass().getSimpleName());
|
||||||
|
((MigrationAdaptiveLeaderDecider) this.leaderDecider).updateLeaderDecider(leaderDecider);
|
||||||
|
} else {
|
||||||
|
throw new IllegalStateException(String.format("Unexpected leader decider %s", this.leaderDecider));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Initialize KCL with components and configuration to run vanilla 3x functionality
|
||||||
|
* while allowing roll-back to 2x functionality. This can happen at KCL Worker startup
|
||||||
|
* when MigrationStateMachine starts in ClientVersion.CLIENT_VERSION_3X_WITH_ROLLBACK (after the flip)
|
||||||
|
* Or Dynamically during flip from CLIENT_VERSION_UPGRADE_FROM_2X.
|
||||||
|
*/
|
||||||
|
public synchronized void initializeClientVersionFor3xWithRollback(final ClientVersion fromClientVersion)
|
||||||
|
throws DependencyException {
|
||||||
|
log.info("Initializing KCL components for 3x with rollback from {}", fromClientVersion);
|
||||||
|
|
||||||
|
if (fromClientVersion == ClientVersion.CLIENT_VERSION_UPGRADE_FROM_2X) {
|
||||||
|
// dynamic flip
|
||||||
|
currentAssignmentMode = WORKER_UTILIZATION_AWARE_ASSIGNMENT;
|
||||||
|
notifyLeaseAssignmentModeChange();
|
||||||
|
final LeaderDecider leaderDecider = ddbLockBasedLeaderDeciderCreator.get();
|
||||||
|
log.info("Updating LeaderDecider to {}", leaderDecider.getClass().getSimpleName());
|
||||||
|
((MigrationAdaptiveLeaderDecider) this.leaderDecider).updateLeaderDecider(leaderDecider);
|
||||||
|
} else {
|
||||||
|
startWorkerMetricsReporting();
|
||||||
|
}
|
||||||
|
|
||||||
|
log.info("Starting LAM");
|
||||||
|
leaseAssignmentManager.start();
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Synchronously invoke the consumer to change the lease assignment mode.
|
||||||
|
*/
|
||||||
|
private void notifyLeaseAssignmentModeChange() {
|
||||||
|
if (dualMode) {
|
||||||
|
log.info("Notifying {} of {}", leaseModeChangeConsumer, currentAssignmentMode);
|
||||||
|
if (Objects.nonNull(leaseModeChangeConsumer)) {
|
||||||
|
try {
|
||||||
|
leaseModeChangeConsumer.updateLeaseAssignmentMode(currentAssignmentMode);
|
||||||
|
} catch (final Exception e) {
|
||||||
|
log.warn("LeaseAssignmentMode change consumer threw exception", e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
throw new IllegalStateException("Unexpected assignment mode change");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,144 @@
|
||||||
|
package software.amazon.kinesis.coordinator;
|
||||||
|
|
||||||
|
import java.util.concurrent.CompletableFuture;
|
||||||
|
import java.util.concurrent.CompletionException;
|
||||||
|
import java.util.function.Supplier;
|
||||||
|
|
||||||
|
import software.amazon.awssdk.services.dynamodb.DynamoDbAsyncClient;
|
||||||
|
import software.amazon.awssdk.services.dynamodb.DynamoDbClient;
|
||||||
|
import software.amazon.awssdk.services.dynamodb.model.BatchGetItemRequest;
|
||||||
|
import software.amazon.awssdk.services.dynamodb.model.BatchGetItemResponse;
|
||||||
|
import software.amazon.awssdk.services.dynamodb.model.BatchWriteItemRequest;
|
||||||
|
import software.amazon.awssdk.services.dynamodb.model.BatchWriteItemResponse;
|
||||||
|
import software.amazon.awssdk.services.dynamodb.model.CreateTableRequest;
|
||||||
|
import software.amazon.awssdk.services.dynamodb.model.CreateTableResponse;
|
||||||
|
import software.amazon.awssdk.services.dynamodb.model.DeleteItemRequest;
|
||||||
|
import software.amazon.awssdk.services.dynamodb.model.DeleteItemResponse;
|
||||||
|
import software.amazon.awssdk.services.dynamodb.model.DeleteTableRequest;
|
||||||
|
import software.amazon.awssdk.services.dynamodb.model.DeleteTableResponse;
|
||||||
|
import software.amazon.awssdk.services.dynamodb.model.DescribeTableRequest;
|
||||||
|
import software.amazon.awssdk.services.dynamodb.model.DescribeTableResponse;
|
||||||
|
import software.amazon.awssdk.services.dynamodb.model.GetItemRequest;
|
||||||
|
import software.amazon.awssdk.services.dynamodb.model.GetItemResponse;
|
||||||
|
import software.amazon.awssdk.services.dynamodb.model.PutItemRequest;
|
||||||
|
import software.amazon.awssdk.services.dynamodb.model.PutItemResponse;
|
||||||
|
import software.amazon.awssdk.services.dynamodb.model.QueryRequest;
|
||||||
|
import software.amazon.awssdk.services.dynamodb.model.QueryResponse;
|
||||||
|
import software.amazon.awssdk.services.dynamodb.model.ScanRequest;
|
||||||
|
import software.amazon.awssdk.services.dynamodb.model.ScanResponse;
|
||||||
|
import software.amazon.awssdk.services.dynamodb.model.UpdateItemRequest;
|
||||||
|
import software.amazon.awssdk.services.dynamodb.model.UpdateItemResponse;
|
||||||
|
import software.amazon.awssdk.services.dynamodb.paginators.BatchGetItemIterable;
|
||||||
|
import software.amazon.awssdk.services.dynamodb.paginators.QueryIterable;
|
||||||
|
import software.amazon.awssdk.services.dynamodb.paginators.ScanIterable;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* DDB Lock client depends on DynamoDbClient and KCL only has DynamoDbAsyncClient configured.
|
||||||
|
* This wrapper delegates APIs from sync client to async client internally so that it can
|
||||||
|
* be used with the DDB Lock client.
|
||||||
|
*/
|
||||||
|
public class DynamoDbAsyncToSyncClientAdapter implements DynamoDbClient {
|
||||||
|
private final DynamoDbAsyncClient asyncClient;
|
||||||
|
|
||||||
|
public DynamoDbAsyncToSyncClientAdapter(final DynamoDbAsyncClient asyncClient) {
|
||||||
|
this.asyncClient = asyncClient;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String serviceName() {
|
||||||
|
return asyncClient.serviceName();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void close() {
|
||||||
|
asyncClient.close();
|
||||||
|
}
|
||||||
|
|
||||||
|
private <T> T handleException(final Supplier<CompletableFuture<T>> task) {
|
||||||
|
try {
|
||||||
|
return task.get().join();
|
||||||
|
} catch (final CompletionException e) {
|
||||||
|
rethrow(e.getCause());
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public CreateTableResponse createTable(final CreateTableRequest request) {
|
||||||
|
return handleException(() -> asyncClient.createTable(request));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public DescribeTableResponse describeTable(final DescribeTableRequest request) {
|
||||||
|
return handleException(() -> asyncClient.describeTable(request));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public DeleteTableResponse deleteTable(final DeleteTableRequest request) {
|
||||||
|
return handleException(() -> asyncClient.deleteTable(request));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public DeleteItemResponse deleteItem(final DeleteItemRequest request) {
|
||||||
|
return handleException(() -> asyncClient.deleteItem(request));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public GetItemResponse getItem(final GetItemRequest request) {
|
||||||
|
return handleException(() -> asyncClient.getItem(request));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public PutItemResponse putItem(final PutItemRequest request) {
|
||||||
|
return handleException(() -> asyncClient.putItem(request));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public UpdateItemResponse updateItem(final UpdateItemRequest request) {
|
||||||
|
return handleException(() -> asyncClient.updateItem(request));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public QueryResponse query(final QueryRequest request) {
|
||||||
|
return handleException(() -> asyncClient.query(request));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public ScanResponse scan(final ScanRequest request) {
|
||||||
|
return handleException(() -> asyncClient.scan(request));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public QueryIterable queryPaginator(final QueryRequest request) {
|
||||||
|
return new QueryIterable(this, request);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public ScanIterable scanPaginator(final ScanRequest request) {
|
||||||
|
return new ScanIterable(this, request);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public BatchGetItemResponse batchGetItem(final BatchGetItemRequest request) {
|
||||||
|
return handleException(() -> asyncClient.batchGetItem(request));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public BatchWriteItemResponse batchWriteItem(final BatchWriteItemRequest request) {
|
||||||
|
return handleException(() -> asyncClient.batchWriteItem(request));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public BatchGetItemIterable batchGetItemPaginator(final BatchGetItemRequest request) {
|
||||||
|
return new BatchGetItemIterable(this, request);
|
||||||
|
}
|
||||||
|
|
||||||
|
private static void rethrow(final Throwable e) {
|
||||||
|
castAndThrow(e);
|
||||||
|
}
|
||||||
|
|
||||||
|
@SuppressWarnings("unchecked")
|
||||||
|
private static <T extends Throwable> void castAndThrow(final Throwable e) throws T {
|
||||||
|
throw (T) e;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -21,6 +21,8 @@ package software.amazon.kinesis.coordinator;
|
||||||
* worker is one of the leaders designated to execute shard-sync and then acts accordingly.
|
* worker is one of the leaders designated to execute shard-sync and then acts accordingly.
|
||||||
*/
|
*/
|
||||||
public interface LeaderDecider {
|
public interface LeaderDecider {
|
||||||
|
String METRIC_OPERATION_LEADER_DECIDER = "LeaderDecider";
|
||||||
|
String METRIC_OPERATION_LEADER_DECIDER_IS_LEADER = METRIC_OPERATION_LEADER_DECIDER + ":IsLeader";
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Method invoked to check the given workerId corresponds to one of the workers
|
* Method invoked to check the given workerId corresponds to one of the workers
|
||||||
|
|
@ -36,4 +38,32 @@ public interface LeaderDecider {
|
||||||
* being used in the LeaderDecider implementation.
|
* being used in the LeaderDecider implementation.
|
||||||
*/
|
*/
|
||||||
void shutdown();
|
void shutdown();
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Performs initialization tasks for decider if any.
|
||||||
|
*/
|
||||||
|
default void initialize() {
|
||||||
|
// No-op by default
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns if any ACTIVE leader exists that is elected by the current implementation.
|
||||||
|
* Note: Some implementation (like DeterministicShuffleShardSyncLeaderDecider) will always have a leader and will
|
||||||
|
* return true always.
|
||||||
|
*/
|
||||||
|
default boolean isAnyLeaderElected() {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* If the current worker is the leader, then releases the leadership else does nothing.
|
||||||
|
* This might not be relevant for some implementations, for e.g. DeterministicShuffleShardSyncLeaderDecider does
|
||||||
|
* not have mechanism to release leadership.
|
||||||
|
*
|
||||||
|
* Current worker if leader releases leadership, it's possible that the current worker assume leadership sometime
|
||||||
|
* later again in future elections.
|
||||||
|
*/
|
||||||
|
default void releaseLeadershipIfHeld() {
|
||||||
|
// No-op by default
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,126 @@
|
||||||
|
/*
|
||||||
|
* Copyright 2024 Amazon.com, Inc. or its affiliates.
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the
|
||||||
|
* "License"); you may not use this file except in compliance
|
||||||
|
* with the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
package software.amazon.kinesis.coordinator;
|
||||||
|
|
||||||
|
import lombok.NoArgsConstructor;
|
||||||
|
import lombok.extern.slf4j.Slf4j;
|
||||||
|
import software.amazon.awssdk.annotations.ThreadSafe;
|
||||||
|
import software.amazon.kinesis.annotations.KinesisClientInternalApi;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Provides the lease assignment mode KCL must operate in during migration
|
||||||
|
* from 2.x to 3.x.
|
||||||
|
* KCL v2.x lease assignment is based on distributed-worker-stealing algorithm
|
||||||
|
* which balances lease count across workers.
|
||||||
|
* KCL v3.x lease assignment is based on a centralized-lease-assignment algorithm
|
||||||
|
* which balances resource utilization metrics(e.g. CPU utilization) across workers.
|
||||||
|
*
|
||||||
|
* For a new application starting in KCL v3.x, there is no migration needed,
|
||||||
|
* so KCL will initialize with the lease assignment mode accordingly, and it will
|
||||||
|
* not change dynamically.
|
||||||
|
*
|
||||||
|
* During upgrade from 2.x to 3.x, KCL library needs an ability to
|
||||||
|
* start in v2.x assignment mode but dynamically change to v3.x assignment.
|
||||||
|
* In this case, both 2.x and 3.x lease assignment will be running but one
|
||||||
|
* of them will be a no-op based on the mode.
|
||||||
|
*
|
||||||
|
* The methods and internal state is guarded for concurrent access to allow
|
||||||
|
* both lease assignment algorithms to access the state concurrently while
|
||||||
|
* it could be dynamically updated.
|
||||||
|
*/
|
||||||
|
@KinesisClientInternalApi
|
||||||
|
@Slf4j
|
||||||
|
@ThreadSafe
|
||||||
|
@NoArgsConstructor
|
||||||
|
public final class MigrationAdaptiveLeaseAssignmentModeProvider {
|
||||||
|
|
||||||
|
public enum LeaseAssignmentMode {
|
||||||
|
/**
|
||||||
|
* This is the 2.x assignment mode.
|
||||||
|
* This mode assigns leases based on the number of leases.
|
||||||
|
* This mode involves each worker independently determining how many leases to pick or how many leases to steal
|
||||||
|
* from other workers.
|
||||||
|
*/
|
||||||
|
DEFAULT_LEASE_COUNT_BASED_ASSIGNMENT,
|
||||||
|
|
||||||
|
/**
|
||||||
|
* This is the 3.x assigment mode.
|
||||||
|
* This mode uses each worker's resource utilization to perform lease assignment.
|
||||||
|
* Assignment is done by a single worker (elected leader), which looks at WorkerMetricStats for each worker to
|
||||||
|
* determine lease assignment.
|
||||||
|
*
|
||||||
|
* This mode primarily does
|
||||||
|
* 1. Starts WorkerMetricStatsManager on the worker which starts publishing WorkerMetricStats
|
||||||
|
* 2. Starts the LeaseDiscoverer
|
||||||
|
* 3. Creates if not already available the LeaseOwnerToLeaseKey GSI on the lease table and validate that is
|
||||||
|
* ACTIVE.
|
||||||
|
*/
|
||||||
|
WORKER_UTILIZATION_AWARE_ASSIGNMENT;
|
||||||
|
}
|
||||||
|
|
||||||
|
private LeaseAssignmentMode currentMode;
|
||||||
|
private boolean initialized = false;
|
||||||
|
private boolean dynamicModeChangeSupportNeeded;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Specify whether both lease assignment algorithms should be initialized to
|
||||||
|
* support dynamically changing lease mode.
|
||||||
|
* @return true if lease assignment mode can change dynamically
|
||||||
|
* false otherwise.
|
||||||
|
*/
|
||||||
|
public synchronized boolean dynamicModeChangeSupportNeeded() {
|
||||||
|
return dynamicModeChangeSupportNeeded;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Provide the current lease assignment mode in which KCL should perform lease assignment
|
||||||
|
* @return the current lease assignment mode
|
||||||
|
*/
|
||||||
|
public synchronized LeaseAssignmentMode getLeaseAssignmentMode() {
|
||||||
|
if (!initialized) {
|
||||||
|
throw new IllegalStateException("AssignmentMode is not initialized");
|
||||||
|
}
|
||||||
|
return currentMode;
|
||||||
|
}
|
||||||
|
|
||||||
|
synchronized void initialize(final boolean dynamicModeChangeSupportNeeded, final LeaseAssignmentMode mode) {
|
||||||
|
if (!initialized) {
|
||||||
|
log.info("Initializing dynamicModeChangeSupportNeeded {} mode {}", dynamicModeChangeSupportNeeded, mode);
|
||||||
|
this.dynamicModeChangeSupportNeeded = dynamicModeChangeSupportNeeded;
|
||||||
|
this.currentMode = mode;
|
||||||
|
this.initialized = true;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
log.info(
|
||||||
|
"Already initialized dynamicModeChangeSupportNeeded {} mode {}. Ignoring new values {}, {}",
|
||||||
|
this.dynamicModeChangeSupportNeeded,
|
||||||
|
this.currentMode,
|
||||||
|
dynamicModeChangeSupportNeeded,
|
||||||
|
mode);
|
||||||
|
}
|
||||||
|
|
||||||
|
synchronized void updateLeaseAssignmentMode(final LeaseAssignmentMode mode) {
|
||||||
|
if (!initialized) {
|
||||||
|
throw new IllegalStateException("Cannot change mode before initializing");
|
||||||
|
}
|
||||||
|
if (dynamicModeChangeSupportNeeded) {
|
||||||
|
log.info("Changing Lease assignment mode from {} to {}", currentMode, mode);
|
||||||
|
this.currentMode = mode;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
throw new IllegalStateException(String.format(
|
||||||
|
"Lease assignment mode already initialized to %s cannot" + " change to %s", this.currentMode, mode));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -87,7 +87,7 @@ class PeriodicShardSyncManager {
|
||||||
private final Map<StreamIdentifier, HashRangeHoleTracker> hashRangeHoleTrackerMap = new HashMap<>();
|
private final Map<StreamIdentifier, HashRangeHoleTracker> hashRangeHoleTrackerMap = new HashMap<>();
|
||||||
|
|
||||||
private final String workerId;
|
private final String workerId;
|
||||||
private final LeaderDecider leaderDecider;
|
private LeaderDecider leaderDecider;
|
||||||
private final LeaseRefresher leaseRefresher;
|
private final LeaseRefresher leaseRefresher;
|
||||||
private final Map<StreamIdentifier, StreamConfig> currentStreamConfigMap;
|
private final Map<StreamIdentifier, StreamConfig> currentStreamConfigMap;
|
||||||
private final Function<StreamConfig, ShardSyncTaskManager> shardSyncTaskManagerProvider;
|
private final Function<StreamConfig, ShardSyncTaskManager> shardSyncTaskManagerProvider;
|
||||||
|
|
@ -105,7 +105,6 @@ class PeriodicShardSyncManager {
|
||||||
|
|
||||||
PeriodicShardSyncManager(
|
PeriodicShardSyncManager(
|
||||||
String workerId,
|
String workerId,
|
||||||
LeaderDecider leaderDecider,
|
|
||||||
LeaseRefresher leaseRefresher,
|
LeaseRefresher leaseRefresher,
|
||||||
Map<StreamIdentifier, StreamConfig> currentStreamConfigMap,
|
Map<StreamIdentifier, StreamConfig> currentStreamConfigMap,
|
||||||
Function<StreamConfig, ShardSyncTaskManager> shardSyncTaskManagerProvider,
|
Function<StreamConfig, ShardSyncTaskManager> shardSyncTaskManagerProvider,
|
||||||
|
|
@ -117,7 +116,6 @@ class PeriodicShardSyncManager {
|
||||||
AtomicBoolean leaderSynced) {
|
AtomicBoolean leaderSynced) {
|
||||||
this(
|
this(
|
||||||
workerId,
|
workerId,
|
||||||
leaderDecider,
|
|
||||||
leaseRefresher,
|
leaseRefresher,
|
||||||
currentStreamConfigMap,
|
currentStreamConfigMap,
|
||||||
shardSyncTaskManagerProvider,
|
shardSyncTaskManagerProvider,
|
||||||
|
|
@ -132,7 +130,6 @@ class PeriodicShardSyncManager {
|
||||||
|
|
||||||
PeriodicShardSyncManager(
|
PeriodicShardSyncManager(
|
||||||
String workerId,
|
String workerId,
|
||||||
LeaderDecider leaderDecider,
|
|
||||||
LeaseRefresher leaseRefresher,
|
LeaseRefresher leaseRefresher,
|
||||||
Map<StreamIdentifier, StreamConfig> currentStreamConfigMap,
|
Map<StreamIdentifier, StreamConfig> currentStreamConfigMap,
|
||||||
Function<StreamConfig, ShardSyncTaskManager> shardSyncTaskManagerProvider,
|
Function<StreamConfig, ShardSyncTaskManager> shardSyncTaskManagerProvider,
|
||||||
|
|
@ -144,9 +141,7 @@ class PeriodicShardSyncManager {
|
||||||
int leasesRecoveryAuditorInconsistencyConfidenceThreshold,
|
int leasesRecoveryAuditorInconsistencyConfidenceThreshold,
|
||||||
AtomicBoolean leaderSynced) {
|
AtomicBoolean leaderSynced) {
|
||||||
Validate.notBlank(workerId, "WorkerID is required to initialize PeriodicShardSyncManager.");
|
Validate.notBlank(workerId, "WorkerID is required to initialize PeriodicShardSyncManager.");
|
||||||
Validate.notNull(leaderDecider, "LeaderDecider is required to initialize PeriodicShardSyncManager.");
|
|
||||||
this.workerId = workerId;
|
this.workerId = workerId;
|
||||||
this.leaderDecider = leaderDecider;
|
|
||||||
this.leaseRefresher = leaseRefresher;
|
this.leaseRefresher = leaseRefresher;
|
||||||
this.currentStreamConfigMap = currentStreamConfigMap;
|
this.currentStreamConfigMap = currentStreamConfigMap;
|
||||||
this.shardSyncTaskManagerProvider = shardSyncTaskManagerProvider;
|
this.shardSyncTaskManagerProvider = shardSyncTaskManagerProvider;
|
||||||
|
|
@ -160,7 +155,9 @@ class PeriodicShardSyncManager {
|
||||||
this.leaderSynced = leaderSynced;
|
this.leaderSynced = leaderSynced;
|
||||||
}
|
}
|
||||||
|
|
||||||
public synchronized TaskResult start() {
|
public synchronized TaskResult start(final LeaderDecider leaderDecider) {
|
||||||
|
Validate.notNull(leaderDecider, "LeaderDecider is required to start PeriodicShardSyncManager.");
|
||||||
|
this.leaderDecider = leaderDecider;
|
||||||
if (!isRunning) {
|
if (!isRunning) {
|
||||||
final Runnable periodicShardSyncer = () -> {
|
final Runnable periodicShardSyncer = () -> {
|
||||||
try {
|
try {
|
||||||
|
|
|
||||||
|
|
@ -26,6 +26,7 @@ import java.util.Iterator;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
import java.util.Optional;
|
import java.util.Optional;
|
||||||
|
import java.util.Random;
|
||||||
import java.util.Set;
|
import java.util.Set;
|
||||||
import java.util.concurrent.Callable;
|
import java.util.concurrent.Callable;
|
||||||
import java.util.concurrent.CompletableFuture;
|
import java.util.concurrent.CompletableFuture;
|
||||||
|
|
@ -44,6 +45,7 @@ import java.util.stream.Collectors;
|
||||||
|
|
||||||
import com.google.common.annotations.VisibleForTesting;
|
import com.google.common.annotations.VisibleForTesting;
|
||||||
import com.google.common.base.Stopwatch;
|
import com.google.common.base.Stopwatch;
|
||||||
|
import com.google.common.util.concurrent.ThreadFactoryBuilder;
|
||||||
import io.reactivex.rxjava3.plugins.RxJavaPlugins;
|
import io.reactivex.rxjava3.plugins.RxJavaPlugins;
|
||||||
import lombok.AccessLevel;
|
import lombok.AccessLevel;
|
||||||
import lombok.Getter;
|
import lombok.Getter;
|
||||||
|
|
@ -55,15 +57,23 @@ import lombok.extern.slf4j.Slf4j;
|
||||||
import software.amazon.awssdk.arns.Arn;
|
import software.amazon.awssdk.arns.Arn;
|
||||||
import software.amazon.awssdk.regions.Region;
|
import software.amazon.awssdk.regions.Region;
|
||||||
import software.amazon.awssdk.utils.Validate;
|
import software.amazon.awssdk.utils.Validate;
|
||||||
|
import software.amazon.kinesis.annotations.KinesisClientInternalApi;
|
||||||
import software.amazon.kinesis.checkpoint.CheckpointConfig;
|
import software.amazon.kinesis.checkpoint.CheckpointConfig;
|
||||||
import software.amazon.kinesis.checkpoint.ShardRecordProcessorCheckpointer;
|
import software.amazon.kinesis.checkpoint.ShardRecordProcessorCheckpointer;
|
||||||
import software.amazon.kinesis.common.StreamConfig;
|
import software.amazon.kinesis.common.StreamConfig;
|
||||||
import software.amazon.kinesis.common.StreamIdentifier;
|
import software.amazon.kinesis.common.StreamIdentifier;
|
||||||
|
import software.amazon.kinesis.coordinator.assignment.LeaseAssignmentManager;
|
||||||
|
import software.amazon.kinesis.coordinator.migration.MigrationStateMachine;
|
||||||
|
import software.amazon.kinesis.coordinator.migration.MigrationStateMachineImpl;
|
||||||
|
import software.amazon.kinesis.leader.DynamoDBLockBasedLeaderDecider;
|
||||||
|
import software.amazon.kinesis.leader.MigrationAdaptiveLeaderDecider;
|
||||||
import software.amazon.kinesis.leases.HierarchicalShardSyncer;
|
import software.amazon.kinesis.leases.HierarchicalShardSyncer;
|
||||||
import software.amazon.kinesis.leases.Lease;
|
import software.amazon.kinesis.leases.Lease;
|
||||||
import software.amazon.kinesis.leases.LeaseCleanupManager;
|
import software.amazon.kinesis.leases.LeaseCleanupManager;
|
||||||
import software.amazon.kinesis.leases.LeaseCoordinator;
|
import software.amazon.kinesis.leases.LeaseCoordinator;
|
||||||
import software.amazon.kinesis.leases.LeaseManagementConfig;
|
import software.amazon.kinesis.leases.LeaseManagementConfig;
|
||||||
|
import software.amazon.kinesis.leases.LeaseManagementConfig.WorkerUtilizationAwareAssignmentConfig;
|
||||||
|
import software.amazon.kinesis.leases.LeaseManagementFactory;
|
||||||
import software.amazon.kinesis.leases.LeaseRefresher;
|
import software.amazon.kinesis.leases.LeaseRefresher;
|
||||||
import software.amazon.kinesis.leases.LeaseSerializer;
|
import software.amazon.kinesis.leases.LeaseSerializer;
|
||||||
import software.amazon.kinesis.leases.MultiStreamLease;
|
import software.amazon.kinesis.leases.MultiStreamLease;
|
||||||
|
|
@ -98,6 +108,9 @@ import software.amazon.kinesis.retrieval.AggregatorUtil;
|
||||||
import software.amazon.kinesis.retrieval.RecordsPublisher;
|
import software.amazon.kinesis.retrieval.RecordsPublisher;
|
||||||
import software.amazon.kinesis.retrieval.RetrievalConfig;
|
import software.amazon.kinesis.retrieval.RetrievalConfig;
|
||||||
import software.amazon.kinesis.schemaregistry.SchemaRegistryDecoder;
|
import software.amazon.kinesis.schemaregistry.SchemaRegistryDecoder;
|
||||||
|
import software.amazon.kinesis.worker.WorkerMetricsSelector;
|
||||||
|
import software.amazon.kinesis.worker.metricstats.WorkerMetricStatsDAO;
|
||||||
|
import software.amazon.kinesis.worker.metricstats.WorkerMetricStatsManager;
|
||||||
|
|
||||||
import static software.amazon.kinesis.common.ArnUtil.constructStreamArn;
|
import static software.amazon.kinesis.common.ArnUtil.constructStreamArn;
|
||||||
import static software.amazon.kinesis.processor.FormerStreamsLeasesDeletionStrategy.StreamsLeasesDeletionType;
|
import static software.amazon.kinesis.processor.FormerStreamsLeasesDeletionStrategy.StreamsLeasesDeletionType;
|
||||||
|
|
@ -106,12 +119,14 @@ import static software.amazon.kinesis.processor.FormerStreamsLeasesDeletionStrat
|
||||||
/**
|
/**
|
||||||
*
|
*
|
||||||
*/
|
*/
|
||||||
@Getter
|
@Getter(AccessLevel.PRIVATE)
|
||||||
@Accessors(fluent = true)
|
@Accessors(fluent = true)
|
||||||
@Slf4j
|
@Slf4j
|
||||||
|
@KinesisClientInternalApi
|
||||||
public class Scheduler implements Runnable {
|
public class Scheduler implements Runnable {
|
||||||
|
|
||||||
private static final int PERIODIC_SHARD_SYNC_MAX_WORKERS_DEFAULT = 1;
|
private static final int PERIODIC_SHARD_SYNC_MAX_WORKERS_DEFAULT = 1;
|
||||||
|
|
||||||
private static final long LEASE_TABLE_CHECK_FREQUENCY_MILLIS = 3 * 1000L;
|
private static final long LEASE_TABLE_CHECK_FREQUENCY_MILLIS = 3 * 1000L;
|
||||||
private static final long MIN_WAIT_TIME_FOR_LEASE_TABLE_CHECK_MILLIS = 1000L;
|
private static final long MIN_WAIT_TIME_FOR_LEASE_TABLE_CHECK_MILLIS = 1000L;
|
||||||
private static final long MAX_WAIT_TIME_FOR_LEASE_TABLE_CHECK_MILLIS = 30 * 1000L;
|
private static final long MAX_WAIT_TIME_FOR_LEASE_TABLE_CHECK_MILLIS = 30 * 1000L;
|
||||||
|
|
@ -133,7 +148,9 @@ public class Scheduler implements Runnable {
|
||||||
private final ProcessorConfig processorConfig;
|
private final ProcessorConfig processorConfig;
|
||||||
private final RetrievalConfig retrievalConfig;
|
private final RetrievalConfig retrievalConfig;
|
||||||
|
|
||||||
|
@Getter(AccessLevel.PACKAGE)
|
||||||
private final String applicationName;
|
private final String applicationName;
|
||||||
|
|
||||||
private final int maxInitializationAttempts;
|
private final int maxInitializationAttempts;
|
||||||
private final Checkpointer checkpoint;
|
private final Checkpointer checkpoint;
|
||||||
private final long shardConsumerDispatchPollIntervalMillis;
|
private final long shardConsumerDispatchPollIntervalMillis;
|
||||||
|
|
@ -156,7 +173,10 @@ public class Scheduler implements Runnable {
|
||||||
private final long failoverTimeMillis;
|
private final long failoverTimeMillis;
|
||||||
private final long taskBackoffTimeMillis;
|
private final long taskBackoffTimeMillis;
|
||||||
private final boolean isMultiStreamMode;
|
private final boolean isMultiStreamMode;
|
||||||
|
|
||||||
|
@Getter(AccessLevel.PACKAGE)
|
||||||
private final Map<StreamIdentifier, StreamConfig> currentStreamConfigMap = new StreamConfigMap();
|
private final Map<StreamIdentifier, StreamConfig> currentStreamConfigMap = new StreamConfigMap();
|
||||||
|
|
||||||
private final StreamTracker streamTracker;
|
private final StreamTracker streamTracker;
|
||||||
private final FormerStreamsLeasesDeletionStrategy formerStreamsLeasesDeletionStrategy;
|
private final FormerStreamsLeasesDeletionStrategy formerStreamsLeasesDeletionStrategy;
|
||||||
private final long listShardsBackoffTimeMillis;
|
private final long listShardsBackoffTimeMillis;
|
||||||
|
|
@ -167,19 +187,30 @@ public class Scheduler implements Runnable {
|
||||||
private final AggregatorUtil aggregatorUtil;
|
private final AggregatorUtil aggregatorUtil;
|
||||||
private final Function<StreamConfig, HierarchicalShardSyncer> hierarchicalShardSyncerProvider;
|
private final Function<StreamConfig, HierarchicalShardSyncer> hierarchicalShardSyncerProvider;
|
||||||
private final long schedulerInitializationBackoffTimeMillis;
|
private final long schedulerInitializationBackoffTimeMillis;
|
||||||
private final LeaderDecider leaderDecider;
|
private LeaderDecider leaderDecider;
|
||||||
|
|
||||||
|
@Getter(AccessLevel.PACKAGE)
|
||||||
private final Map<StreamIdentifier, Instant> staleStreamDeletionMap = new HashMap<>();
|
private final Map<StreamIdentifier, Instant> staleStreamDeletionMap = new HashMap<>();
|
||||||
|
|
||||||
private final LeaseCleanupManager leaseCleanupManager;
|
private final LeaseCleanupManager leaseCleanupManager;
|
||||||
private final SchemaRegistryDecoder schemaRegistryDecoder;
|
private final SchemaRegistryDecoder schemaRegistryDecoder;
|
||||||
|
|
||||||
|
@Getter(AccessLevel.PACKAGE)
|
||||||
private final DeletedStreamListProvider deletedStreamListProvider;
|
private final DeletedStreamListProvider deletedStreamListProvider;
|
||||||
|
|
||||||
|
private final MigrationStateMachine migrationStateMachine;
|
||||||
|
private final DynamicMigrationComponentsInitializer migrationComponentsInitializer;
|
||||||
|
private final MigrationAdaptiveLeaseAssignmentModeProvider leaseAssignmentModeProvider;
|
||||||
|
|
||||||
// Holds consumers for shards the worker is currently tracking. Key is shard
|
// Holds consumers for shards the worker is currently tracking. Key is shard
|
||||||
// info, value is ShardConsumer.
|
// info, value is ShardConsumer.
|
||||||
|
@Getter(AccessLevel.PACKAGE)
|
||||||
private final ConcurrentMap<ShardInfo, ShardConsumer> shardInfoShardConsumerMap = new ConcurrentHashMap<>();
|
private final ConcurrentMap<ShardInfo, ShardConsumer> shardInfoShardConsumerMap = new ConcurrentHashMap<>();
|
||||||
|
|
||||||
private volatile boolean shutdown;
|
private volatile boolean shutdown;
|
||||||
private volatile long shutdownStartTimeMillis;
|
private volatile long shutdownStartTimeMillis;
|
||||||
|
|
||||||
|
@Getter(AccessLevel.PACKAGE)
|
||||||
private volatile boolean shutdownComplete = false;
|
private volatile boolean shutdownComplete = false;
|
||||||
|
|
||||||
private final Object lock = new Object();
|
private final Object lock = new Object();
|
||||||
|
|
@ -187,8 +218,6 @@ public class Scheduler implements Runnable {
|
||||||
private final Stopwatch streamSyncWatch = Stopwatch.createUnstarted();
|
private final Stopwatch streamSyncWatch = Stopwatch.createUnstarted();
|
||||||
|
|
||||||
private boolean leasesSyncedOnAppInit = false;
|
private boolean leasesSyncedOnAppInit = false;
|
||||||
|
|
||||||
@Getter(AccessLevel.NONE)
|
|
||||||
private final AtomicBoolean leaderSynced = new AtomicBoolean(false);
|
private final AtomicBoolean leaderSynced = new AtomicBoolean(false);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -200,7 +229,6 @@ public class Scheduler implements Runnable {
|
||||||
* CountDownLatch used by the GracefulShutdownCoordinator. Reaching zero means that
|
* CountDownLatch used by the GracefulShutdownCoordinator. Reaching zero means that
|
||||||
* the scheduler's finalShutdown() call has completed.
|
* the scheduler's finalShutdown() call has completed.
|
||||||
*/
|
*/
|
||||||
@Getter(AccessLevel.NONE)
|
|
||||||
private final CountDownLatch finalShutdownLatch = new CountDownLatch(1);
|
private final CountDownLatch finalShutdownLatch = new CountDownLatch(1);
|
||||||
|
|
||||||
@VisibleForTesting
|
@VisibleForTesting
|
||||||
|
|
@ -259,11 +287,32 @@ public class Scheduler implements Runnable {
|
||||||
// Determine leaseSerializer based on availability of MultiStreamTracker.
|
// Determine leaseSerializer based on availability of MultiStreamTracker.
|
||||||
final LeaseSerializer leaseSerializer =
|
final LeaseSerializer leaseSerializer =
|
||||||
isMultiStreamMode ? new DynamoDBMultiStreamLeaseSerializer() : new DynamoDBLeaseSerializer();
|
isMultiStreamMode ? new DynamoDBMultiStreamLeaseSerializer() : new DynamoDBLeaseSerializer();
|
||||||
this.leaseCoordinator = this.leaseManagementConfig
|
|
||||||
.leaseManagementFactory(leaseSerializer, isMultiStreamMode)
|
final LeaseManagementFactory leaseManagementFactory =
|
||||||
.createLeaseCoordinator(this.metricsFactory);
|
this.leaseManagementConfig.leaseManagementFactory(leaseSerializer, isMultiStreamMode);
|
||||||
|
this.leaseCoordinator =
|
||||||
|
leaseManagementFactory.createLeaseCoordinator(this.metricsFactory, shardInfoShardConsumerMap);
|
||||||
this.leaseRefresher = this.leaseCoordinator.leaseRefresher();
|
this.leaseRefresher = this.leaseCoordinator.leaseRefresher();
|
||||||
|
|
||||||
|
final CoordinatorStateDAO coordinatorStateDAO = new CoordinatorStateDAO(
|
||||||
|
leaseManagementConfig.dynamoDBClient(), coordinatorConfig().coordinatorStateTableConfig());
|
||||||
|
this.leaseAssignmentModeProvider = new MigrationAdaptiveLeaseAssignmentModeProvider();
|
||||||
|
this.migrationComponentsInitializer = createDynamicMigrationComponentsInitializer(coordinatorStateDAO);
|
||||||
|
this.migrationStateMachine = new MigrationStateMachineImpl(
|
||||||
|
metricsFactory,
|
||||||
|
System::currentTimeMillis,
|
||||||
|
coordinatorStateDAO,
|
||||||
|
Executors.newScheduledThreadPool(
|
||||||
|
2,
|
||||||
|
new ThreadFactoryBuilder()
|
||||||
|
.setNameFormat("MigrationStateMachine-%04d")
|
||||||
|
.build()),
|
||||||
|
coordinatorConfig.clientVersionConfig(),
|
||||||
|
new Random(),
|
||||||
|
this.migrationComponentsInitializer,
|
||||||
|
leaseManagementConfig.workerIdentifier(),
|
||||||
|
Duration.ofMinutes(10).getSeconds());
|
||||||
|
|
||||||
//
|
//
|
||||||
// TODO: Figure out what to do with lease manage <=> checkpoint relationship
|
// TODO: Figure out what to do with lease manage <=> checkpoint relationship
|
||||||
//
|
//
|
||||||
|
|
@ -280,9 +329,8 @@ public class Scheduler implements Runnable {
|
||||||
this.diagnosticEventFactory = diagnosticEventFactory;
|
this.diagnosticEventFactory = diagnosticEventFactory;
|
||||||
this.diagnosticEventHandler = new DiagnosticEventLogger();
|
this.diagnosticEventHandler = new DiagnosticEventLogger();
|
||||||
this.deletedStreamListProvider = new DeletedStreamListProvider();
|
this.deletedStreamListProvider = new DeletedStreamListProvider();
|
||||||
this.shardSyncTaskManagerProvider = streamConfig -> this.leaseManagementConfig
|
this.shardSyncTaskManagerProvider = streamConfig -> leaseManagementFactory.createShardSyncTaskManager(
|
||||||
.leaseManagementFactory(leaseSerializer, isMultiStreamMode)
|
this.metricsFactory, streamConfig, this.deletedStreamListProvider);
|
||||||
.createShardSyncTaskManager(this.metricsFactory, streamConfig, this.deletedStreamListProvider);
|
|
||||||
this.shardPrioritization = this.coordinatorConfig.shardPrioritization();
|
this.shardPrioritization = this.coordinatorConfig.shardPrioritization();
|
||||||
this.cleanupLeasesUponShardCompletion = this.leaseManagementConfig.cleanupLeasesUponShardCompletion();
|
this.cleanupLeasesUponShardCompletion = this.leaseManagementConfig.cleanupLeasesUponShardCompletion();
|
||||||
this.skipShardSyncAtWorkerInitializationIfLeasesExist =
|
this.skipShardSyncAtWorkerInitializationIfLeasesExist =
|
||||||
|
|
@ -299,8 +347,6 @@ public class Scheduler implements Runnable {
|
||||||
this.workerStateChangeListener =
|
this.workerStateChangeListener =
|
||||||
this.coordinatorConfig.coordinatorFactory().createWorkerStateChangeListener();
|
this.coordinatorConfig.coordinatorFactory().createWorkerStateChangeListener();
|
||||||
}
|
}
|
||||||
this.leaderDecider = new DeterministicShuffleShardSyncLeaderDecider(
|
|
||||||
leaseRefresher, Executors.newSingleThreadScheduledExecutor(), PERIODIC_SHARD_SYNC_MAX_WORKERS_DEFAULT);
|
|
||||||
this.failoverTimeMillis = this.leaseManagementConfig.failoverTimeMillis();
|
this.failoverTimeMillis = this.leaseManagementConfig.failoverTimeMillis();
|
||||||
this.taskBackoffTimeMillis = this.lifecycleConfig.taskBackoffTimeMillis();
|
this.taskBackoffTimeMillis = this.lifecycleConfig.taskBackoffTimeMillis();
|
||||||
this.listShardsBackoffTimeMillis = this.retrievalConfig.listShardsBackoffTimeInMillis();
|
this.listShardsBackoffTimeMillis = this.retrievalConfig.listShardsBackoffTimeInMillis();
|
||||||
|
|
@ -315,7 +361,6 @@ public class Scheduler implements Runnable {
|
||||||
this.coordinatorConfig.schedulerInitializationBackoffTimeMillis();
|
this.coordinatorConfig.schedulerInitializationBackoffTimeMillis();
|
||||||
this.leaderElectedPeriodicShardSyncManager = new PeriodicShardSyncManager(
|
this.leaderElectedPeriodicShardSyncManager = new PeriodicShardSyncManager(
|
||||||
leaseManagementConfig.workerIdentifier(),
|
leaseManagementConfig.workerIdentifier(),
|
||||||
leaderDecider,
|
|
||||||
leaseRefresher,
|
leaseRefresher,
|
||||||
currentStreamConfigMap,
|
currentStreamConfigMap,
|
||||||
shardSyncTaskManagerProvider,
|
shardSyncTaskManagerProvider,
|
||||||
|
|
@ -325,14 +370,69 @@ public class Scheduler implements Runnable {
|
||||||
leaseManagementConfig.leasesRecoveryAuditorExecutionFrequencyMillis(),
|
leaseManagementConfig.leasesRecoveryAuditorExecutionFrequencyMillis(),
|
||||||
leaseManagementConfig.leasesRecoveryAuditorInconsistencyConfidenceThreshold(),
|
leaseManagementConfig.leasesRecoveryAuditorInconsistencyConfidenceThreshold(),
|
||||||
leaderSynced);
|
leaderSynced);
|
||||||
this.leaseCleanupManager = this.leaseManagementConfig
|
this.leaseCleanupManager = leaseManagementFactory.createLeaseCleanupManager(metricsFactory);
|
||||||
.leaseManagementFactory(leaseSerializer, isMultiStreamMode)
|
|
||||||
.createLeaseCleanupManager(metricsFactory);
|
|
||||||
this.schemaRegistryDecoder = this.retrievalConfig.glueSchemaRegistryDeserializer() == null
|
this.schemaRegistryDecoder = this.retrievalConfig.glueSchemaRegistryDeserializer() == null
|
||||||
? null
|
? null
|
||||||
: new SchemaRegistryDecoder(this.retrievalConfig.glueSchemaRegistryDeserializer());
|
: new SchemaRegistryDecoder(this.retrievalConfig.glueSchemaRegistryDeserializer());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Depends on LeaseCoordinator and LeaseRefresher to be created first
|
||||||
|
*/
|
||||||
|
private DynamicMigrationComponentsInitializer createDynamicMigrationComponentsInitializer(
|
||||||
|
final CoordinatorStateDAO coordinatorStateDAO) {
|
||||||
|
selectWorkerMetricsIfAvailable(leaseManagementConfig.workerUtilizationAwareAssignmentConfig());
|
||||||
|
|
||||||
|
final WorkerMetricStatsManager workerMetricsManager = new WorkerMetricStatsManager(
|
||||||
|
leaseManagementConfig.workerUtilizationAwareAssignmentConfig().noOfPersistedMetricsPerWorkerMetrics(),
|
||||||
|
leaseManagementConfig.workerUtilizationAwareAssignmentConfig().workerMetricList(),
|
||||||
|
metricsFactory,
|
||||||
|
leaseManagementConfig
|
||||||
|
.workerUtilizationAwareAssignmentConfig()
|
||||||
|
.inMemoryWorkerMetricsCaptureFrequencyMillis());
|
||||||
|
|
||||||
|
final WorkerMetricStatsDAO workerMetricsDAO = new WorkerMetricStatsDAO(
|
||||||
|
leaseManagementConfig.dynamoDBClient(),
|
||||||
|
leaseManagementConfig.workerUtilizationAwareAssignmentConfig().workerMetricsTableConfig(),
|
||||||
|
leaseManagementConfig.workerUtilizationAwareAssignmentConfig().workerMetricsReporterFreqInMillis());
|
||||||
|
|
||||||
|
return DynamicMigrationComponentsInitializer.builder()
|
||||||
|
.metricsFactory(metricsFactory)
|
||||||
|
.leaseRefresher(leaseRefresher)
|
||||||
|
.coordinatorStateDAO(coordinatorStateDAO)
|
||||||
|
.workerMetricsThreadPool(Executors.newScheduledThreadPool(
|
||||||
|
1,
|
||||||
|
new ThreadFactoryBuilder()
|
||||||
|
.setNameFormat("worker-metrics-reporter")
|
||||||
|
.build()))
|
||||||
|
.workerMetricsDAO(workerMetricsDAO)
|
||||||
|
.workerMetricsManager(workerMetricsManager)
|
||||||
|
.lamThreadPool(Executors.newScheduledThreadPool(
|
||||||
|
1,
|
||||||
|
new ThreadFactoryBuilder().setNameFormat("lam-thread").build()))
|
||||||
|
.lamCreator((lamThreadPool, leaderDecider) -> new LeaseAssignmentManager(
|
||||||
|
leaseRefresher,
|
||||||
|
workerMetricsDAO,
|
||||||
|
leaderDecider,
|
||||||
|
leaseManagementConfig.workerUtilizationAwareAssignmentConfig(),
|
||||||
|
leaseCoordinator.workerIdentifier(),
|
||||||
|
leaseManagementConfig.failoverTimeMillis(),
|
||||||
|
metricsFactory,
|
||||||
|
lamThreadPool,
|
||||||
|
System::nanoTime,
|
||||||
|
leaseManagementConfig.maxLeasesForWorker(),
|
||||||
|
leaseManagementConfig.gracefulLeaseHandoffConfig()))
|
||||||
|
.adaptiveLeaderDeciderCreator(() -> new MigrationAdaptiveLeaderDecider(metricsFactory))
|
||||||
|
.deterministicLeaderDeciderCreator(() -> new DeterministicShuffleShardSyncLeaderDecider(
|
||||||
|
leaseRefresher, Executors.newSingleThreadScheduledExecutor(), 1, metricsFactory))
|
||||||
|
.ddbLockBasedLeaderDeciderCreator(() -> DynamoDBLockBasedLeaderDecider.create(
|
||||||
|
coordinatorStateDAO, leaseCoordinator.workerIdentifier(), metricsFactory))
|
||||||
|
.workerIdentifier(leaseCoordinator.workerIdentifier())
|
||||||
|
.workerUtilizationAwareAssignmentConfig(leaseManagementConfig.workerUtilizationAwareAssignmentConfig())
|
||||||
|
.leaseAssignmentModeProvider(leaseAssignmentModeProvider)
|
||||||
|
.build();
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Start consuming data from the stream, and pass it to the application record processors.
|
* Start consuming data from the stream, and pass it to the application record processors.
|
||||||
*/
|
*/
|
||||||
|
|
@ -342,13 +442,19 @@ public class Scheduler implements Runnable {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
final MetricsScope metricsScope =
|
||||||
|
MetricsUtil.createMetricsWithOperation(metricsFactory, "Scheduler:Initialize");
|
||||||
|
boolean success = false;
|
||||||
try {
|
try {
|
||||||
initialize();
|
initialize();
|
||||||
|
success = true;
|
||||||
log.info("Initialization complete. Starting worker loop.");
|
log.info("Initialization complete. Starting worker loop.");
|
||||||
} catch (RuntimeException e) {
|
} catch (RuntimeException e) {
|
||||||
log.error("Unable to initialize after {} attempts. Shutting down.", maxInitializationAttempts, e);
|
log.error("Unable to initialize after {} attempts. Shutting down.", maxInitializationAttempts, e);
|
||||||
workerStateChangeListener.onAllInitializationAttemptsFailed(e);
|
workerStateChangeListener.onAllInitializationAttemptsFailed(e);
|
||||||
shutdown();
|
shutdown();
|
||||||
|
} finally {
|
||||||
|
MetricsUtil.addSuccess(metricsScope, "Initialize", success, MetricsLevel.SUMMARY);
|
||||||
}
|
}
|
||||||
while (!shouldShutdown()) {
|
while (!shouldShutdown()) {
|
||||||
runProcessLoop();
|
runProcessLoop();
|
||||||
|
|
@ -363,14 +469,13 @@ public class Scheduler implements Runnable {
|
||||||
synchronized (lock) {
|
synchronized (lock) {
|
||||||
registerErrorHandlerForUndeliverableAsyncTaskExceptions();
|
registerErrorHandlerForUndeliverableAsyncTaskExceptions();
|
||||||
workerStateChangeListener.onWorkerStateChange(WorkerStateChangeListener.WorkerState.INITIALIZING);
|
workerStateChangeListener.onWorkerStateChange(WorkerStateChangeListener.WorkerState.INITIALIZING);
|
||||||
|
|
||||||
boolean isDone = false;
|
boolean isDone = false;
|
||||||
Exception lastException = null;
|
Exception lastException = null;
|
||||||
|
|
||||||
for (int i = 0; (!isDone) && (i < maxInitializationAttempts); i++) {
|
for (int i = 0; (!isDone) && (i < maxInitializationAttempts); i++) {
|
||||||
try {
|
try {
|
||||||
log.info("Initializing LeaseCoordinator attempt {}", (i + 1));
|
log.info("Initializing LeaseCoordinator attempt {}", (i + 1));
|
||||||
leaseCoordinator.initialize();
|
leaseCoordinator.initialize();
|
||||||
|
|
||||||
if (!skipShardSyncAtWorkerInitializationIfLeasesExist || leaseRefresher.isLeaseTableEmpty()) {
|
if (!skipShardSyncAtWorkerInitializationIfLeasesExist || leaseRefresher.isLeaseTableEmpty()) {
|
||||||
if (shouldInitiateLeaseSync()) {
|
if (shouldInitiateLeaseSync()) {
|
||||||
log.info(
|
log.info(
|
||||||
|
|
@ -382,21 +487,29 @@ public class Scheduler implements Runnable {
|
||||||
log.info("Skipping shard sync per configuration setting (and lease table is not empty)");
|
log.info("Skipping shard sync per configuration setting (and lease table is not empty)");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Initialize the state machine after lease table has been initialized
|
||||||
|
// Migration state machine creates and waits for GSI if necessary,
|
||||||
|
// it must be initialized before starting leaseCoordinator, which runs LeaseDiscoverer
|
||||||
|
// and that requires GSI to be present and active. (migrationStateMachine.initialize is idempotent)
|
||||||
|
migrationStateMachine.initialize();
|
||||||
|
leaderDecider = migrationComponentsInitializer.leaderDecider();
|
||||||
|
|
||||||
leaseCleanupManager.start();
|
leaseCleanupManager.start();
|
||||||
|
|
||||||
// If we reach this point, then we either skipped the lease sync or did not have any exception
|
// If we reach this point, then we either skipped the lease sync or did not have any exception
|
||||||
// for any of the shard sync in the previous attempt.
|
// for any of the shard sync in the previous attempt.
|
||||||
|
|
||||||
if (!leaseCoordinator.isRunning()) {
|
if (!leaseCoordinator.isRunning()) {
|
||||||
log.info("Starting LeaseCoordinator");
|
log.info("Starting LeaseCoordinator");
|
||||||
leaseCoordinator.start();
|
leaseCoordinator.start(leaseAssignmentModeProvider);
|
||||||
} else {
|
} else {
|
||||||
log.info("LeaseCoordinator is already running. No need to start it.");
|
log.info("LeaseCoordinator is already running. No need to start it.");
|
||||||
}
|
}
|
||||||
log.info("Scheduling periodicShardSync");
|
log.info("Scheduling periodicShardSync");
|
||||||
leaderElectedPeriodicShardSyncManager.start();
|
leaderElectedPeriodicShardSyncManager.start(leaderDecider);
|
||||||
streamSyncWatch.start();
|
streamSyncWatch.start();
|
||||||
isDone = true;
|
isDone = true;
|
||||||
} catch (Exception e) {
|
} catch (final Exception e) {
|
||||||
log.error("Caught exception when initializing LeaseCoordinator", e);
|
log.error("Caught exception when initializing LeaseCoordinator", e);
|
||||||
lastException = e;
|
lastException = e;
|
||||||
}
|
}
|
||||||
|
|
@ -863,7 +976,7 @@ public class Scheduler implements Runnable {
|
||||||
leaseCoordinator, lease, notificationCompleteLatch, shutdownCompleteLatch);
|
leaseCoordinator, lease, notificationCompleteLatch, shutdownCompleteLatch);
|
||||||
ShardInfo shardInfo = DynamoDBLeaseCoordinator.convertLeaseToAssignment(lease);
|
ShardInfo shardInfo = DynamoDBLeaseCoordinator.convertLeaseToAssignment(lease);
|
||||||
ShardConsumer consumer = shardInfoShardConsumerMap.get(shardInfo);
|
ShardConsumer consumer = shardInfoShardConsumerMap.get(shardInfo);
|
||||||
if (consumer != null) {
|
if (consumer != null && !consumer.isShutdown()) {
|
||||||
consumer.gracefulShutdown(shutdownNotification);
|
consumer.gracefulShutdown(shutdownNotification);
|
||||||
} else {
|
} else {
|
||||||
//
|
//
|
||||||
|
|
@ -912,6 +1025,8 @@ public class Scheduler implements Runnable {
|
||||||
shutdown = true;
|
shutdown = true;
|
||||||
shutdownStartTimeMillis = System.currentTimeMillis();
|
shutdownStartTimeMillis = System.currentTimeMillis();
|
||||||
|
|
||||||
|
migrationStateMachine.shutdown();
|
||||||
|
migrationComponentsInitializer.shutdown();
|
||||||
// Stop lease coordinator, so leases are not renewed or stolen from other workers.
|
// Stop lease coordinator, so leases are not renewed or stolen from other workers.
|
||||||
// Lost leases will force Worker to begin shutdown process for all shard consumers in
|
// Lost leases will force Worker to begin shutdown process for all shard consumers in
|
||||||
// Worker.run().
|
// Worker.run().
|
||||||
|
|
@ -1228,4 +1343,23 @@ public class Scheduler implements Runnable {
|
||||||
public Future<Void> requestShutdown() {
|
public Future<Void> requestShutdown() {
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* If WorkerMetricStats list is empty and the disable flag is false, select WorkerMetricStats automatically.
|
||||||
|
*/
|
||||||
|
private void selectWorkerMetricsIfAvailable(
|
||||||
|
final WorkerUtilizationAwareAssignmentConfig workerUtilizationAwareAssignmentConfig) {
|
||||||
|
try {
|
||||||
|
if (workerUtilizationAwareAssignmentConfig.workerMetricList().isEmpty()
|
||||||
|
&& !workerUtilizationAwareAssignmentConfig.disableWorkerMetrics()) {
|
||||||
|
workerUtilizationAwareAssignmentConfig.workerMetricList(
|
||||||
|
WorkerMetricsSelector.create().getDefaultWorkerMetrics());
|
||||||
|
}
|
||||||
|
} catch (final Exception e) {
|
||||||
|
log.warn(
|
||||||
|
"Exception encountered during WorkerMetricStats selection. If this is persistent please try setting the "
|
||||||
|
+ "WorkerMetricStats explicitly.",
|
||||||
|
e);
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,21 @@
|
||||||
|
package software.amazon.kinesis.coordinator.assignment;
|
||||||
|
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
import software.amazon.kinesis.leases.Lease;
|
||||||
|
|
||||||
|
public interface LeaseAssignmentDecider {
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Assigns expiredOrUnAssignedLeases to the available workers.
|
||||||
|
*/
|
||||||
|
void assignExpiredOrUnassignedLeases(final List<Lease> expiredOrUnAssignedLeases);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Balances the leases between workers in the fleet.
|
||||||
|
* Implementation can choose to balance leases based on lease count or throughput or to bring the variance in
|
||||||
|
* resource utilization to a minimum.
|
||||||
|
* Check documentation on implementation class to see how it balances the leases.
|
||||||
|
*/
|
||||||
|
void balanceWorkerVariance();
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,719 @@
|
||||||
|
package software.amazon.kinesis.coordinator.assignment;
|
||||||
|
|
||||||
|
import java.time.Duration;
|
||||||
|
import java.time.Instant;
|
||||||
|
import java.util.Collections;
|
||||||
|
import java.util.HashMap;
|
||||||
|
import java.util.HashSet;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Map;
|
||||||
|
import java.util.Objects;
|
||||||
|
import java.util.Optional;
|
||||||
|
import java.util.Set;
|
||||||
|
import java.util.concurrent.Callable;
|
||||||
|
import java.util.concurrent.CompletableFuture;
|
||||||
|
import java.util.concurrent.CompletionException;
|
||||||
|
import java.util.concurrent.ExecutorService;
|
||||||
|
import java.util.concurrent.Executors;
|
||||||
|
import java.util.concurrent.Future;
|
||||||
|
import java.util.concurrent.ScheduledExecutorService;
|
||||||
|
import java.util.concurrent.TimeUnit;
|
||||||
|
import java.util.concurrent.atomic.AtomicInteger;
|
||||||
|
import java.util.function.Function;
|
||||||
|
import java.util.function.Supplier;
|
||||||
|
import java.util.stream.Collectors;
|
||||||
|
|
||||||
|
import com.google.common.collect.ImmutableList;
|
||||||
|
import com.google.common.collect.ImmutableMap;
|
||||||
|
import lombok.Getter;
|
||||||
|
import lombok.RequiredArgsConstructor;
|
||||||
|
import lombok.extern.slf4j.Slf4j;
|
||||||
|
import org.apache.commons.collections.CollectionUtils;
|
||||||
|
import software.amazon.awssdk.services.cloudwatch.model.StandardUnit;
|
||||||
|
import software.amazon.kinesis.annotations.KinesisClientInternalApi;
|
||||||
|
import software.amazon.kinesis.coordinator.LeaderDecider;
|
||||||
|
import software.amazon.kinesis.leases.Lease;
|
||||||
|
import software.amazon.kinesis.leases.LeaseManagementConfig;
|
||||||
|
import software.amazon.kinesis.leases.LeaseRefresher;
|
||||||
|
import software.amazon.kinesis.leases.exceptions.DependencyException;
|
||||||
|
import software.amazon.kinesis.leases.exceptions.InvalidStateException;
|
||||||
|
import software.amazon.kinesis.leases.exceptions.ProvisionedThroughputException;
|
||||||
|
import software.amazon.kinesis.metrics.MetricsFactory;
|
||||||
|
import software.amazon.kinesis.metrics.MetricsLevel;
|
||||||
|
import software.amazon.kinesis.metrics.MetricsScope;
|
||||||
|
import software.amazon.kinesis.metrics.MetricsUtil;
|
||||||
|
import software.amazon.kinesis.metrics.NullMetricsScope;
|
||||||
|
import software.amazon.kinesis.worker.metricstats.WorkerMetricStats;
|
||||||
|
import software.amazon.kinesis.worker.metricstats.WorkerMetricStatsDAO;
|
||||||
|
|
||||||
|
import static java.util.Objects.isNull;
|
||||||
|
import static java.util.Objects.nonNull;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Performs the LeaseAssignment for the application. This starts by loading the leases and workerMetrics from the
|
||||||
|
* storage and then starts by assignment (in-memory) of expired and/or unassigned leases after which it tries to perform
|
||||||
|
* balancing of load among the workers by re-assign leases.
|
||||||
|
* In the end, performs actual assignment by writing to storage.
|
||||||
|
*/
|
||||||
|
@Slf4j
|
||||||
|
@RequiredArgsConstructor
|
||||||
|
@KinesisClientInternalApi
|
||||||
|
public final class LeaseAssignmentManager {
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Default number of continuous failure execution after which leadership is released.
|
||||||
|
*/
|
||||||
|
private static final int DEFAULT_FAILURE_COUNT_TO_SWITCH_LEADER = 3;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Default multiplier for LAM frequency with respect to leaseDurationMillis (lease failover millis).
|
||||||
|
* If leaseDurationMillis is 10000 millis, default LAM frequency is 20000 millis.
|
||||||
|
*/
|
||||||
|
private static final int DEFAULT_LEASE_ASSIGNMENT_MANAGER_FREQ_MULTIPLIER = 2;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Default parallelism factor for scaling lease table.
|
||||||
|
*/
|
||||||
|
private static final int DEFAULT_LEASE_TABLE_SCAN_PARALLELISM_FACTOR = 10;
|
||||||
|
|
||||||
|
private static final String FORCE_LEADER_RELEASE_METRIC_NAME = "ForceLeaderRelease";
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Default retry attempt for loading leases and workers before giving up.
|
||||||
|
*/
|
||||||
|
private static final int DDB_LOAD_RETRY_ATTEMPT = 1;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Internal threadpool used to parallely perform assignment operation by calling storage.
|
||||||
|
*/
|
||||||
|
private static final ExecutorService LEASE_ASSIGNMENT_CALL_THREAD_POOL =
|
||||||
|
Executors.newFixedThreadPool(Runtime.getRuntime().availableProcessors());
|
||||||
|
|
||||||
|
private static final String METRICS_LEASE_ASSIGNMENT_MANAGER = "LeaseAssignmentManager";
|
||||||
|
private static final String METRICS_INCOMPLETE_EXPIRED_LEASES_ASSIGNMENT =
|
||||||
|
"LeaseAssignmentManager.IncompleteExpiredLeasesAssignment";
|
||||||
|
public static final int DEFAULT_NO_OF_SKIP_STAT_FOR_DEAD_WORKER_THRESHOLD = 2;
|
||||||
|
|
||||||
|
private final LeaseRefresher leaseRefresher;
|
||||||
|
private final WorkerMetricStatsDAO workerMetricsDAO;
|
||||||
|
private final LeaderDecider leaderDecider;
|
||||||
|
private final LeaseManagementConfig.WorkerUtilizationAwareAssignmentConfig config;
|
||||||
|
private final String currentWorkerId;
|
||||||
|
private final Long leaseDurationMillis;
|
||||||
|
private final MetricsFactory metricsFactory;
|
||||||
|
private final ScheduledExecutorService executorService;
|
||||||
|
private final Supplier<Long> nanoTimeProvider;
|
||||||
|
private final int maxLeasesForWorker;
|
||||||
|
private final LeaseManagementConfig.GracefulLeaseHandoffConfig gracefulLeaseHandoffConfig;
|
||||||
|
private boolean tookOverLeadershipInThisRun = false;
|
||||||
|
private final Map<String, Lease> prevRunLeasesState = new HashMap<>();
|
||||||
|
|
||||||
|
private Future<?> managerFuture;
|
||||||
|
|
||||||
|
private int noOfContinuousFailedAttempts = 0;
|
||||||
|
private int lamRunCounter = 0;
|
||||||
|
|
||||||
|
public synchronized void start() {
|
||||||
|
if (isNull(managerFuture)) {
|
||||||
|
// LAM can be dynamically started/stopped and restarted during MigrationStateMachine execution
|
||||||
|
// so reset the flag to refresh the state before processing during a restart of LAM.
|
||||||
|
tookOverLeadershipInThisRun = false;
|
||||||
|
managerFuture = executorService.scheduleWithFixedDelay(
|
||||||
|
this::performAssignment,
|
||||||
|
0L,
|
||||||
|
leaseDurationMillis * DEFAULT_LEASE_ASSIGNMENT_MANAGER_FREQ_MULTIPLIER,
|
||||||
|
TimeUnit.MILLISECONDS);
|
||||||
|
log.info("Started LeaseAssignmentManager");
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
log.info("LeaseAssignmentManager already running...");
|
||||||
|
}
|
||||||
|
|
||||||
|
public synchronized void stop() {
|
||||||
|
if (nonNull(managerFuture)) {
|
||||||
|
log.info("Completed shutdown of LeaseAssignmentManager");
|
||||||
|
managerFuture.cancel(true);
|
||||||
|
managerFuture = null;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
log.info("LeaseAssignmentManager is not running...");
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Creates the MetricsScope for given {@param operation} by calling metricsFactory and falls back to
|
||||||
|
* NullMetricsScope if failed to create MetricsScope.
|
||||||
|
* @param operation Operation name for MetricsScope
|
||||||
|
* @return instance of MetricsScope
|
||||||
|
*/
|
||||||
|
private MetricsScope createMetricsScope(final String operation) {
|
||||||
|
try {
|
||||||
|
return MetricsUtil.createMetricsWithOperation(metricsFactory, operation);
|
||||||
|
} catch (final Exception e) {
|
||||||
|
log.error("Failed to create metrics scope defaulting to no metrics.", e);
|
||||||
|
return new NullMetricsScope();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * One scheduled run of the lease assignment loop. Executes only on the elected leader:
 * loads leases and worker metrics into an {@link InMemoryStorageView}, assigns expired or
 * unassigned leases, periodically rebalances by worker variance, writes the resulting
 * assignments back to storage, and cleans up stale WorkerMetricStats entries.
 * After {@code DEFAULT_FAILURE_COUNT_TO_SWITCH_LEADER} consecutive failures the worker
 * releases leadership so another worker can take over assignment.
 */
private void performAssignment() {

    final MetricsScope metricsScope = createMetricsScope(METRICS_LEASE_ASSIGNMENT_MANAGER);
    final long startTime = System.currentTimeMillis();
    boolean success = false;

    try {

        // If the current worker is not leader, then do nothing as assignment is executed on leader.
        if (!leaderDecider.isLeader(currentWorkerId)) {
            log.info("Current worker {} is not a leader, ignore", currentWorkerId);
            // Clearing the flag so that a later re-election triggers the leader-switch cleanup below.
            this.tookOverLeadershipInThisRun = false;
            success = true;
            return;
        }

        if (!this.tookOverLeadershipInThisRun) {
            // This means that there was leader change, perform cleanup of state as this is leader switch.
            this.tookOverLeadershipInThisRun = true;
            this.lamRunCounter = 0;
            prepareAfterLeaderSwitch();
        }
        log.info("Current worker {} is a leader, performing assignment", currentWorkerId);

        // Fresh view every run; nothing is cached across iterations except prevRunLeasesState.
        final InMemoryStorageView inMemoryStorageView = new InMemoryStorageView();

        final long loadStartTime = System.currentTimeMillis();
        inMemoryStorageView.loadInMemoryStorageView(metricsScope);
        MetricsUtil.addLatency(metricsScope, "LeaseAndWorkerMetricsLoad", loadStartTime, MetricsLevel.DETAILED);

        publishLeaseAndWorkerCountMetrics(metricsScope, inMemoryStorageView);
        final LeaseAssignmentDecider leaseAssignmentDecider = new VarianceBasedLeaseAssignmentDecider(
                inMemoryStorageView,
                config.dampeningPercentage(),
                config.reBalanceThresholdPercentage(),
                config.allowThroughputOvershoot());

        // Must run before expiry evaluation: reconciles lastCounterIncrementNanos against the
        // previous run's snapshot so isExpired() below sees accurate counter-update times.
        updateLeasesLastCounterIncrementNanosAndLeaseShutdownTimeout(
                inMemoryStorageView.getLeaseList(), inMemoryStorageView.getLeaseTableScanTime());

        // This does not include the leases from the worker that has expired (based on WorkerMetricStats's
        // lastUpdateTime)
        // but the lease is not expired (based on the leaseCounter on lease).
        // If a worker has died, the lease will be expired and assigned in next iteration.
        final List<Lease> expiredOrUnAssignedLeases = inMemoryStorageView.getLeaseList().stream()
                .filter(lease -> lease.isExpired(
                        TimeUnit.MILLISECONDS.toNanos(leaseDurationMillis),
                        inMemoryStorageView.getLeaseTableScanTime()))
                // marking them for direct reassignment.
                .map(l -> l.isExpiredOrUnassigned(true))
                .collect(Collectors.toList());

        log.info("Total expiredOrUnassignedLeases count : {}", expiredOrUnAssignedLeases.size());
        metricsScope.addData(
                "ExpiredLeases", expiredOrUnAssignedLeases.size(), StandardUnit.COUNT, MetricsLevel.SUMMARY);

        final long expiredAndUnassignedLeaseAssignmentStartTime = System.currentTimeMillis();
        // NOTE(review): the decider appears to remove successfully assigned leases from this list,
        // leaving only the spillover checked below — confirm against the decider implementation.
        leaseAssignmentDecider.assignExpiredOrUnassignedLeases(expiredOrUnAssignedLeases);
        MetricsUtil.addLatency(
                metricsScope,
                "AssignExpiredOrUnassignedLeases",
                expiredAndUnassignedLeaseAssignmentStartTime,
                MetricsLevel.DETAILED);

        if (!expiredOrUnAssignedLeases.isEmpty()) {
            // When expiredOrUnAssignedLeases is not empty, that means
            // that we were not able to assign all expired or unassigned leases and hit the maxThroughput
            // per worker for all workers.
            log.warn("Not able to assign all expiredOrUnAssignedLeases");
            metricsScope.addData(
                    "LeaseSpillover", expiredOrUnAssignedLeases.size(), StandardUnit.COUNT, MetricsLevel.SUMMARY);
        }

        if (shouldRunVarianceBalancing()) {
            final long balanceWorkerVarianceStartTime = System.currentTimeMillis();
            final int totalNewAssignmentBeforeWorkerVarianceBalancing =
                    inMemoryStorageView.leaseToNewAssignedWorkerMap.size();
            leaseAssignmentDecider.balanceWorkerVariance();
            MetricsUtil.addLatency(
                    metricsScope, "BalanceWorkerVariance", balanceWorkerVarianceStartTime, MetricsLevel.DETAILED);
            // Delta of the in-memory assignment map isolates reassignments attributable to balancing.
            metricsScope.addData(
                    "NumOfLeasesReassignment",
                    inMemoryStorageView.leaseToNewAssignedWorkerMap.size()
                            - totalNewAssignmentBeforeWorkerVarianceBalancing,
                    StandardUnit.COUNT,
                    MetricsLevel.SUMMARY);
        }

        if (inMemoryStorageView.leaseToNewAssignedWorkerMap.isEmpty()) {
            log.info("No new lease assignment performed in this iteration");
        }

        // Flush the in-memory assignment decisions to storage, then housekeeping.
        parallelyAssignLeases(inMemoryStorageView, metricsScope);
        printPerWorkerLeases(inMemoryStorageView);
        deleteStaleWorkerMetricsEntries(inMemoryStorageView, metricsScope);
        success = true;
        noOfContinuousFailedAttempts = 0;
    } catch (final Exception e) {
        log.error("LeaseAssignmentManager failed to perform lease assignment.", e);
        noOfContinuousFailedAttempts++;
        if (noOfContinuousFailedAttempts >= DEFAULT_FAILURE_COUNT_TO_SWITCH_LEADER) {
            log.error(
                    "Failed to perform assignment {} times in a row, releasing leadership from worker : {}",
                    DEFAULT_FAILURE_COUNT_TO_SWITCH_LEADER,
                    currentWorkerId);
            MetricsUtil.addCount(metricsScope, FORCE_LEADER_RELEASE_METRIC_NAME, 1, MetricsLevel.SUMMARY);
            leaderDecider.releaseLeadershipIfHeld();
        }
    } finally {
        MetricsUtil.addSuccessAndLatency(metricsScope, success, startTime, MetricsLevel.SUMMARY);
        MetricsUtil.endScope(metricsScope);
    }
}
|
||||||
|
|
||||||
|
private boolean shouldRunVarianceBalancing() {
|
||||||
|
final boolean response = this.lamRunCounter == 0;
|
||||||
|
/*
|
||||||
|
To avoid lamRunCounter grow large, keep it within [0,varianceBalancingFrequency).
|
||||||
|
If varianceBalancingFrequency is 5 lamRunCounter value will be within 0 to 4 and method return true when
|
||||||
|
lamRunCounter is 0.
|
||||||
|
*/
|
||||||
|
this.lamRunCounter = (this.lamRunCounter + 1) % config.varianceBalancingFrequency();
|
||||||
|
return response;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Deletes the WorkerMetricStats entries which are stale(not updated since long time, ref
|
||||||
|
* {@link LeaseAssignmentManager#isWorkerMetricsEntryStale} for the condition to evaluate staleness)
|
||||||
|
*/
|
||||||
|
private void deleteStaleWorkerMetricsEntries(
|
||||||
|
final InMemoryStorageView inMemoryStorageView, final MetricsScope metricsScope) {
|
||||||
|
final long startTime = System.currentTimeMillis();
|
||||||
|
try {
|
||||||
|
final List<WorkerMetricStats> staleWorkerMetricsList = inMemoryStorageView.getWorkerMetricsList().stream()
|
||||||
|
.filter(this::isWorkerMetricsEntryStale)
|
||||||
|
.collect(Collectors.toList());
|
||||||
|
MetricsUtil.addCount(
|
||||||
|
metricsScope, "TotalStaleWorkerMetricsEntry", staleWorkerMetricsList.size(), MetricsLevel.DETAILED);
|
||||||
|
log.info("Number of stale workerMetrics entries : {}", staleWorkerMetricsList.size());
|
||||||
|
log.info("Stale workerMetrics list : {}", staleWorkerMetricsList);
|
||||||
|
|
||||||
|
final List<CompletableFuture<Boolean>> completableFutures = staleWorkerMetricsList.stream()
|
||||||
|
.map(workerMetrics -> CompletableFuture.supplyAsync(
|
||||||
|
() -> workerMetricsDAO.deleteMetrics(workerMetrics), LEASE_ASSIGNMENT_CALL_THREAD_POOL))
|
||||||
|
.collect(Collectors.toList());
|
||||||
|
|
||||||
|
CompletableFuture.allOf(completableFutures.toArray(new CompletableFuture[0]))
|
||||||
|
.join();
|
||||||
|
} finally {
|
||||||
|
MetricsUtil.addLatency(metricsScope, "StaleWorkerMetricsCleanup", startTime, MetricsLevel.DETAILED);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* WorkerMetricStats entry is considered stale if the lastUpdateTime of the workerMetrics is older than
|
||||||
|
* workerMetricsStalenessThreshold * workerMetricsReporterFreqInMillis.
|
||||||
|
*/
|
||||||
|
private boolean isWorkerMetricsEntryStale(final WorkerMetricStats workerMetrics) {
|
||||||
|
return Duration.between(Instant.ofEpochSecond(workerMetrics.getLastUpdateTime()), Instant.now())
|
||||||
|
.toMillis()
|
||||||
|
> config.staleWorkerMetricsEntryCleanupDuration().toMillis();
|
||||||
|
}
|
||||||
|
|
||||||
|
private void printPerWorkerLeases(final InMemoryStorageView storageView) {
|
||||||
|
storageView.getActiveWorkerIdSet().forEach(activeWorkerId -> {
|
||||||
|
log.info(
|
||||||
|
"Worker : {} and total leases : {} and totalThroughput : {}",
|
||||||
|
activeWorkerId,
|
||||||
|
Optional.ofNullable(storageView.getWorkerToLeasesMap().get(activeWorkerId))
|
||||||
|
.orElse(Collections.EMPTY_SET)
|
||||||
|
.size(),
|
||||||
|
storageView.getWorkerToTotalAssignedThroughputMap().get(activeWorkerId));
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Writes the new lease assignments captured in the in-memory view back to storage, one
 * conditional update per lease executed in parallel on the shared thread pool. Leases
 * still heartbeating toward a pending graceful-shutdown checkpoint are skipped. Failed
 * and successful assignment counts/latency are always recorded via the finally block.
 */
private void parallelyAssignLeases(final InMemoryStorageView inMemoryStorageView, final MetricsScope metricsScope) {
    final AtomicInteger failedAssignmentCounter = new AtomicInteger(0);
    final long startTime = System.currentTimeMillis();
    boolean success = false;
    try {
        CompletableFuture.allOf(inMemoryStorageView.getLeaseToNewAssignedWorkerMap().entrySet().stream()
                // ignore leases that are heartbeating and pending graceful shutdown checkpoint.
                .filter(entry -> !entry.getKey().blockedOnPendingCheckpoint(getNanoTimeMillis()))
                .map(entry -> CompletableFuture.supplyAsync(
                        () -> {
                            try {
                                final Lease lease = entry.getKey();
                                // Graceful handoff lets the current owner checkpoint before the
                                // new owner takes over; otherwise assign directly.
                                if (gracefulLeaseHandoffConfig.isGracefulLeaseHandoffEnabled()
                                        && lease.isEligibleForGracefulShutdown()) {
                                    return handleGracefulLeaseHandoff(
                                            lease, entry.getValue(), failedAssignmentCounter);
                                } else {
                                    return handleRegularLeaseAssignment(
                                            lease, entry.getValue(), failedAssignmentCounter);
                                }
                            } catch (Exception e) {
                                // Wrap so the checked exception surfaces through join().
                                throw new CompletionException(e);
                            }
                        },
                        LEASE_ASSIGNMENT_CALL_THREAD_POOL))
                .toArray(CompletableFuture[]::new))
                .join();
        success = true;
    } finally {
        MetricsUtil.addCount(
                metricsScope, "FailedAssignmentCount", failedAssignmentCounter.get(), MetricsLevel.DETAILED);
        MetricsUtil.addSuccessAndLatency(
                metricsScope, "ParallelyAssignLeases", success, startTime, MetricsLevel.DETAILED);
    }
}
|
||||||
|
|
||||||
|
private boolean handleGracefulLeaseHandoff(Lease lease, String newOwner, AtomicInteger failedAssignmentCounter)
|
||||||
|
throws ProvisionedThroughputException, InvalidStateException, DependencyException {
|
||||||
|
final boolean response = leaseRefresher.initiateGracefulLeaseHandoff(lease, newOwner);
|
||||||
|
if (response) {
|
||||||
|
// new handoff assignment. add the timeout.
|
||||||
|
lease.checkpointOwnerTimeoutTimestampMillis(getCheckpointOwnerTimeoutTimestampMillis());
|
||||||
|
} else {
|
||||||
|
failedAssignmentCounter.incrementAndGet();
|
||||||
|
}
|
||||||
|
return response;
|
||||||
|
}
|
||||||
|
|
||||||
|
private boolean handleRegularLeaseAssignment(Lease lease, String newOwner, AtomicInteger failedAssignmentCounter)
|
||||||
|
throws ProvisionedThroughputException, InvalidStateException, DependencyException {
|
||||||
|
final boolean response = leaseRefresher.assignLease(lease, newOwner);
|
||||||
|
if (response) {
|
||||||
|
// Successful assignment updates the leaseCounter, update the nanoTime for counter update.
|
||||||
|
lease.lastCounterIncrementNanos(nanoTimeProvider.get());
|
||||||
|
} else {
|
||||||
|
failedAssignmentCounter.incrementAndGet();
|
||||||
|
}
|
||||||
|
return response;
|
||||||
|
}
|
||||||
|
|
||||||
|
private void publishLeaseAndWorkerCountMetrics(
|
||||||
|
final MetricsScope metricsScope, final InMemoryStorageView inMemoryStorageView) {
|
||||||
|
// Names of the metrics are kept in sync with what is published in LeaseTaker.
|
||||||
|
metricsScope.addData(
|
||||||
|
"TotalLeases", inMemoryStorageView.leaseList.size(), StandardUnit.COUNT, MetricsLevel.SUMMARY);
|
||||||
|
metricsScope.addData(
|
||||||
|
"NumWorkers", inMemoryStorageView.activeWorkerMetrics.size(), StandardUnit.COUNT, MetricsLevel.SUMMARY);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Method updates all new leases with currentTime if the counter is updated since last run else keeps whatever
// was prev and update the prevRunLeasesState
/**
 * Reconciles each freshly scanned lease against the snapshot kept from the previous LAM run:
 * carries forward (or initializes) lastCounterIncrementNanos so lease expiry can be judged,
 * and preserves or re-stamps the graceful-shutdown checkpoint-owner timeout. Finally replaces
 * prevRunLeasesState with the current scan, keyed by leaseKey.
 *
 * @param leaseList leases from the current lease-table scan
 * @param scanTime  nano timestamp at which the scan completed
 */
private void updateLeasesLastCounterIncrementNanosAndLeaseShutdownTimeout(
        final List<Lease> leaseList, final Long scanTime) {
    for (final Lease lease : leaseList) {
        final Lease prevLease = prevRunLeasesState.get(lease.leaseKey());

        // make sure lease shutdown timeouts are tracked.
        if (lease.shutdownRequested()) {
            // previous and current leases might have same next and checkpoint owners but there is no
            // guarantee that the latest shutdown is the same shutdown in the previous lease for example
            // some other leaders change the lease states while this worker waiting for it's LAM run.
            // This is the best effort to prevent marking the incorrect timeout.
            if (isNull(prevLease) || !prevLease.shutdownRequested() || !isSameOwners(lease, prevLease)) {
                // Add new value if previous is null, previous lease is not shutdown pending or the owners
                // don't match
                lease.checkpointOwnerTimeoutTimestampMillis(getCheckpointOwnerTimeoutTimestampMillis());
            } else {
                // Same in-flight shutdown as last run: keep its original deadline.
                lease.checkpointOwnerTimeoutTimestampMillis(prevLease.checkpointOwnerTimeoutTimestampMillis());
            }
        }

        if (isNull(prevLease)) {
            lease.lastCounterIncrementNanos(
                    isNull(lease.actualOwner())
                            // This is an unassigned lease, mark as 0L that puts this in first in assignment order
                            ? 0L
                            : scanTime);
        } else {
            // Counter moved since last run => owner is alive as of this scan; otherwise keep the
            // older timestamp so the lease can age toward expiry.
            lease.lastCounterIncrementNanos(
                    lease.leaseCounter() > prevLease.leaseCounter()
                            ? scanTime
                            : prevLease.lastCounterIncrementNanos());
        }
    }
    prevRunLeasesState.clear();
    prevRunLeasesState.putAll(leaseList.stream().collect(Collectors.toMap(Lease::leaseKey, Function.identity())));
}
|
||||||
|
|
||||||
|
/**
 * Resets per-leader state after this worker newly becomes leader: clears the previous-run
 * lease snapshot (it belongs to the prior leader tenure) and the consecutive-failure counter.
 */
private void prepareAfterLeaderSwitch() {
    prevRunLeasesState.clear();
    noOfContinuousFailedAttempts = 0;
}
|
||||||
|
|
||||||
|
/**
 * In memory view of the leases and workerMetrics.
 * This class supports queries (e.g., leases assigned to worker or total throughput assigned to worker).
 * Populated once per LAM run via {@link #loadInMemoryStorageView} and mutated in-memory by the
 * assignment decider through {@link #performLeaseAssignment} before changes are flushed to storage.
 */
@Getter
class InMemoryStorageView {

    // This is in-memory view of the workerToLeaseMapping, this is updated in-memory before actual
    // changes to storage.
    private final Map<String, Set<Lease>> workerToLeasesMap = new HashMap<>();
    /**
     * This is computed initially after the loading leases and then updated when the
     * {@link InMemoryStorageView#performLeaseAssignment} is called.
     */
    private final Map<String, Double> workerToTotalAssignedThroughputMap = new HashMap<>();
    /**
     * Captures the new assignment done during the lifecycle of single run.
     */
    private final Map<Lease, String> leaseToNewAssignedWorkerMap = new HashMap<>();

    /**
     * List of all leases in the application.
     */
    private List<Lease> leaseList;
    /**
     * List of workers which are active (i.e., updated metric stats before the threshold ref)
     * {@link this#computeWorkerExpiryThresholdInSecond})
     */
    private List<WorkerMetricStats> activeWorkerMetrics;
    /**
     * List of all workerMetrics entries from storage.
     */
    private List<WorkerMetricStats> workerMetricsList;
    /**
     * List of active workers ids.
     */
    private Set<String> activeWorkerIdSet;
    /**
     * Wall time in nanoseconds when the lease table scan was completed.
     */
    private long leaseTableScanTime = 0L;
    /**
     * Average throughput for all workers.
     */
    private double targetAverageThroughput;

    /**
     * Update {@ref inMemoryWorkerToLeasesMapping} with the change in ownership and update newLeaseAssignmentMap
     *
     * @param lease lease changing assignment
     * @param newOwner new owner of the lease
     */
    public void performLeaseAssignment(final Lease lease, final String newOwner) {
        final String existingOwner = lease.actualOwner();
        // NOTE(review): assumes the existing owner always has an entry in workerToLeasesMap
        // (true when the lease was loaded via loadInMemoryStorageView) — a missing entry would NPE.
        workerToLeasesMap.get(existingOwner).remove(lease);
        workerToLeasesMap
                .computeIfAbsent(newOwner, owner -> new HashSet<>())
                .add(lease);
        updateWorkerThroughput(newOwner, lease.throughputKBps());
        // Remove the same lease throughput from oldOwner
        updateWorkerThroughput(existingOwner, -lease.throughputKBps());
        leaseToNewAssignedWorkerMap.put(lease, newOwner);
    }

    /**
     * Scans the LeaseTable and WorkerMetricStats in parallel and load the data and populate datastructures used
     * in lease assignment.
     */
    public void loadInMemoryStorageView(final MetricsScope metricsScope) throws Exception {
        // Kick off both scans before joining either so they overlap.
        final CompletableFuture<Map.Entry<List<Lease>, List<String>>> leaseListFuture = loadLeaseListAsync();

        final CompletableFuture<List<WorkerMetricStats>> workerMetricsFuture = loadWorkerMetricStats();

        final List<WorkerMetricStats> workerMetricsFromStorage = workerMetricsFuture.join();

        final List<String> listOfWorkerIdOfInvalidWorkerMetricsEntry = workerMetricsFromStorage.stream()
                .filter(workerMetrics -> !workerMetrics.isValidWorkerMetric())
                .map(WorkerMetricStats::getWorkerId)
                .collect(Collectors.toList());
        log.warn("List of workerIds with invalid entries : {}", listOfWorkerIdOfInvalidWorkerMetricsEntry);
        if (!listOfWorkerIdOfInvalidWorkerMetricsEntry.isEmpty()) {
            metricsScope.addData(
                    "NumWorkersWithInvalidEntry",
                    listOfWorkerIdOfInvalidWorkerMetricsEntry.size(),
                    StandardUnit.COUNT,
                    MetricsLevel.SUMMARY);
        }

        // Valid entries are considered further, for validity of entry refer WorkerMetricStats#isValidWorkerMetrics
        this.workerMetricsList = workerMetricsFromStorage.stream()
                .filter(WorkerMetricStats::isValidWorkerMetric)
                .collect(Collectors.toList());

        log.info("Total WorkerMetricStats available : {}", workerMetricsList.size());
        final long workerExpiryThreshold = computeWorkerExpiryThresholdInSecond();

        final long countOfWorkersWithFailingWorkerMetric = workerMetricsList.stream()
                .filter(WorkerMetricStats::isAnyWorkerMetricFailing)
                .count();
        if (countOfWorkersWithFailingWorkerMetric != 0) {
            metricsScope.addData(
                    "NumWorkersWithFailingWorkerMetric",
                    countOfWorkersWithFailingWorkerMetric,
                    StandardUnit.COUNT,
                    MetricsLevel.SUMMARY);
        }

        final Map.Entry<List<Lease>, List<String>> leaseListResponse = leaseListFuture.join();
        this.leaseList = leaseListResponse.getKey();
        log.warn("Leases that failed deserialization : {}", leaseListResponse.getValue());
        if (!leaseListResponse.getValue().isEmpty()) {
            MetricsUtil.addCount(
                    metricsScope,
                    "LeaseDeserializationFailureCount",
                    leaseListResponse.getValue().size(),
                    MetricsLevel.SUMMARY);
        }
        // Scan time is taken after the scan completes so expiry checks compare against it.
        this.leaseTableScanTime = nanoTimeProvider.get();
        log.info("Total Leases available : {}", leaseList.size());

        final double averageLeaseThroughput = leaseList.stream()
                .filter(lease -> nonNull(lease.throughputKBps()))
                .mapToDouble(Lease::throughputKBps)
                .average()
                // If none of the leases has any value, that means its app
                // startup time and thus assigns 0 in that case to start with.
                .orElse(0D);
        /*
         * If a workerMetrics has a metric (i.e. has -1 value in last index which denotes failure),
         * skip it from activeWorkerMetrics and no new action on it will be done
         * (new assignment etc.) until the metric has non -1 value in last index. This is to avoid performing action
         * with the stale data on worker.
         */
        this.activeWorkerMetrics = workerMetricsList.stream()
                .filter(workerMetrics -> workerMetrics.getLastUpdateTime() >= workerExpiryThreshold
                        && !workerMetrics.isAnyWorkerMetricFailing())
                .collect(Collectors.toList());
        log.info("activeWorkerMetrics : {}", activeWorkerMetrics.size());
        // Ideal per-worker throughput if load were spread evenly across active workers.
        targetAverageThroughput =
                averageLeaseThroughput * leaseList.size() / Math.max(1, activeWorkerMetrics.size());
        leaseList.forEach(lease -> {
            if (isNull(lease.throughputKBps())) {
                // If the lease is unassigned, it will not have any throughput value, use average throughput
                // as good enough value to start with.
                lease.throughputKBps(averageLeaseThroughput);
            }
            workerToLeasesMap
                    .computeIfAbsent(lease.actualOwner(), workerId -> new HashSet<>())
                    .add(lease);
            updateWorkerThroughput(lease.actualOwner(), lease.throughputKBps());
        });

        this.activeWorkerIdSet = new HashSet<>();
        // Calculate initial ratio
        this.activeWorkerMetrics.forEach(workerMetrics -> {
            activeWorkerIdSet.add(workerMetrics.getWorkerId());
            workerMetrics.setEmaAlpha(config.workerMetricsEMAAlpha());
            if (workerMetrics.isUsingDefaultWorkerMetric()) {
                setOperatingRangeAndWorkerMetricsDataForDefaultWorker(
                        workerMetrics,
                        getTotalAssignedThroughput(workerMetrics.getWorkerId()) / targetAverageThroughput);
            }
        });
    }

    // Accumulates (possibly negative) leaseThroughput into the worker's running total.
    private void updateWorkerThroughput(final String workerId, final double leaseThroughput) {
        double value = workerToTotalAssignedThroughputMap.computeIfAbsent(workerId, worker -> (double) 0L);
        workerToTotalAssignedThroughputMap.put(workerId, value + leaseThroughput);
    }

    // Seeds a synthetic "T" WorkerMetricStat for workers that did not report explicit metrics,
    // so they participate in variance balancing alongside workers with real metrics.
    private void setOperatingRangeAndWorkerMetricsDataForDefaultWorker(
            final WorkerMetricStats workerMetrics, final Double ratio) {
        // for workers with default WorkerMetricStats, the operating range ceiling of 100 represents the
        // target throughput. This way, with either heterogeneous or homogeneous fleets
        // of explicit WorkerMetricStats and default WorkerMetricStats applications, load will be evenly
        // distributed.
        log.info(
                "Worker [{}] is using default WorkerMetricStats, setting initial utilization ratio to [{}].",
                workerMetrics.getWorkerId(),
                ratio);
        workerMetrics.setOperatingRange(ImmutableMap.of("T", ImmutableList.of(100L)));
        workerMetrics.setMetricStats(ImmutableMap.of("T", ImmutableList.of(ratio * 100, ratio * 100)));
    }

    /**
     * Calculates the value threshold in seconds for a worker to be considered as active.
     * If a worker has not updated the WorkerMetricStats entry within this threshold, the worker is not considered
     * as active.
     *
     * @return wall time in seconds
     */
    private long computeWorkerExpiryThresholdInSecond() {
        // now minus N reporter intervals, converted to epoch seconds to match getLastUpdateTime().
        final long timeInSeconds = Duration.ofMillis(System.currentTimeMillis()
                        - DEFAULT_NO_OF_SKIP_STAT_FOR_DEAD_WORKER_THRESHOLD
                                * config.workerMetricsReporterFreqInMillis())
                .getSeconds();
        log.info("WorkerMetricStats expiry time in seconds : {}", timeInSeconds);
        return timeInSeconds;
    }

    /**
     * Looks at inMemoryWorkerToLeasesMapping for lease assignment and figures out if there is room considering
     * any new assignment that would have happened.
     */
    public boolean isWorkerTotalThroughputLessThanMaxThroughput(final String workerId) {
        // NOTE(review): comparison is <= despite the "LessThan" name — callers appear to
        // rely on equality counting as "has room"; confirm before tightening.
        return getTotalAssignedThroughput(workerId) <= config.maxThroughputPerHostKBps();
    }

    /**
     * Looks at inMemoryWorkerToLeasesMapping for lease assignment of a worker and returns true if the worker has
     * no leases assigned or less than maxNumberOfLeasesPerHost else false.
     */
    public boolean isWorkerAssignedLeasesLessThanMaxLeases(final String workerId) {
        final Set<Lease> assignedLeases = workerToLeasesMap.get(workerId);
        if (CollectionUtils.isEmpty(assignedLeases)) {
            // There are no leases assigned to the worker, that means its less than maxNumberOfLeasesPerHost.
            return true;
        } else {
            return assignedLeases.size() < maxLeasesForWorker;
        }
    }

    // Total throughput currently attributed to the worker; 0 if the worker owns nothing.
    public Double getTotalAssignedThroughput(final String workerId) {
        return workerToTotalAssignedThroughputMap.getOrDefault(workerId, 0D);
    }

    // Async scan of all WorkerMetricStats entries, with one retry on failure.
    private CompletableFuture<List<WorkerMetricStats>> loadWorkerMetricStats() {
        return CompletableFuture.supplyAsync(() -> loadWithRetry(workerMetricsDAO::getAllWorkerMetricStats));
    }

    // Async parallel scan of the lease table; the value side of the entry carries
    // lease keys that failed deserialization.
    private CompletableFuture<Map.Entry<List<Lease>, List<String>>> loadLeaseListAsync() {
        return CompletableFuture.supplyAsync(() -> loadWithRetry(() -> leaseRefresher.listLeasesParallely(
                LEASE_ASSIGNMENT_CALL_THREAD_POOL, DEFAULT_LEASE_TABLE_SCAN_PARALLELISM_FACTOR)));
    }

    /**
     * Invokes loadFunction, retrying up to DDB_LOAD_RETRY_ATTEMPT times before wrapping the
     * last failure in a CompletionException for the joining caller.
     */
    private <T> T loadWithRetry(final Callable<T> loadFunction) {
        int retryAttempt = 0;
        while (true) {
            try {
                return loadFunction.call();
            } catch (final Exception e) {
                if (retryAttempt < DDB_LOAD_RETRY_ATTEMPT) {
                    log.warn(
                            "Failed to load : {}, retrying",
                            loadFunction.getClass().getName(),
                            e);
                    retryAttempt++;
                } else {
                    throw new CompletionException(e);
                }
            }
        }
    }
}
|
||||||
|
|
||||||
|
private long getCheckpointOwnerTimeoutTimestampMillis() {
|
||||||
|
// this is a future timestamp in millis that the graceful lease handoff shutdown can be considered
|
||||||
|
// expired. LeaseDurationMillis is used here to account for how long it might take for the
|
||||||
|
// lease owner to receive the shutdown signal before executing shutdown.
|
||||||
|
return getNanoTimeMillis()
|
||||||
|
+ gracefulLeaseHandoffConfig.gracefulLeaseHandoffTimeoutMillis()
|
||||||
|
+ leaseDurationMillis;
|
||||||
|
}
|
||||||
|
|
||||||
|
private long getNanoTimeMillis() {
|
||||||
|
// this is not a wall clock time. But if we stick with using this time provider for calculating the elapsed
|
||||||
|
// time it should be okay to use in checkpoint expiration calculation.
|
||||||
|
return TimeUnit.NANOSECONDS.toMillis(nanoTimeProvider.get());
|
||||||
|
}
|
||||||
|
|
||||||
|
private static boolean isSameOwners(Lease currentLease, Lease previousLease) {
|
||||||
|
return Objects.equals(currentLease.leaseOwner(), previousLease.leaseOwner())
|
||||||
|
&& Objects.equals(currentLease.checkpointOwner(), previousLease.checkpointOwner());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,348 @@
|
||||||
|
package software.amazon.kinesis.coordinator.assignment;
|
||||||
|
|
||||||
|
import java.util.AbstractMap.SimpleEntry;
|
||||||
|
import java.util.ArrayDeque;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.Collections;
|
||||||
|
import java.util.Comparator;
|
||||||
|
import java.util.HashMap;
|
||||||
|
import java.util.HashSet;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Map;
|
||||||
|
import java.util.PriorityQueue;
|
||||||
|
import java.util.Queue;
|
||||||
|
import java.util.Set;
|
||||||
|
import java.util.stream.Collectors;
|
||||||
|
|
||||||
|
import lombok.extern.slf4j.Slf4j;
|
||||||
|
import software.amazon.kinesis.annotations.KinesisClientInternalApi;
|
||||||
|
import software.amazon.kinesis.leases.Lease;
|
||||||
|
import software.amazon.kinesis.worker.metricstats.WorkerMetricStats;
|
||||||
|
|
||||||
|
import static java.util.Objects.isNull;
|
||||||
|
import static java.util.Objects.nonNull;
|
||||||
|
|
||||||
|
/**
 * VarianceBasedLeaseAssignmentDecider
 * This implementation of LeaseAssignmentDecider performs lease assignment by considering the WorkerMetricStats values of workers
 * with respect to fleet level average of that WorkerMetricStats.
 * Rebalanced leases are assigned to workers which has maximum capacity to in terms of throughput to reach fleet level
 * across the WorkerMetricStats value. In case of multiple WorkerMetricStats, the capacity to reach fleet level average is determined by outlier
 * WorkerMetricStats.
 * To minimize the variance, the algorithm picks the fleet level average of the WorkerMetricStats for workers as a
 * pivot point and uses it to determine workers to take leases from and then assign to other workers.
 * The threshold for considering a worker for re-balance is configurable via
 * {@code reBalanceThreshold}. During reassignments the {@code dampeningPercentageValue} is used to achieve
 * critical dampening.
 */
@Slf4j
@KinesisClientInternalApi
public final class VarianceBasedLeaseAssignmentDecider implements LeaseAssignmentDecider {
    // Shared view of the current lease table / worker-metrics snapshot; all
    // assignment decisions are recorded back into this view.
    private final LeaseAssignmentManager.InMemoryStorageView inMemoryStorageView;
    // Percentage (0-100) of the computed imbalance actually taken per pass.
    private final int dampeningPercentageValue;
    // Percentage (0-100) band around the fleet average within which no re-balance is triggered.
    private final int reBalanceThreshold;
    // When true, take at least the minimum-throughput lease even if doing so overshoots the target.
    private final boolean allowThroughputOvershoot;
    // WorkerMetricStats name -> fleet-level average of that metric across active workers.
    private final Map<String, Double> workerMetricsToFleetLevelAverageMap = new HashMap<>();
    // Workers ordered by available capacity (largest headroom first) for assignment.
    private final PriorityQueue<WorkerMetricStats> assignableWorkerSortedByAvailableCapacity;
    // leaseCount / workerCount, recomputed in initialize(); used to extrapolate per-lease load.
    private int targetLeasePerWorker;

    /**
     * Builds the decider from the current in-memory snapshot: computes fleet-level
     * metric averages, then seeds the capacity-ordered queue with workers that
     * still have throughput and lease-count headroom.
     */
    public VarianceBasedLeaseAssignmentDecider(
            final LeaseAssignmentManager.InMemoryStorageView inMemoryStorageView,
            final int dampeningPercentageValue,
            final int reBalanceThreshold,
            final boolean allowThroughputOvershoot) {
        this.inMemoryStorageView = inMemoryStorageView;
        this.dampeningPercentageValue = dampeningPercentageValue;
        this.reBalanceThreshold = reBalanceThreshold;
        this.allowThroughputOvershoot = allowThroughputOvershoot;
        initialize();
        // Workers furthest below the fleet average (most headroom) are polled first.
        final Comparator<WorkerMetricStats> comparator = Comparator.comparingDouble(
                workerMetrics -> workerMetrics.computePercentageToReachAverage(workerMetricsToFleetLevelAverageMap));
        this.assignableWorkerSortedByAvailableCapacity = new PriorityQueue<>(comparator.reversed());
        this.assignableWorkerSortedByAvailableCapacity.addAll(
                getAvailableWorkersForAssignment(inMemoryStorageView.getActiveWorkerMetrics()));
    }

    /**
     * Populates workerMetricsToFleetLevelAverageMap with the per-metric average
     * across all active workers and computes targetLeasePerWorker
     * (leases / workers, floored, minimum 1).
     */
    private void initialize() {
        final Map<String, Double> workerMetricsNameToAverage = inMemoryStorageView.getActiveWorkerMetrics().stream()
                .flatMap(workerMetrics -> workerMetrics.getMetricStats().keySet().stream()
                        .map(workerMetricsName ->
                                new SimpleEntry<>(workerMetricsName, workerMetrics.getMetricStat(workerMetricsName))))
                .collect(Collectors.groupingBy(
                        SimpleEntry::getKey, HashMap::new, Collectors.averagingDouble(SimpleEntry::getValue)));

        workerMetricsToFleetLevelAverageMap.putAll(workerMetricsNameToAverage);

        // Guard against division by zero when no active workers are reported.
        final int totalWorkers =
                Math.max(inMemoryStorageView.getActiveWorkerMetrics().size(), 1);
        this.targetLeasePerWorker = Math.max(inMemoryStorageView.getLeaseList().size() / totalWorkers, 1);
    }

    /**
     * Filters the given workers down to those with both throughput and
     * lease-count headroom per the in-memory view.
     */
    private List<WorkerMetricStats> getAvailableWorkersForAssignment(final List<WorkerMetricStats> workerMetricsList) {
        // Workers with WorkerMetricStats running hot are also available for assignment as the goal is to balance
        // utilization
        // always (e.g., if all workers have hot WorkerMetricStats, balance the variance between them too)
        return workerMetricsList.stream()
                .filter(workerMetrics -> inMemoryStorageView.isWorkerTotalThroughputLessThanMaxThroughput(
                                workerMetrics.getWorkerId())
                        && inMemoryStorageView.isWorkerAssignedLeasesLessThanMaxLeases(workerMetrics.getWorkerId()))
                .collect(Collectors.toList());
    }

    /**
     * Assigns each expired/unassigned lease to the worker with the most
     * available capacity. Successfully assigned leases are removed from the
     * input list; assignment stops once no assignable worker remains.
     */
    @Override
    public void assignExpiredOrUnassignedLeases(final List<Lease> expiredOrUnAssignedLeases) {
        // Sort the expiredOrUnAssignedLeases using lastCounterIncrementNanos such that leases expired first are
        // picked first.
        // Unassigned leases have lastCounterIncrementNanos as zero and thus assigned first.
        Collections.sort(expiredOrUnAssignedLeases, Comparator.comparing(Lease::lastCounterIncrementNanos));
        final Set<Lease> assignedLeases = new HashSet<>();
        for (final Lease lease : expiredOrUnAssignedLeases) {
            final WorkerMetricStats workerToAssignLease = assignableWorkerSortedByAvailableCapacity.poll();
            if (nonNull(workerToAssignLease)) {
                assignLease(lease, workerToAssignLease);
                assignedLeases.add(lease);
            } else {
                log.info("No worker available to assign lease {}", lease.leaseKey());
                break;
            }
        }
        expiredOrUnAssignedLeases.removeAll(assignedLeases);
    }

    /**
     * Determines which workers should give up leases for the given metric.
     * Returns an empty list when no worker is outside the +/- reBalanceThreshold
     * band (and none is above its operating range), i.e. no re-balance needed.
     *
     * @param currentWorkerMetrics workers that report this metric
     * @param workerMetricsName metric under consideration
     * @param workerMetricsValueAvg fleet-level average of that metric
     */
    private List<WorkerMetricStats> getWorkersToTakeLeasesFromIfRequired(
            final List<WorkerMetricStats> currentWorkerMetrics,
            final String workerMetricsName,
            final double workerMetricsValueAvg) {
        final List<WorkerMetricStats> workerIdsAboveAverage = new ArrayList<>();

        final double upperLimit = workerMetricsValueAvg * (1.0D + (double) reBalanceThreshold / 100);
        final double lowerLimit = workerMetricsValueAvg * (1.0D - (double) reBalanceThreshold / 100);

        WorkerMetricStats mostLoadedWorker = null;

        log.info("Range for re-balance upper threshold {} and lower threshold {}", upperLimit, lowerLimit);

        boolean shouldTriggerReBalance = false;
        for (final WorkerMetricStats workerMetrics : currentWorkerMetrics) {
            final double currentWorkerMetricsValue = workerMetrics.getMetricStat(workerMetricsName);
            final boolean isCurrentWorkerMetricsAboveOperatingRange =
                    workerMetrics.isWorkerMetricAboveOperatingRange(workerMetricsName);
            /*
            If there is any worker, whose WorkerMetricStats value is between +/- reBalanceThreshold % of workerMetricsValueAvg or if
            worker's WorkerMetricStats value is above operating range trigger re-balance
            */
            if (currentWorkerMetricsValue > upperLimit
                    || currentWorkerMetricsValue < lowerLimit
                    || isCurrentWorkerMetricsAboveOperatingRange) {
                shouldTriggerReBalance = true;
            }
            // Perform re-balance on the worker if its above upperLimit or if current WorkerMetricStats is above
            // operating range.
            if (currentWorkerMetricsValue >= upperLimit || isCurrentWorkerMetricsAboveOperatingRange) {
                workerIdsAboveAverage.add(workerMetrics);
            }
            if (mostLoadedWorker == null
                    || mostLoadedWorker.getMetricStat(workerMetricsName) < currentWorkerMetricsValue) {
                mostLoadedWorker = workerMetrics;
            }
        }

        /*
        If workerIdsAboveAverage is empty that means there is no worker with WorkerMetricStats value above upperLimit so pick
        the worker with higher CPU. This can happen when there is worker with WorkerMetricStats value below lowerLimit but
        all other workers are within upperLimit.
        */
        if (workerIdsAboveAverage.isEmpty()) {
            workerIdsAboveAverage.add(mostLoadedWorker);
        }

        return shouldTriggerReBalance ? workerIdsAboveAverage : Collections.emptyList();
    }

    /**
     * Performs the balancing of the throughput assigned to workers based on the WorkerMetricsValues of worker with respect
     * to fleet level average.
     * Each WorkerMetricStats is treated independently to determine workers for re-balance computed (computed based on
     * reBalanceThreshold) are determined.
     * The magnitude of throughput to take is determined by how much worker is away from the average of that WorkerMetricStats
     * across fleet and in case of multiple WorkerMetricStats, the one with maximum magnitude of throughput is considered.
     */
    @Override
    public void balanceWorkerVariance() {
        final List<WorkerMetricStats> activeWorkerMetrics = inMemoryStorageView.getActiveWorkerMetrics();

        log.info("WorkerMetricStats to corresponding fleet level average : {}", workerMetricsToFleetLevelAverageMap);
        log.info("Active WorkerMetricStats : {}", activeWorkerMetrics);

        final Map<String, Double> workerIdToThroughputToTakeMap = new HashMap<>();
        String largestOutlierWorkerMetricsName = "";
        double maxThroughputTake = -1.0D;

        // Phase 1: per metric, compute how much throughput would be taken from
        // each over-loaded worker; remember only the metric with the largest
        // total take (the "largest outlier" metric drives this round).
        for (final Map.Entry<String, Double> workerMetricsToFleetLevelAverageEntry :
                workerMetricsToFleetLevelAverageMap.entrySet()) {
            final String workerMetricsName = workerMetricsToFleetLevelAverageEntry.getKey();

            // Filter workers that does not have current WorkerMetricStats. This is possible if application is adding a
            // new WorkerMetricStats and currently in phase of deployment.
            final List<WorkerMetricStats> currentWorkerMetrics = activeWorkerMetrics.stream()
                    .filter(workerMetrics -> workerMetrics.containsMetricStat(workerMetricsName))
                    .collect(Collectors.toList());

            final double fleetAverageForWorkerMetrics = workerMetricsToFleetLevelAverageEntry.getValue();

            final List<WorkerMetricStats> workerToTakeLeasesFrom = getWorkersToTakeLeasesFromIfRequired(
                    currentWorkerMetrics, workerMetricsName, fleetAverageForWorkerMetrics);

            final Map<String, Double> workerIdToThroughputToTakeForCurrentWorkerMetrics = new HashMap<>();
            double totalThroughputToTakeForCurrentWorkerMetrics = 0D;
            for (final WorkerMetricStats workerToTakeLease : workerToTakeLeasesFrom) {
                final double workerMetricsValueForWorker = workerToTakeLease.getMetricStat(workerMetricsName);
                // Load to take based on the difference compared to the fleet level average
                final double loadPercentageToTake =
                        (workerMetricsValueForWorker - fleetAverageForWorkerMetrics) / workerMetricsValueForWorker;
                // Dampen the load based on dampeningPercentageValue
                final double dampenedLoadPercentageToTake =
                        loadPercentageToTake * ((double) dampeningPercentageValue / 100);
                final double throughputToTake =
                        inMemoryStorageView.getTotalAssignedThroughput(workerToTakeLease.getWorkerId())
                                * dampenedLoadPercentageToTake;
                log.info(
                        "For worker : {} taking throughput : {} after dampening based on WorkerMetricStats : {}",
                        workerToTakeLease.getWorkerId(),
                        throughputToTake,
                        workerMetricsName);
                totalThroughputToTakeForCurrentWorkerMetrics += throughputToTake;
                workerIdToThroughputToTakeForCurrentWorkerMetrics.put(
                        workerToTakeLease.getWorkerId(), throughputToTake);
            }

            /*
            If totalThroughputToTakeForCurrentWorkerMetrics is more than maxThroughputTake that means this WorkerMetricStats is more
            outlier so consider this for reBalancing
            */
            if (maxThroughputTake < totalThroughputToTakeForCurrentWorkerMetrics) {
                largestOutlierWorkerMetricsName = workerMetricsName;
                workerIdToThroughputToTakeMap.clear();
                workerIdToThroughputToTakeMap.putAll(workerIdToThroughputToTakeForCurrentWorkerMetrics);
                maxThroughputTake = totalThroughputToTakeForCurrentWorkerMetrics;
            }
        }

        log.info(
                "Largest outlier WorkerMetricStats is : {} and total of {} throughput will be rebalanced",
                largestOutlierWorkerMetricsName,
                maxThroughputTake);
        log.info("Workers to throughput taken from them is : {}", workerIdToThroughputToTakeMap);

        // Phase 2: take leases from the most over-loaded workers first and hand
        // them to workers with the most headroom, stopping early when every
        // candidate worker would be pushed above average/operating range.
        final List<Map.Entry<String, Double>> sortedWorkerIdToThroughputToTakeEntries =
                new ArrayList<>(workerIdToThroughputToTakeMap.entrySet());
        // sort entries by values.
        Collections.sort(sortedWorkerIdToThroughputToTakeEntries, (e1, e2) -> e2.getValue()
                .compareTo(e1.getValue()));

        for (final Map.Entry<String, Double> workerIdToThroughputToTakeEntry :
                sortedWorkerIdToThroughputToTakeEntries) {
            final String workerId = workerIdToThroughputToTakeEntry.getKey();

            final double throughputToTake = workerIdToThroughputToTakeEntry.getValue();

            final Queue<Lease> leasesToTake = getLeasesToTake(workerId, throughputToTake);

            log.info(
                    "Leases taken from worker : {} are : {}",
                    workerId,
                    leasesToTake.stream().map(Lease::leaseKey).collect(Collectors.toSet()));

            for (final Lease lease : leasesToTake) {
                final WorkerMetricStats workerToAssign = assignableWorkerSortedByAvailableCapacity.poll();
                if (nonNull(workerToAssign)
                        && workerToAssign.willAnyMetricStatsGoAboveAverageUtilizationOrOperatingRange(
                                workerMetricsToFleetLevelAverageMap,
                                inMemoryStorageView.getTargetAverageThroughput(),
                                lease.throughputKBps(),
                                targetLeasePerWorker)) {
                    log.info("No worker to assign anymore in this iteration due to hitting average values");
                    break;
                }
                if (nonNull(workerToAssign)) {
                    assignLease(lease, workerToAssign);
                }
            }
        }

        printWorkerToUtilizationLog(inMemoryStorageView.getActiveWorkerMetrics());
    }

    /**
     * Selects leases to take from the given worker whose combined throughput
     * approximates {@code throughputToTake}. Falls back to taking a single
     * lease when the worker's recorded throughput is zero (the ratio-based
     * selection is undefined in that case).
     */
    private Queue<Lease> getLeasesToTake(final String workerId, final double throughputToTake) {
        final Set<Lease> existingLeases =
                inMemoryStorageView.getWorkerToLeasesMap().get(workerId);

        if (isNull(existingLeases) || existingLeases.isEmpty()) {
            return new ArrayDeque<>();
        }

        if (inMemoryStorageView.getTotalAssignedThroughput(workerId) == 0D) {
            // This is the case where throughput of this worker is zero and have 1 or more leases assigned.
            // Its not possible to determine leases to take based on throughput so simply take 1 lease and move on.
            return new ArrayDeque<>(new ArrayList<>(existingLeases).subList(0, 1));
        }

        return getLeasesCombiningToThroughput(workerId, throughputToTake);
    }

    /**
     * Records the assignment of {@code lease} to the given worker in the
     * in-memory view and re-queues the worker if it still has headroom.
     * A no-op (other than re-queueing) when the lease already belongs to the
     * worker.
     */
    private void assignLease(final Lease lease, final WorkerMetricStats workerMetrics) {
        if (nonNull(lease.actualOwner()) && lease.actualOwner().equals(workerMetrics.getWorkerId())) {
            // if a new owner and current owner are same then no assignment to do
            // put back the worker as well as no assignment is done
            assignableWorkerSortedByAvailableCapacity.add(workerMetrics);
            return;
        }
        // Project the worker's metric values forward as if it already carried
        // this lease's throughput, so subsequent capacity ordering stays honest.
        workerMetrics.extrapolateMetricStatValuesForAddedThroughput(
                workerMetricsToFleetLevelAverageMap,
                inMemoryStorageView.getTargetAverageThroughput(),
                lease.throughputKBps(),
                targetLeasePerWorker);
        log.info("Assigning lease : {} to worker : {}", lease.leaseKey(), workerMetrics.getWorkerId());
        inMemoryStorageView.performLeaseAssignment(lease, workerMetrics.getWorkerId());
        if (inMemoryStorageView.isWorkerTotalThroughputLessThanMaxThroughput(workerMetrics.getWorkerId())
                && inMemoryStorageView.isWorkerAssignedLeasesLessThanMaxLeases(workerMetrics.getWorkerId())) {
            assignableWorkerSortedByAvailableCapacity.add(workerMetrics);
        }
    }

    /** Logs each active worker's id and averaged WorkerMetricStats data. */
    private void printWorkerToUtilizationLog(final List<WorkerMetricStats> activeWorkerMetrics) {
        activeWorkerMetrics.forEach(workerMetrics -> log.info(
                "WorkerId : {} and average WorkerMetricStats data : {}",
                workerMetrics.getWorkerId(),
                workerMetrics.getMetricStatsMap()));
    }

    /**
     * Greedily picks a random subset of the worker's leases whose combined
     * throughput stays strictly below {@code throughputToGet}; optionally adds
     * the single smallest lease when nothing fit and overshoot is allowed.
     */
    private Queue<Lease> getLeasesCombiningToThroughput(final String workerId, final double throughputToGet) {
        final List<Lease> assignedLeases =
                new ArrayList<>(inMemoryStorageView.getWorkerToLeasesMap().get(workerId));
        if (assignedLeases.isEmpty()) {
            // This is possible if the worker is having high utilization but does not have any leases assigned to it
            return new ArrayDeque<>();
        }
        // Shuffle leases to randomize what leases gets picked.
        Collections.shuffle(assignedLeases);
        final Queue<Lease> response = new ArrayDeque<>();
        double remainingThroughputToGet = throughputToGet;
        for (final Lease lease : assignedLeases) {
            // if adding this lease makes throughout to take go below zero avoid taking this lease.
            if (remainingThroughputToGet - lease.throughputKBps() <= 0) {
                continue;
            }
            remainingThroughputToGet -= lease.throughputKBps();
            response.add(lease);
        }

        // If allowThroughputOvershoot is set to true, take a minimum throughput lease
        if (allowThroughputOvershoot && response.isEmpty()) {
            assignedLeases.stream()
                    .min(Comparator.comparingDouble(Lease::throughputKBps))
                    .ifPresent(response::add);
        }
        return response;
    }
}
|
||||||
|
|
@ -0,0 +1,58 @@
|
||||||
|
/*
|
||||||
|
* Copyright 2024 Amazon.com, Inc. or its affiliates.
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the
|
||||||
|
* "License"); you may not use this file except in compliance
|
||||||
|
* with the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
package software.amazon.kinesis.coordinator.migration;
|
||||||
|
|
||||||
|
/**
 * ClientVersion support during upgrade from KCLv2.x to KCLv3.x
 *
 * This enum is persisted in storage, so any changes to it needs to be backward compatible.
 * Reorganizing the values is not backward compatible, also if versions are removed, the corresponding
 * enum value cannot be reused without backward compatibility considerations.
 */
public enum ClientVersion {
    /**
     * This is a transient start state version used during initialization of the Migration State Machine.
     */
    CLIENT_VERSION_INIT,
    /**
     * This version is used during the upgrade of an application from KCLv2.x to KCLv3.x, in this version
     * KCL workers will emit WorkerMetricStats and run KCLv2.x algorithms for leader election and lease
     * assignment. KCL will also monitor for upgrade to KCLv3.x readiness of the worker fleet.
     */
    CLIENT_VERSION_UPGRADE_FROM_2X,
    /**
     * This version is used during rollback from CLIENT_VERSION_UPGRADE_FROM_2X or CLIENT_VERSION_3X_WITH_ROLLBACK,
     * which can only be initiated using a KCL migration tool, when customer wants to revert to KCLv2.x functionality.
     * In this version, KCL will not emit WorkerMetricStats and run KCLv2.x algorithms for leader election
     * and lease assignment. In this version, KCL will monitor for roll-forward scenario where
     * client version is updated to CLIENT_VERSION_UPGRADE_FROM_2X using the migration tool.
     */
    CLIENT_VERSION_2X,
    /**
     * When workers are operating in CLIENT_VERSION_UPGRADE_FROM_2X and when worker fleet is determined to be
     * KCLv3.x ready (when lease table GSI is active and worker-metrics are being emitted by all lease owners)
     * then the leader will initiate the switch to KCLv3.x algorithms for leader election and lease assignment,
     * by using this version and persisting it in the {@link MigrationState} that allows all worker hosts
     * to also flip to KCLv3.x functionality. In this KCL will also monitor for rollback to detect when the
     * customer updates version to CLIENT_VERSION_2X using migration tool, so that it instantly flips back
     * to CLIENT_VERSION_2X.
     */
    CLIENT_VERSION_3X_WITH_ROLLBACK,
    /**
     * A new application starting KCLv3.x or an upgraded application from KCLv2.x after upgrade is successful
     * can use this version to default all KCLv3.x algorithms without any monitor to rollback.
     */
    CLIENT_VERSION_3X;
}
|
||||||
|
|
@ -0,0 +1,161 @@
|
||||||
|
/*
|
||||||
|
* Copyright 2024 Amazon.com, Inc. or its affiliates.
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the
|
||||||
|
* "License"); you may not use this file except in compliance
|
||||||
|
* with the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
package software.amazon.kinesis.coordinator.migration;
|
||||||
|
|
||||||
|
import java.time.Duration;
|
||||||
|
import java.util.Random;
|
||||||
|
import java.util.concurrent.ScheduledExecutorService;
|
||||||
|
import java.util.concurrent.ScheduledFuture;
|
||||||
|
import java.util.concurrent.TimeUnit;
|
||||||
|
|
||||||
|
import lombok.RequiredArgsConstructor;
|
||||||
|
import lombok.extern.slf4j.Slf4j;
|
||||||
|
import software.amazon.awssdk.annotations.ThreadSafe;
|
||||||
|
import software.amazon.awssdk.services.cloudwatch.model.StandardUnit;
|
||||||
|
import software.amazon.kinesis.coordinator.CoordinatorStateDAO;
|
||||||
|
import software.amazon.kinesis.leases.exceptions.DependencyException;
|
||||||
|
import software.amazon.kinesis.leases.exceptions.InvalidStateException;
|
||||||
|
import software.amazon.kinesis.metrics.MetricsFactory;
|
||||||
|
import software.amazon.kinesis.metrics.MetricsLevel;
|
||||||
|
import software.amazon.kinesis.metrics.MetricsScope;
|
||||||
|
import software.amazon.kinesis.metrics.MetricsUtil;
|
||||||
|
|
||||||
|
import static software.amazon.kinesis.coordinator.migration.MigrationState.MIGRATION_HASH_KEY;
|
||||||
|
import static software.amazon.kinesis.coordinator.migration.MigrationStateMachineImpl.METRICS_OPERATION;
|
||||||
|
|
||||||
|
/**
 * Change monitor for MigrationState.clientVersion to notify a callback if the value
 * changes from a given value. This monitor will be run to monitor
 * rollback, roll-forward and also upgrade to 3.x scenarios. Look at {@link ClientVersion}
 * for more details.
 *
 * Since all KCL workers will be running the monitor, the monitor poll interval uses
 * a random jitter to stagger the reads to ddb.
 *
 * The class is thread-safe and will invoke callback on a separate thread.
 */
@Slf4j
@RequiredArgsConstructor
@ThreadSafe
public class ClientVersionChangeMonitor implements Runnable {

    /**
     * Interface of a callback to invoke when monitor condition is true.
     */
    public interface ClientVersionChangeCallback {
        void accept(final MigrationState currentMigrationState) throws InvalidStateException, DependencyException;
    }

    // Base poll interval; the effective interval adds up to JITTER_FACTOR of
    // this value as random jitter (computed once in startMonitor).
    private static final long MONITOR_INTERVAL_MILLIS = Duration.ofMinutes(1).toMillis();
    private static final double JITTER_FACTOR = 0.1;

    private final MetricsFactory metricsFactory;
    private final CoordinatorStateDAO coordinatorStateDAO;
    private final ScheduledExecutorService stateMachineThreadPool;
    // Invoked (under this monitor's lock) when the persisted client version differs from expectedVersion.
    private final ClientVersionChangeCallback callback;
    // The version we expect to see; any other persisted value triggers the callback.
    private final ClientVersion expectedVersion;
    private final Random random;
    // Effective poll interval (base + jitter); set in startMonitor.
    private long monitorIntervalMillis;

    // Non-null while the monitor is scheduled; also acts as the "started" flag.
    private ScheduledFuture<?> scheduledFuture;

    /**
     * Schedules the monitor with a jittered fixed delay. Idempotent: a second
     * call while already scheduled is a no-op.
     */
    public synchronized void startMonitor() {
        if (scheduledFuture == null) {
            final long jitter = (long) (random.nextDouble() * MONITOR_INTERVAL_MILLIS * JITTER_FACTOR);
            monitorIntervalMillis = MONITOR_INTERVAL_MILLIS + jitter;
            log.info(
                    "Monitoring for MigrationState client version change from {} every {}ms",
                    expectedVersion,
                    monitorIntervalMillis);
            scheduledFuture = stateMachineThreadPool.scheduleWithFixedDelay(
                    this, monitorIntervalMillis, monitorIntervalMillis, TimeUnit.MILLISECONDS);
        }
    }

    @Override
    public String toString() {
        return new StringBuilder(getClass().getSimpleName())
                .append("[")
                .append(expectedVersion)
                .append("]")
                .toString();
    }

    /**
     * Cancel the monitor explicitly before the condition is met, e.g. when the worker is going down.
     * Note on synchronization: callback of this monitor is invoked while holding the lock on this monitor object.
     * If cancel is called from within the same lock context that callback uses, then it can lead to
     * deadlock. Ensure synchronization context between callback the caller of cancel is not shared.
     */
    public synchronized void cancel() {
        if (scheduledFuture != null) {
            log.info("Cancelling {}", this);
            // cancel(false): let an in-flight run() finish rather than interrupting it.
            scheduledFuture.cancel(false);
        } else {
            log.info("Monitor {} is not running", this);
        }
    }

    /**
     * Single poll: reads MigrationState from the coordinator state table and,
     * if the client version no longer matches expectedVersion, invokes the
     * callback and cancels itself. Exceptions (including callback failures)
     * are logged and retried on the next scheduled run.
     */
    @Override
    public synchronized void run() {
        try {
            if (scheduledFuture == null) {
                log.debug("Monitor has been cancelled, not running...");
                return;
            }

            final MigrationState migrationState =
                    (MigrationState) coordinatorStateDAO.getCoordinatorState(MIGRATION_HASH_KEY);
            if (migrationState != null) {
                if (migrationState.getClientVersion() != expectedVersion) {
                    log.info("MigrationState client version has changed {}, invoking monitor callback", migrationState);
                    callback.accept(migrationState);
                    log.info("Callback successful, monitoring cancelling itself.");
                    // stop further monitoring
                    scheduledFuture.cancel(false);
                    scheduledFuture = null;
                } else {
                    emitMetrics();
                    log.debug("No change detected {}", this);
                }
            }
        } catch (final Exception e) {
            log.warn(
                    "Exception occurred when monitoring for client version change from {}, will retry in {}",
                    expectedVersion,
                    monitorIntervalMillis,
                    e);
        }
    }

    /**
     * Emits a per-poll CloudWatch datapoint describing which mode this worker
     * is currently operating in, keyed off the version being monitored.
     */
    private void emitMetrics() {
        final MetricsScope scope = MetricsUtil.createMetricsWithOperation(metricsFactory, METRICS_OPERATION);
        try {
            switch (expectedVersion) {
                case CLIENT_VERSION_3X_WITH_ROLLBACK:
                    scope.addData("CurrentState:3xWorker", 1, StandardUnit.COUNT, MetricsLevel.SUMMARY);
                    break;
                case CLIENT_VERSION_2X:
                case CLIENT_VERSION_UPGRADE_FROM_2X:
                    scope.addData("CurrentState:2xCompatibleWorker", 1, StandardUnit.COUNT, MetricsLevel.SUMMARY);
                    break;
                default:
                    throw new IllegalStateException(String.format("Unexpected version %s", expectedVersion.name()));
            }
        } finally {
            MetricsUtil.endScope(scope);
        }
    }
}
|
||||||
|
|
@ -0,0 +1,159 @@
|
||||||
|
/*
|
||||||
|
* Copyright 2024 Amazon.com, Inc. or its affiliates.
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the
|
||||||
|
* "License"); you may not use this file except in compliance
|
||||||
|
* with the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
package software.amazon.kinesis.coordinator.migration;
|
||||||
|
|
||||||
|
import java.util.Random;
|
||||||
|
import java.util.concurrent.CompletableFuture;
|
||||||
|
import java.util.concurrent.ScheduledExecutorService;
|
||||||
|
|
||||||
|
import lombok.NonNull;
|
||||||
|
import lombok.RequiredArgsConstructor;
|
||||||
|
import lombok.extern.slf4j.Slf4j;
|
||||||
|
import software.amazon.awssdk.annotations.ThreadSafe;
|
||||||
|
import software.amazon.awssdk.services.cloudwatch.model.StandardUnit;
|
||||||
|
import software.amazon.kinesis.annotations.KinesisClientInternalApi;
|
||||||
|
import software.amazon.kinesis.coordinator.CoordinatorStateDAO;
|
||||||
|
import software.amazon.kinesis.coordinator.DynamicMigrationComponentsInitializer;
|
||||||
|
import software.amazon.kinesis.leases.exceptions.DependencyException;
|
||||||
|
import software.amazon.kinesis.leases.exceptions.InvalidStateException;
|
||||||
|
import software.amazon.kinesis.metrics.MetricsLevel;
|
||||||
|
import software.amazon.kinesis.metrics.MetricsScope;
|
||||||
|
import software.amazon.kinesis.metrics.MetricsUtil;
|
||||||
|
|
||||||
|
import static software.amazon.kinesis.coordinator.migration.ClientVersion.CLIENT_VERSION_2X;
|
||||||
|
import static software.amazon.kinesis.coordinator.migration.ClientVersion.CLIENT_VERSION_UPGRADE_FROM_2X;
|
||||||
|
import static software.amazon.kinesis.coordinator.migration.MigrationStateMachineImpl.FAULT_METRIC;
|
||||||
|
import static software.amazon.kinesis.coordinator.migration.MigrationStateMachineImpl.METRICS_OPERATION;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* State for CLIENT_VERSION_2X. In this state, the only allowed valid transition is
|
||||||
|
* the roll-forward scenario which can only be performed using the KCL Migration tool.
|
||||||
|
* So when the state machine enters this state, a monitor is started to detect the
|
||||||
|
* roll-forward scenario.
|
||||||
|
*/
|
||||||
|
@KinesisClientInternalApi
|
||||||
|
@RequiredArgsConstructor
|
||||||
|
@Slf4j
|
||||||
|
@ThreadSafe
|
||||||
|
public class MigrationClientVersion2xState implements MigrationClientVersionState {
|
||||||
|
private final MigrationStateMachine stateMachine;
|
||||||
|
private final CoordinatorStateDAO coordinatorStateDAO;
|
||||||
|
private final ScheduledExecutorService stateMachineThreadPool;
|
||||||
|
private final DynamicMigrationComponentsInitializer initializer;
|
||||||
|
private final Random random;
|
||||||
|
|
||||||
|
private ClientVersionChangeMonitor rollForwardMonitor;
|
||||||
|
private boolean entered = false;
|
||||||
|
private boolean left = false;
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public ClientVersion clientVersion() {
|
||||||
|
return CLIENT_VERSION_2X;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public synchronized void enter(final ClientVersion fromClientVersion) {
|
||||||
|
if (!entered) {
|
||||||
|
log.info("Entering {} from {}", this, fromClientVersion);
|
||||||
|
initializer.initializeClientVersionFor2x(fromClientVersion);
|
||||||
|
|
||||||
|
log.info("Starting roll-forward monitor");
|
||||||
|
rollForwardMonitor = new ClientVersionChangeMonitor(
|
||||||
|
initializer.metricsFactory(),
|
||||||
|
coordinatorStateDAO,
|
||||||
|
stateMachineThreadPool,
|
||||||
|
this::onClientVersionChange,
|
||||||
|
clientVersion(),
|
||||||
|
random);
|
||||||
|
rollForwardMonitor.startMonitor();
|
||||||
|
entered = true;
|
||||||
|
} else {
|
||||||
|
log.info("Not entering {}", left ? "already exited state" : "already entered state");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public synchronized void leave() {
|
||||||
|
if (entered && !left) {
|
||||||
|
log.info("Leaving {}", this);
|
||||||
|
cancelRollForwardMonitor();
|
||||||
|
left = false;
|
||||||
|
} else {
|
||||||
|
log.info("Cannot leave {}", entered ? "already exited state" : "because state is not active");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String toString() {
|
||||||
|
return getClass().getSimpleName();
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Callback handler to handle client version changes in MigrationState in DDB.
|
||||||
|
* @param newState current MigrationState read from DDB where client version is not CLIENT_VERSION_2X
|
||||||
|
* @throws InvalidStateException during transition to the next state based on the new ClientVersion
|
||||||
|
* or if the new state in DDB is unexpected.
|
||||||
|
*/
|
||||||
|
private synchronized void onClientVersionChange(@NonNull final MigrationState newState)
|
||||||
|
throws InvalidStateException, DependencyException {
|
||||||
|
if (!entered || left) {
|
||||||
|
log.warn("Received client version change notification on inactive state {}", this);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
final MetricsScope scope =
|
||||||
|
MetricsUtil.createMetricsWithOperation(initializer.metricsFactory(), METRICS_OPERATION);
|
||||||
|
try {
|
||||||
|
if (newState.getClientVersion() == CLIENT_VERSION_UPGRADE_FROM_2X) {
|
||||||
|
log.info(
|
||||||
|
"A roll-forward has been initiated for the application. Transition to {}",
|
||||||
|
CLIENT_VERSION_UPGRADE_FROM_2X);
|
||||||
|
// If this succeeds, the monitor will cancel itself.
|
||||||
|
stateMachine.transitionTo(CLIENT_VERSION_UPGRADE_FROM_2X, newState);
|
||||||
|
} else {
|
||||||
|
// This should not happen, so throw an exception that allows the monitor to continue monitoring
|
||||||
|
// changes, this allows KCL to operate in the current state and keep monitoring until a valid
|
||||||
|
// state transition is possible.
|
||||||
|
// However, there could be a split brain here, new workers will use DDB value as source of truth,
|
||||||
|
// so we could also write back CLIENT_VERSION_2X to DDB to ensure all workers have consistent
|
||||||
|
// behavior.
|
||||||
|
// Ideally we don't expect modifications to DDB table out of the KCL migration tool scope,
|
||||||
|
// so keeping it simple and not writing back to DDB, the error log below would help capture
|
||||||
|
// any strange behavior if this happens.
|
||||||
|
log.error(
|
||||||
|
"Migration state has invalid client version {}. Transition from {} is not supported",
|
||||||
|
newState,
|
||||||
|
CLIENT_VERSION_2X);
|
||||||
|
throw new InvalidStateException(String.format("Unexpected new state %s", newState));
|
||||||
|
}
|
||||||
|
} catch (final InvalidStateException | DependencyException e) {
|
||||||
|
scope.addData(FAULT_METRIC, 1, StandardUnit.COUNT, MetricsLevel.SUMMARY);
|
||||||
|
throw e;
|
||||||
|
} finally {
|
||||||
|
MetricsUtil.endScope(scope);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private void cancelRollForwardMonitor() {
|
||||||
|
if (rollForwardMonitor != null) {
|
||||||
|
final ClientVersionChangeMonitor localRollForwardMonitor = rollForwardMonitor;
|
||||||
|
CompletableFuture.supplyAsync(() -> {
|
||||||
|
log.info("Cancelling roll-forward monitor");
|
||||||
|
localRollForwardMonitor.cancel();
|
||||||
|
return null;
|
||||||
|
});
|
||||||
|
rollForwardMonitor = null;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,70 @@
|
||||||
|
/*
|
||||||
|
* Copyright 2024 Amazon.com, Inc. or its affiliates.
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the
|
||||||
|
* "License"); you may not use this file except in compliance
|
||||||
|
* with the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
package software.amazon.kinesis.coordinator.migration;
|
||||||
|
|
||||||
|
import lombok.RequiredArgsConstructor;
|
||||||
|
import lombok.extern.slf4j.Slf4j;
|
||||||
|
import software.amazon.awssdk.annotations.ThreadSafe;
|
||||||
|
import software.amazon.kinesis.annotations.KinesisClientInternalApi;
|
||||||
|
import software.amazon.kinesis.coordinator.DynamicMigrationComponentsInitializer;
|
||||||
|
import software.amazon.kinesis.leases.exceptions.DependencyException;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* State for CLIENT_VERSION_3X which enables KCL to run 3.x algorithms on new KCLv3.x application
|
||||||
|
* or successfully upgraded application which upgraded from v2.x. This is a terminal state of the
|
||||||
|
* state machine and no rollbacks are supported in this state.
|
||||||
|
*/
|
||||||
|
@KinesisClientInternalApi
|
||||||
|
@RequiredArgsConstructor
|
||||||
|
@Slf4j
|
||||||
|
@ThreadSafe
|
||||||
|
public class MigrationClientVersion3xState implements MigrationClientVersionState {
|
||||||
|
private final MigrationStateMachine stateMachine;
|
||||||
|
private final DynamicMigrationComponentsInitializer initializer;
|
||||||
|
private boolean entered = false;
|
||||||
|
private boolean left = false;
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public ClientVersion clientVersion() {
|
||||||
|
return ClientVersion.CLIENT_VERSION_3X;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public synchronized void enter(final ClientVersion fromClientVersion) throws DependencyException {
|
||||||
|
if (!entered) {
|
||||||
|
log.info("Entering {} from {}", this, fromClientVersion);
|
||||||
|
initializer.initializeClientVersionFor3x(fromClientVersion);
|
||||||
|
entered = true;
|
||||||
|
} else {
|
||||||
|
log.info("Not entering {}", left ? "already exited state" : "already entered state");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void leave() {
|
||||||
|
if (entered && !left) {
|
||||||
|
log.info("Leaving {}", this);
|
||||||
|
entered = false;
|
||||||
|
left = true;
|
||||||
|
} else {
|
||||||
|
log.info("Cannot leave {}", entered ? "already exited state" : "because state is not active");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String toString() {
|
||||||
|
return getClass().getSimpleName();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,156 @@
|
||||||
|
/*
|
||||||
|
* Copyright 2024 Amazon.com, Inc. or its affiliates.
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the
|
||||||
|
* "License"); you may not use this file except in compliance
|
||||||
|
* with the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
package software.amazon.kinesis.coordinator.migration;
|
||||||
|
|
||||||
|
import java.util.Random;
|
||||||
|
import java.util.concurrent.CompletableFuture;
|
||||||
|
import java.util.concurrent.ScheduledExecutorService;
|
||||||
|
|
||||||
|
import lombok.RequiredArgsConstructor;
|
||||||
|
import lombok.extern.slf4j.Slf4j;
|
||||||
|
import software.amazon.awssdk.annotations.ThreadSafe;
|
||||||
|
import software.amazon.awssdk.services.cloudwatch.model.StandardUnit;
|
||||||
|
import software.amazon.kinesis.annotations.KinesisClientInternalApi;
|
||||||
|
import software.amazon.kinesis.coordinator.CoordinatorStateDAO;
|
||||||
|
import software.amazon.kinesis.coordinator.DynamicMigrationComponentsInitializer;
|
||||||
|
import software.amazon.kinesis.leases.exceptions.DependencyException;
|
||||||
|
import software.amazon.kinesis.leases.exceptions.InvalidStateException;
|
||||||
|
import software.amazon.kinesis.metrics.MetricsLevel;
|
||||||
|
import software.amazon.kinesis.metrics.MetricsScope;
|
||||||
|
import software.amazon.kinesis.metrics.MetricsUtil;
|
||||||
|
|
||||||
|
import static software.amazon.kinesis.coordinator.migration.ClientVersion.CLIENT_VERSION_2X;
|
||||||
|
import static software.amazon.kinesis.coordinator.migration.ClientVersion.CLIENT_VERSION_3X;
|
||||||
|
import static software.amazon.kinesis.coordinator.migration.MigrationStateMachineImpl.FAULT_METRIC;
|
||||||
|
import static software.amazon.kinesis.coordinator.migration.MigrationStateMachineImpl.METRICS_OPERATION;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* State for CLIENT_VERSION_3X_WITH_ROLLBACK which enables KCL to run its 3.x compliant algorithms
|
||||||
|
* during the upgrade process after all KCL workers in the fleet are 3.x complaint. Since this
|
||||||
|
* is an instant switch from CLIENT_VERSION_UPGRADE_FROM_2X, it also supports rollback if customers
|
||||||
|
* see regression to allow for instant rollbacks as well. This would be achieved by customers
|
||||||
|
* running a KCL migration tool to update MigrationState in DDB. So this state monitors for
|
||||||
|
* rollback triggers and performs state transitions accordingly.
|
||||||
|
*/
|
||||||
|
@Slf4j
|
||||||
|
@KinesisClientInternalApi
|
||||||
|
@RequiredArgsConstructor
|
||||||
|
@ThreadSafe
|
||||||
|
public class MigrationClientVersion3xWithRollbackState implements MigrationClientVersionState {
|
||||||
|
|
||||||
|
private final MigrationStateMachine stateMachine;
|
||||||
|
private final CoordinatorStateDAO coordinatorStateDAO;
|
||||||
|
private final ScheduledExecutorService stateMachineThreadPool;
|
||||||
|
private final DynamicMigrationComponentsInitializer initializer;
|
||||||
|
private final Random random;
|
||||||
|
|
||||||
|
private ClientVersionChangeMonitor rollbackMonitor;
|
||||||
|
private boolean entered;
|
||||||
|
private boolean left;
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public ClientVersion clientVersion() {
|
||||||
|
return ClientVersion.CLIENT_VERSION_3X_WITH_ROLLBACK;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public synchronized void enter(final ClientVersion fromClientVersion) throws DependencyException {
|
||||||
|
if (!entered) {
|
||||||
|
log.info("Entering {} from {}", this, fromClientVersion);
|
||||||
|
initializer.initializeClientVersionFor3xWithRollback(fromClientVersion);
|
||||||
|
// we need to run the rollback monitor
|
||||||
|
log.info("Starting rollback monitor");
|
||||||
|
rollbackMonitor = new ClientVersionChangeMonitor(
|
||||||
|
initializer.metricsFactory(),
|
||||||
|
coordinatorStateDAO,
|
||||||
|
stateMachineThreadPool,
|
||||||
|
this::onClientVersionChange,
|
||||||
|
clientVersion(),
|
||||||
|
random);
|
||||||
|
rollbackMonitor.startMonitor();
|
||||||
|
entered = true;
|
||||||
|
} else {
|
||||||
|
log.info("Not entering {}", left ? "already exited state" : "already entered state");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void leave() {
|
||||||
|
if (entered && !left) {
|
||||||
|
log.info("Leaving {}", this);
|
||||||
|
cancelRollbackMonitor();
|
||||||
|
entered = false;
|
||||||
|
left = true;
|
||||||
|
} else {
|
||||||
|
log.info("Cannot leave {}", entered ? "already exited state" : "because state is not active");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private synchronized void onClientVersionChange(final MigrationState newState)
|
||||||
|
throws InvalidStateException, DependencyException {
|
||||||
|
if (!entered || left) {
|
||||||
|
log.warn("Received client version change notification on inactive state {}", this);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
final MetricsScope scope =
|
||||||
|
MetricsUtil.createMetricsWithOperation(initializer.metricsFactory(), METRICS_OPERATION);
|
||||||
|
try {
|
||||||
|
switch (newState.getClientVersion()) {
|
||||||
|
case CLIENT_VERSION_2X:
|
||||||
|
log.info("A rollback has been initiated for the application. Transition to {}", CLIENT_VERSION_2X);
|
||||||
|
stateMachine.transitionTo(ClientVersion.CLIENT_VERSION_2X, newState);
|
||||||
|
break;
|
||||||
|
case CLIENT_VERSION_3X:
|
||||||
|
log.info("Customer has switched to 3.x after successful upgrade, state machine will move to a"
|
||||||
|
+ "terminal state and stop monitoring. Rollbacks will no longer be supported anymore");
|
||||||
|
stateMachine.transitionTo(CLIENT_VERSION_3X, newState);
|
||||||
|
// This worker will still be running the migrationAdaptive components in 3.x mode which will
|
||||||
|
// no longer dynamically switch back to 2.x mode, however to directly run 3.x component without
|
||||||
|
// adaption to migration (i.e. move to CLIENT_VERSION_3X state), it requires this worker to go
|
||||||
|
// through the current deployment which initiated the switch to 3.x mode.
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
// This should not happen, so throw an exception that allows the monitor to continue monitoring
|
||||||
|
// changes, this allows KCL to operate in the current state and keep monitoring until a valid
|
||||||
|
// state transition is possible.
|
||||||
|
// However, there could be a split brain here, new workers will use DDB value as source of truth,
|
||||||
|
// so we could also write back CLIENT_VERSION_3X_WITH_ROLLBACK to DDB to ensure all workers have
|
||||||
|
// consistent behavior.
|
||||||
|
// Ideally we don't expect modifications to DDB table out of the KCL migration tool scope,
|
||||||
|
// so keeping it simple and not writing back to DDB, the error log below would help capture
|
||||||
|
// any strange behavior if this happens.
|
||||||
|
log.error("Migration state has invalid client version {}", newState);
|
||||||
|
throw new InvalidStateException(String.format("Unexpected new state %s", newState));
|
||||||
|
}
|
||||||
|
} catch (final InvalidStateException | DependencyException e) {
|
||||||
|
scope.addData(FAULT_METRIC, 1, StandardUnit.COUNT, MetricsLevel.SUMMARY);
|
||||||
|
throw e;
|
||||||
|
} finally {
|
||||||
|
MetricsUtil.endScope(scope);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private void cancelRollbackMonitor() {
|
||||||
|
if (rollbackMonitor != null) {
|
||||||
|
final ClientVersionChangeMonitor localRollbackMonitor = rollbackMonitor;
|
||||||
|
CompletableFuture.supplyAsync(() -> {
|
||||||
|
log.info("Cancelling rollback monitor");
|
||||||
|
localRollbackMonitor.cancel();
|
||||||
|
return null;
|
||||||
|
});
|
||||||
|
rollbackMonitor = null;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,47 @@
|
||||||
|
/*
|
||||||
|
* Copyright 2024 Amazon.com, Inc. or its affiliates.
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the
|
||||||
|
* "License"); you may not use this file except in compliance
|
||||||
|
* with the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
package software.amazon.kinesis.coordinator.migration;
|
||||||
|
|
||||||
|
import software.amazon.kinesis.leases.exceptions.DependencyException;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Interface of a state implementation for the MigrationStateMachine
|
||||||
|
*/
|
||||||
|
public interface MigrationClientVersionState {
|
||||||
|
|
||||||
|
/**
|
||||||
|
* The associated clientVersion this state corresponds to
|
||||||
|
* @return ClientVersion that this state implements the logic for.
|
||||||
|
*/
|
||||||
|
ClientVersion clientVersion();
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Enter the state and perform the business logic of being in this state
|
||||||
|
* which includes performing any monitoring that allows the next state
|
||||||
|
* transition and also initializing the KCL based on the ClientVersion.
|
||||||
|
* @param fromClientVersion from previous state if any specific action must
|
||||||
|
* be taken based on the state from which this state
|
||||||
|
* is being entered from.
|
||||||
|
* @throws DependencyException if DDB fails in unexpected ways for those states
|
||||||
|
* that create the GSI
|
||||||
|
*/
|
||||||
|
void enter(ClientVersion fromClientVersion) throws DependencyException;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Invoked after the transition to another state has occurred
|
||||||
|
* to allow printing any helpful logs or performing cleanup.
|
||||||
|
*/
|
||||||
|
void leave();
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,263 @@
|
||||||
|
/*
|
||||||
|
* Copyright 2024 Amazon.com, Inc. or its affiliates.
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the
|
||||||
|
* "License"); you may not use this file except in compliance
|
||||||
|
* with the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
package software.amazon.kinesis.coordinator.migration;
|
||||||
|
|
||||||
|
import java.util.AbstractMap.SimpleEntry;
|
||||||
|
import java.util.Map;
|
||||||
|
import java.util.Random;
|
||||||
|
import java.util.concurrent.Callable;
|
||||||
|
|
||||||
|
import lombok.RequiredArgsConstructor;
|
||||||
|
import lombok.extern.slf4j.Slf4j;
|
||||||
|
import software.amazon.awssdk.annotations.ThreadSafe;
|
||||||
|
import software.amazon.awssdk.services.dynamodb.model.ExpectedAttributeValue;
|
||||||
|
import software.amazon.kinesis.annotations.KinesisClientInternalApi;
|
||||||
|
import software.amazon.kinesis.coordinator.CoordinatorConfig.ClientVersionConfig;
|
||||||
|
import software.amazon.kinesis.coordinator.CoordinatorState;
|
||||||
|
import software.amazon.kinesis.coordinator.CoordinatorStateDAO;
|
||||||
|
import software.amazon.kinesis.leases.exceptions.DependencyException;
|
||||||
|
import software.amazon.kinesis.leases.exceptions.InvalidStateException;
|
||||||
|
import software.amazon.kinesis.leases.exceptions.ProvisionedThroughputException;
|
||||||
|
|
||||||
|
import static software.amazon.kinesis.coordinator.migration.ClientVersion.CLIENT_VERSION_2X;
|
||||||
|
import static software.amazon.kinesis.coordinator.migration.ClientVersion.CLIENT_VERSION_3X;
|
||||||
|
import static software.amazon.kinesis.coordinator.migration.ClientVersion.CLIENT_VERSION_3X_WITH_ROLLBACK;
|
||||||
|
import static software.amazon.kinesis.coordinator.migration.ClientVersion.CLIENT_VERSION_UPGRADE_FROM_2X;
|
||||||
|
import static software.amazon.kinesis.coordinator.migration.MigrationState.MIGRATION_HASH_KEY;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Initializer to determine start state of the state machine which identifies the
|
||||||
|
* state to initialize KCL when it is starting up. The initial state is determined based on the
|
||||||
|
* customer configured {@link ClientVersionConfig} and the current {@link MigrationState} in DDB,
|
||||||
|
* as follows
|
||||||
|
* ClientVersionConfig | MigrationState (DDB) | initial client version
|
||||||
|
* --------------------+---------------------------------+--------------------------------
|
||||||
|
* COMPATIBLE_WITH_2X | Does not exist | CLIENT_VERSION_UPGRADE_FROM_2X
|
||||||
|
* 3X | Does not exist | CLIENT_VERSION_3X
|
||||||
|
* COMPATIBLE_WITH_2X | CLIENT_VERSION_3X_WITH_ROLLBACK | CLIENT_VERSION_3X_WITH_ROLLBACK
|
||||||
|
* 3X | CLIENT_VERSION_3X_WITH_ROLLBACK | CLIENT_VERSION_3X
|
||||||
|
* any | CLIENT_VERSION_2X | CLIENT_VERSION_2X
|
||||||
|
* any | CLIENT_VERSION_UPGRADE_FROM_2X | CLIENT_VERSION_UPGRADE_FROM_2X
|
||||||
|
* any | CLIENT_VERSION_3X | CLIENT_VERSION_3X
|
||||||
|
*/
|
||||||
|
@KinesisClientInternalApi
|
||||||
|
@RequiredArgsConstructor
|
||||||
|
@Slf4j
|
||||||
|
@ThreadSafe
|
||||||
|
public class MigrationClientVersionStateInitializer {
|
||||||
|
private static final int MAX_INITIALIZATION_RETRY = 10;
|
||||||
|
private static final long INITIALIZATION_RETRY_DELAY_MILLIS = 1000L;
|
||||||
|
/**
|
||||||
|
* A jitter factor of 10% to stagger the retries.
|
||||||
|
*/
|
||||||
|
private static final double JITTER_FACTOR = 0.1;
|
||||||
|
|
||||||
|
private final Callable<Long> timeProvider;
|
||||||
|
private final CoordinatorStateDAO coordinatorStateDAO;
|
||||||
|
private final ClientVersionConfig clientVersionConfig;
|
||||||
|
private final Random random;
|
||||||
|
private final String workerIdentifier;
|
||||||
|
|
||||||
|
public SimpleEntry<ClientVersion, MigrationState> getInitialState() throws DependencyException {
|
||||||
|
log.info("Initializing migration state machine starting state, configured version {}", clientVersionConfig);
|
||||||
|
|
||||||
|
try {
|
||||||
|
MigrationState migrationState = getMigrationStateFromDynamo();
|
||||||
|
int retryCount = 0;
|
||||||
|
while (retryCount++ < MAX_INITIALIZATION_RETRY) {
|
||||||
|
final ClientVersion initialClientVersion = getClientVersionForInitialization(migrationState);
|
||||||
|
if (migrationState.getClientVersion() != initialClientVersion) {
|
||||||
|
// If update fails, the value represents current state in dynamo
|
||||||
|
migrationState = updateMigrationStateInDynamo(migrationState, initialClientVersion);
|
||||||
|
if (migrationState.getClientVersion() == initialClientVersion) {
|
||||||
|
// update succeeded. Transition to the state
|
||||||
|
return new SimpleEntry<>(initialClientVersion, migrationState);
|
||||||
|
}
|
||||||
|
final long delay = getInitializationRetryDelay();
|
||||||
|
log.warn(
|
||||||
|
"Failed to update migration state with {}, retry after delay {}",
|
||||||
|
initialClientVersion,
|
||||||
|
delay);
|
||||||
|
safeSleep(delay);
|
||||||
|
} else {
|
||||||
|
return new SimpleEntry<>(initialClientVersion, migrationState);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} catch (final InvalidStateException e) {
|
||||||
|
log.error("Unable to initialize state machine", e);
|
||||||
|
}
|
||||||
|
throw new DependencyException(
|
||||||
|
new RuntimeException("Unable to determine initial state for migration state machine"));
|
||||||
|
}
|
||||||
|
|
||||||
|
public ClientVersion getClientVersionForInitialization(final MigrationState migrationState) {
|
||||||
|
final ClientVersion nextClientVersion;
|
||||||
|
switch (migrationState.getClientVersion()) {
|
||||||
|
case CLIENT_VERSION_INIT:
|
||||||
|
// There is no state in DDB, set state to config version and transition to configured version.
|
||||||
|
nextClientVersion = getNextClientVersionBasedOnConfigVersion();
|
||||||
|
log.info("Application is starting in {}", nextClientVersion);
|
||||||
|
break;
|
||||||
|
case CLIENT_VERSION_3X_WITH_ROLLBACK:
|
||||||
|
if (clientVersionConfig == ClientVersionConfig.CLIENT_VERSION_CONFIG_3X) {
|
||||||
|
// upgrade successful, allow transition to 3x.
|
||||||
|
log.info("Application has successfully upgraded, transitioning to {}", CLIENT_VERSION_3X);
|
||||||
|
nextClientVersion = CLIENT_VERSION_3X;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
log.info("Initialize with {}", CLIENT_VERSION_3X_WITH_ROLLBACK);
|
||||||
|
nextClientVersion = migrationState.getClientVersion();
|
||||||
|
break;
|
||||||
|
case CLIENT_VERSION_2X:
|
||||||
|
log.info("Application has rolled-back, initialize with {}", CLIENT_VERSION_2X);
|
||||||
|
nextClientVersion = migrationState.getClientVersion();
|
||||||
|
break;
|
||||||
|
case CLIENT_VERSION_UPGRADE_FROM_2X:
|
||||||
|
log.info("Application is upgrading, initialize with {}", CLIENT_VERSION_UPGRADE_FROM_2X);
|
||||||
|
nextClientVersion = migrationState.getClientVersion();
|
||||||
|
break;
|
||||||
|
case CLIENT_VERSION_3X:
|
||||||
|
log.info("Initialize with {}", CLIENT_VERSION_3X);
|
||||||
|
nextClientVersion = migrationState.getClientVersion();
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
throw new IllegalStateException(String.format("Unknown version in DDB %s", migrationState));
|
||||||
|
}
|
||||||
|
return nextClientVersion;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Update the migration state's client version in dynamo conditional on the current client version
|
||||||
|
* in dynamo. So that if another worker updates the value first, the update fails. If the update fails,
|
||||||
|
* the method will read the latest value and return so that initialization can be retried.
|
||||||
|
* If the value does not exist in dynamo, it will creat it.
|
||||||
|
*/
|
||||||
|
private MigrationState updateMigrationStateInDynamo(
|
||||||
|
final MigrationState migrationState, final ClientVersion nextClientVersion) throws InvalidStateException {
|
||||||
|
try {
|
||||||
|
if (migrationState.getClientVersion() == ClientVersion.CLIENT_VERSION_INIT) {
|
||||||
|
migrationState.update(nextClientVersion, workerIdentifier);
|
||||||
|
log.info("Creating {}", migrationState);
|
||||||
|
final boolean created = coordinatorStateDAO.createCoordinatorStateIfNotExists(migrationState);
|
||||||
|
if (!created) {
|
||||||
|
log.debug("Create {} did not succeed", migrationState);
|
||||||
|
return getMigrationStateFromDynamo();
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
log.info("Updating {} with {}", migrationState, nextClientVersion);
|
||||||
|
final Map<String, ExpectedAttributeValue> expectations =
|
||||||
|
migrationState.getDynamoClientVersionExpectation();
|
||||||
|
migrationState.update(nextClientVersion, workerIdentifier);
|
||||||
|
final boolean updated =
|
||||||
|
coordinatorStateDAO.updateCoordinatorStateWithExpectation(migrationState, expectations);
|
||||||
|
if (!updated) {
|
||||||
|
log.debug("Update {} did not succeed", migrationState);
|
||||||
|
return getMigrationStateFromDynamo();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return migrationState;
|
||||||
|
} catch (final ProvisionedThroughputException | DependencyException e) {
|
||||||
|
log.debug(
|
||||||
|
"Failed to update migration state {} with {}, return previous value to trigger a retry",
|
||||||
|
migrationState,
|
||||||
|
nextClientVersion,
|
||||||
|
e);
|
||||||
|
return migrationState;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private ClientVersion getNextClientVersionBasedOnConfigVersion() {
|
||||||
|
switch (clientVersionConfig) {
|
||||||
|
case CLIENT_VERSION_CONFIG_COMPATIBLE_WITH_2X:
|
||||||
|
return CLIENT_VERSION_UPGRADE_FROM_2X;
|
||||||
|
case CLIENT_VERSION_CONFIG_3X:
|
||||||
|
return CLIENT_VERSION_3X;
|
||||||
|
}
|
||||||
|
throw new IllegalStateException(String.format("Unknown configured Client version %s", clientVersionConfig));
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Read the current {@link MigrationState} from DDB with retries.
|
||||||
|
* @return current Migration state from DDB, if none exists, an initial Migration State with CLIENT_VERSION_INIT
|
||||||
|
* will be returned
|
||||||
|
* @throws InvalidStateException, this occurs when dynamo table does not exist in which retrying is not useful.
|
||||||
|
*/
|
||||||
|
private MigrationState getMigrationStateFromDynamo() throws InvalidStateException {
|
||||||
|
return executeCallableWithRetryAndJitter(
|
||||||
|
() -> {
|
||||||
|
final CoordinatorState state = coordinatorStateDAO.getCoordinatorState(MIGRATION_HASH_KEY);
|
||||||
|
if (state == null) {
|
||||||
|
log.info("No Migration state available in DDB");
|
||||||
|
return new MigrationState(MIGRATION_HASH_KEY, workerIdentifier);
|
||||||
|
}
|
||||||
|
if (state instanceof MigrationState) {
|
||||||
|
log.info("Current migration state in DDB {}", state);
|
||||||
|
return (MigrationState) state;
|
||||||
|
}
|
||||||
|
throw new InvalidStateException(
|
||||||
|
String.format("Unexpected state found not confirming to MigrationState schema %s", state));
|
||||||
|
},
|
||||||
|
"get MigrationState from DDB");
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Helper method to retry a given callable upto MAX_INITIALIZATION_RETRY times for all retryable exceptions.
|
||||||
|
* It considers InvalidStateException as non-retryable exception. During retry, it will compute a delay
|
||||||
|
* with jitter before retrying.
|
||||||
|
* @param callable callable to invoke either until it succeeds or max retry attempts exceed.
|
||||||
|
* @param description a meaningful description to log exceptions
|
||||||
|
* @return the value returned by the callable
|
||||||
|
* @param <T> Return type of the callable
|
||||||
|
* @throws InvalidStateException If the callable throws InvalidStateException, it will not be retried and will
|
||||||
|
* be thrown back.
|
||||||
|
*/
|
||||||
|
private <T> T executeCallableWithRetryAndJitter(final Callable<T> callable, final String description)
|
||||||
|
throws InvalidStateException {
|
||||||
|
int retryCount = 0;
|
||||||
|
while (retryCount++ < MAX_INITIALIZATION_RETRY) {
|
||||||
|
try {
|
||||||
|
return callable.call();
|
||||||
|
} catch (final Exception e) {
|
||||||
|
if (e instanceof InvalidStateException) {
|
||||||
|
// throw the non-retryable exception
|
||||||
|
throw (InvalidStateException) e;
|
||||||
|
}
|
||||||
|
final long delay = getInitializationRetryDelay();
|
||||||
|
log.warn("Failed to {}, retry after delay {}", description, delay, e);
|
||||||
|
|
||||||
|
safeSleep(delay);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
throw new RuntimeException(
|
||||||
|
String.format("Failed to %s after %d retries, giving up", description, MAX_INITIALIZATION_RETRY));
|
||||||
|
}
|
||||||
|
|
||||||
|
private void safeSleep(final long delay) {
|
||||||
|
try {
|
||||||
|
Thread.sleep(delay);
|
||||||
|
} catch (final InterruptedException ie) {
|
||||||
|
log.debug("Interrupted sleep during state machine initialization retry");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Generate a delay with jitter that is factor of the interval.
|
||||||
|
* @return delay with jitter
|
||||||
|
*/
|
||||||
|
private long getInitializationRetryDelay() {
|
||||||
|
final long jitter = (long) (random.nextDouble() * JITTER_FACTOR * INITIALIZATION_RETRY_DELAY_MILLIS);
|
||||||
|
return INITIALIZATION_RETRY_DELAY_MILLIS + jitter;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,241 @@
|
||||||
|
/*
|
||||||
|
* Copyright 2024 Amazon.com, Inc. or its affiliates.
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the
|
||||||
|
* "License"); you may not use this file except in compliance
|
||||||
|
* with the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
package software.amazon.kinesis.coordinator.migration;
|
||||||
|
|
||||||
|
import java.util.Random;
|
||||||
|
import java.util.concurrent.Callable;
|
||||||
|
import java.util.concurrent.CompletableFuture;
|
||||||
|
import java.util.concurrent.ScheduledExecutorService;
|
||||||
|
|
||||||
|
import lombok.RequiredArgsConstructor;
|
||||||
|
import lombok.extern.slf4j.Slf4j;
|
||||||
|
import software.amazon.awssdk.annotations.ThreadSafe;
|
||||||
|
import software.amazon.awssdk.services.cloudwatch.model.StandardUnit;
|
||||||
|
import software.amazon.kinesis.annotations.KinesisClientInternalApi;
|
||||||
|
import software.amazon.kinesis.coordinator.CoordinatorStateDAO;
|
||||||
|
import software.amazon.kinesis.coordinator.DynamicMigrationComponentsInitializer;
|
||||||
|
import software.amazon.kinesis.leases.exceptions.DependencyException;
|
||||||
|
import software.amazon.kinesis.leases.exceptions.InvalidStateException;
|
||||||
|
import software.amazon.kinesis.metrics.MetricsLevel;
|
||||||
|
import software.amazon.kinesis.metrics.MetricsScope;
|
||||||
|
import software.amazon.kinesis.metrics.MetricsUtil;
|
||||||
|
|
||||||
|
import static software.amazon.kinesis.coordinator.migration.ClientVersion.CLIENT_VERSION_2X;
|
||||||
|
import static software.amazon.kinesis.coordinator.migration.ClientVersion.CLIENT_VERSION_3X_WITH_ROLLBACK;
|
||||||
|
import static software.amazon.kinesis.coordinator.migration.MigrationStateMachineImpl.FAULT_METRIC;
|
||||||
|
import static software.amazon.kinesis.coordinator.migration.MigrationStateMachineImpl.METRICS_OPERATION;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* State for CLIENT_VERSION_UPGRADE_FROM_2X. When state machine enters this state,
|
||||||
|
* KCL is initialized to operate in dual mode for Lease assignment and Leader decider algorithms
|
||||||
|
* which initially start in 2.x compatible mode and when all the KCL workers are 3.x compliant,
|
||||||
|
* it dynamically switches to the 3.x algorithms. It also monitors for rollback
|
||||||
|
* initiated from customer via the KCL migration tool and instantly switches back to the 2.x
|
||||||
|
* complaint algorithms.
|
||||||
|
* The allowed state transitions are to CLIENT_VERSION_3X_WITH_ROLLBACK when KCL workers are
|
||||||
|
* 3.x complaint, and to CLIENT_VERSION_2X when customer has initiated a rollback.
|
||||||
|
* Only the leader KCL worker performs migration ready monitor and notifies all workers (including
|
||||||
|
* itself) via a MigrationState update. When all worker's monitor notice the MigrationState change
|
||||||
|
* (including itself), it will transition to CLIENT_VERSION_3X_WITH_ROLLBACK.
|
||||||
|
*/
|
||||||
|
@KinesisClientInternalApi
|
||||||
|
@RequiredArgsConstructor
|
||||||
|
@Slf4j
|
||||||
|
@ThreadSafe
|
||||||
|
public class MigrationClientVersionUpgradeFrom2xState implements MigrationClientVersionState {
|
||||||
|
private final MigrationStateMachine stateMachine;
|
||||||
|
private final Callable<Long> timeProvider;
|
||||||
|
private final CoordinatorStateDAO coordinatorStateDAO;
|
||||||
|
private final ScheduledExecutorService stateMachineThreadPool;
|
||||||
|
private final DynamicMigrationComponentsInitializer initializer;
|
||||||
|
private final Random random;
|
||||||
|
private final MigrationState currentMigrationState;
|
||||||
|
private final long flipTo3XStabilizerTimeInSeconds;
|
||||||
|
|
||||||
|
private MigrationReadyMonitor migrationMonitor;
|
||||||
|
private ClientVersionChangeMonitor clientVersionChangeMonitor;
|
||||||
|
private boolean entered = false;
|
||||||
|
private boolean left = false;
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public ClientVersion clientVersion() {
|
||||||
|
return ClientVersion.CLIENT_VERSION_UPGRADE_FROM_2X;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public synchronized void enter(final ClientVersion fromClientVersion) throws DependencyException {
|
||||||
|
if (!entered) {
|
||||||
|
log.info("Entering state {} from {}", this, fromClientVersion);
|
||||||
|
initializer.initializeClientVersionForUpgradeFrom2x(fromClientVersion);
|
||||||
|
|
||||||
|
log.info("Starting migration ready monitor to monitor 3.x compliance of the KCL workers");
|
||||||
|
migrationMonitor = new MigrationReadyMonitor(
|
||||||
|
initializer.metricsFactory(),
|
||||||
|
timeProvider,
|
||||||
|
initializer.leaderDecider(),
|
||||||
|
initializer.workerIdentifier(),
|
||||||
|
initializer.workerMetricsDAO(),
|
||||||
|
initializer.workerMetricsExpirySeconds(),
|
||||||
|
initializer.leaseRefresher(),
|
||||||
|
stateMachineThreadPool,
|
||||||
|
this::onMigrationReady,
|
||||||
|
flipTo3XStabilizerTimeInSeconds);
|
||||||
|
migrationMonitor.startMonitor();
|
||||||
|
|
||||||
|
log.info("Starting monitor for rollback and flip to 3.x");
|
||||||
|
clientVersionChangeMonitor = new ClientVersionChangeMonitor(
|
||||||
|
initializer.metricsFactory(),
|
||||||
|
coordinatorStateDAO,
|
||||||
|
stateMachineThreadPool,
|
||||||
|
this::onClientVersionChange,
|
||||||
|
clientVersion(),
|
||||||
|
random);
|
||||||
|
clientVersionChangeMonitor.startMonitor();
|
||||||
|
entered = true;
|
||||||
|
} else {
|
||||||
|
log.info("Not entering {}", left ? "already exited state" : "already entered state");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public synchronized void leave() {
|
||||||
|
if (entered && !left) {
|
||||||
|
log.info("Leaving {}", this);
|
||||||
|
cancelMigrationReadyMonitor();
|
||||||
|
cancelClientChangeVersionMonitor();
|
||||||
|
entered = false;
|
||||||
|
} else {
|
||||||
|
log.info("Cannot leave {}", entered ? "already exited state" : "because state is not active");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String toString() {
|
||||||
|
return getClass().getSimpleName();
|
||||||
|
}
|
||||||
|
|
||||||
|
private synchronized void onMigrationReady() {
|
||||||
|
// this is invoked on the leader worker only
|
||||||
|
if (!entered || left || migrationMonitor == null) {
|
||||||
|
log.info("Ignoring migration ready monitor, state already transitioned");
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
// update dynamo with the state to toggle to 3.x
|
||||||
|
// and let the clientVersionChange kick in to do state transition
|
||||||
|
// this way both leader and non-leader worker all transition when
|
||||||
|
// it discovers the update from ddb.
|
||||||
|
if (updateDynamoStateForTransition()) {
|
||||||
|
// successfully toggled the state, now we can cancel the monitor
|
||||||
|
cancelMigrationReadyMonitor();
|
||||||
|
}
|
||||||
|
// else - either migration ready monitor will retry or
|
||||||
|
// client Version change callback will initiate the next state transition.
|
||||||
|
}
|
||||||
|
|
||||||
|
private void cancelMigrationReadyMonitor() {
|
||||||
|
if (migrationMonitor != null) {
|
||||||
|
final MigrationReadyMonitor localMigrationMonitor = migrationMonitor;
|
||||||
|
CompletableFuture.supplyAsync(() -> {
|
||||||
|
log.info("Cancelling migration ready monitor");
|
||||||
|
localMigrationMonitor.cancel();
|
||||||
|
return null;
|
||||||
|
});
|
||||||
|
migrationMonitor = null;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private void cancelClientChangeVersionMonitor() {
|
||||||
|
if (clientVersionChangeMonitor != null) {
|
||||||
|
final ClientVersionChangeMonitor localClientVersionChangeMonitor = clientVersionChangeMonitor;
|
||||||
|
CompletableFuture.supplyAsync(() -> {
|
||||||
|
log.info("Cancelling client change version monitor");
|
||||||
|
localClientVersionChangeMonitor.cancel();
|
||||||
|
return null;
|
||||||
|
});
|
||||||
|
clientVersionChangeMonitor = null;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Callback handler to handle client version changes in MigrationState in DDB.
|
||||||
|
* @param newState current MigrationState read from DDB where client version is not CLIENT_VERSION_UPGRADE_FROM_2X
|
||||||
|
* @throws InvalidStateException during transition to the next state based on the new ClientVersion
|
||||||
|
* or if the new state in DDB is unexpected.
|
||||||
|
*/
|
||||||
|
private synchronized void onClientVersionChange(final MigrationState newState)
|
||||||
|
throws InvalidStateException, DependencyException {
|
||||||
|
if (!entered || left) {
|
||||||
|
log.warn("Received client version change notification on inactive state {}", this);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
final MetricsScope scope =
|
||||||
|
MetricsUtil.createMetricsWithOperation(initializer.metricsFactory(), METRICS_OPERATION);
|
||||||
|
try {
|
||||||
|
switch (newState.getClientVersion()) {
|
||||||
|
case CLIENT_VERSION_2X:
|
||||||
|
log.info("A rollback has been initiated for the application. Transition to {}", CLIENT_VERSION_2X);
|
||||||
|
// cancel monitor asynchronously
|
||||||
|
cancelMigrationReadyMonitor();
|
||||||
|
stateMachine.transitionTo(CLIENT_VERSION_2X, newState);
|
||||||
|
break;
|
||||||
|
case CLIENT_VERSION_3X_WITH_ROLLBACK:
|
||||||
|
log.info("KCL workers are v3.x compliant, transition to {}", CLIENT_VERSION_3X_WITH_ROLLBACK);
|
||||||
|
cancelMigrationReadyMonitor();
|
||||||
|
stateMachine.transitionTo(CLIENT_VERSION_3X_WITH_ROLLBACK, newState);
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
// This should not happen, so throw an exception that allows the monitor to continue monitoring
|
||||||
|
// changes, this allows KCL to operate in the current state and keep monitoring until a valid
|
||||||
|
// state transition is possible.
|
||||||
|
// However, there could be a split brain here, new workers will use DDB value as source of truth,
|
||||||
|
// so we could also write back CLIENT_VERSION_UPGRADE_FROM_2X to DDB to ensure all workers have
|
||||||
|
// consistent behavior.
|
||||||
|
// Ideally we don't expect modifications to DDB table out of the KCL migration tool scope,
|
||||||
|
// so keeping it simple and not writing back to DDB, the error log below would help capture
|
||||||
|
// any strange behavior if this happens.
|
||||||
|
log.error("Migration state has invalid client version {}", newState);
|
||||||
|
throw new InvalidStateException(String.format("Unexpected new state %s", newState));
|
||||||
|
}
|
||||||
|
} catch (final DependencyException | InvalidStateException e) {
|
||||||
|
scope.addData(FAULT_METRIC, 1, StandardUnit.COUNT, MetricsLevel.SUMMARY);
|
||||||
|
throw e;
|
||||||
|
} finally {
|
||||||
|
MetricsUtil.endScope(scope);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private boolean updateDynamoStateForTransition() {
|
||||||
|
final MetricsScope scope =
|
||||||
|
MetricsUtil.createMetricsWithOperation(initializer.metricsFactory(), METRICS_OPERATION);
|
||||||
|
try {
|
||||||
|
final MigrationState newMigrationState = currentMigrationState
|
||||||
|
.copy()
|
||||||
|
.update(CLIENT_VERSION_3X_WITH_ROLLBACK, initializer.workerIdentifier());
|
||||||
|
log.info("Updating Migration State in DDB with {} prev state {}", newMigrationState, currentMigrationState);
|
||||||
|
return coordinatorStateDAO.updateCoordinatorStateWithExpectation(
|
||||||
|
newMigrationState, currentMigrationState.getDynamoClientVersionExpectation());
|
||||||
|
} catch (final Exception e) {
|
||||||
|
log.warn(
|
||||||
|
"Exception occurred when toggling to {}, upgradeReadyMonitor will retry the update"
|
||||||
|
+ " if upgrade condition is still true",
|
||||||
|
CLIENT_VERSION_3X_WITH_ROLLBACK,
|
||||||
|
e);
|
||||||
|
scope.addData(FAULT_METRIC, 1, StandardUnit.COUNT, MetricsLevel.SUMMARY);
|
||||||
|
return false;
|
||||||
|
} finally {
|
||||||
|
MetricsUtil.endScope(scope);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,352 @@
|
||||||
|
/*
|
||||||
|
* Copyright 2024 Amazon.com, Inc. or its affiliates.
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the
|
||||||
|
* "License"); you may not use this file except in compliance
|
||||||
|
* with the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
package software.amazon.kinesis.coordinator.migration;
|
||||||
|
|
||||||
|
import java.time.Duration;
|
||||||
|
import java.util.HashSet;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Objects;
|
||||||
|
import java.util.Set;
|
||||||
|
import java.util.concurrent.Callable;
|
||||||
|
import java.util.concurrent.CompletableFuture;
|
||||||
|
import java.util.concurrent.CompletionException;
|
||||||
|
import java.util.concurrent.ScheduledExecutorService;
|
||||||
|
import java.util.concurrent.ScheduledFuture;
|
||||||
|
import java.util.concurrent.TimeUnit;
|
||||||
|
import java.util.stream.Collectors;
|
||||||
|
|
||||||
|
import lombok.RequiredArgsConstructor;
|
||||||
|
import lombok.extern.slf4j.Slf4j;
|
||||||
|
import software.amazon.awssdk.annotations.ThreadSafe;
|
||||||
|
import software.amazon.awssdk.services.cloudwatch.model.StandardUnit;
|
||||||
|
import software.amazon.kinesis.coordinator.LeaderDecider;
|
||||||
|
import software.amazon.kinesis.leases.Lease;
|
||||||
|
import software.amazon.kinesis.leases.LeaseRefresher;
|
||||||
|
import software.amazon.kinesis.leases.exceptions.DependencyException;
|
||||||
|
import software.amazon.kinesis.metrics.MetricsFactory;
|
||||||
|
import software.amazon.kinesis.metrics.MetricsLevel;
|
||||||
|
import software.amazon.kinesis.metrics.MetricsScope;
|
||||||
|
import software.amazon.kinesis.metrics.MetricsUtil;
|
||||||
|
import software.amazon.kinesis.worker.metricstats.WorkerMetricStats;
|
||||||
|
import software.amazon.kinesis.worker.metricstats.WorkerMetricStatsDAO;
|
||||||
|
|
||||||
|
import static software.amazon.kinesis.coordinator.migration.MigrationStateMachineImpl.METRICS_OPERATION;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Monitor for KCL workers 3.x readiness. This monitor is started on all workers but only
|
||||||
|
* executed on the leader of the fleet. The leader determines 3.x readiness if GSI of the lease
|
||||||
|
* table is active and all lease owners are emitting WorkerMetricStats. The monitor performs this
|
||||||
|
* check periodically and will invoke callback if the readiness conditions are true. Monitor
|
||||||
|
* needs to be explicitly cancelled after the readiness trigger has successfully been handled.
|
||||||
|
*
|
||||||
|
* Thread safety - Guard for safety against public method invocation and internal runnable method.
|
||||||
|
*/
|
||||||
|
@Slf4j
|
||||||
|
@ThreadSafe
|
||||||
|
public class MigrationReadyMonitor implements Runnable {
|
||||||
|
private static final long MONITOR_INTERVAL_MILLIS = Duration.ofMinutes(1).toMillis();
|
||||||
|
private static final long LOG_INTERVAL_NANOS = Duration.ofMinutes(5).toNanos();
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Default retry attempt for loading leases and workers before giving up.
|
||||||
|
*/
|
||||||
|
private static final int DDB_LOAD_RETRY_ATTEMPT = 1;
|
||||||
|
|
||||||
|
private final MetricsFactory metricsFactory;
|
||||||
|
private final Callable<Long> timeProvider;
|
||||||
|
private final LeaderDecider leaderDecider;
|
||||||
|
private final String currentWorkerId;
|
||||||
|
private final WorkerMetricStatsDAO workerMetricStatsDAO;
|
||||||
|
private final long workerMetricStatsExpirySeconds;
|
||||||
|
private final LeaseRefresher leaseRefresher;
|
||||||
|
private final ScheduledExecutorService stateMachineThreadPool;
|
||||||
|
private final MonitorTriggerStabilizer triggerStabilizer;
|
||||||
|
|
||||||
|
private final LogRateLimiter rateLimitedStatusLogger = new LogRateLimiter(LOG_INTERVAL_NANOS);
|
||||||
|
private ScheduledFuture<?> scheduledFuture;
|
||||||
|
private boolean gsiStatusReady;
|
||||||
|
private boolean workerMetricsReady;
|
||||||
|
private Set<String> lastKnownUniqueLeaseOwners = new HashSet<>();
|
||||||
|
private Set<String> lastKnownWorkersWithActiveWorkerMetrics = new HashSet<>();
|
||||||
|
|
||||||
|
public MigrationReadyMonitor(
|
||||||
|
final MetricsFactory metricsFactory,
|
||||||
|
final Callable<Long> timeProvider,
|
||||||
|
final LeaderDecider leaderDecider,
|
||||||
|
final String currentWorkerId,
|
||||||
|
final WorkerMetricStatsDAO workerMetricStatsDAO,
|
||||||
|
final long workerMetricsExpirySeconds,
|
||||||
|
final LeaseRefresher leaseRefresher,
|
||||||
|
final ScheduledExecutorService stateMachineThreadPool,
|
||||||
|
final Runnable callback,
|
||||||
|
final long callbackStabilizationInSeconds) {
|
||||||
|
this.metricsFactory = metricsFactory;
|
||||||
|
this.timeProvider = timeProvider;
|
||||||
|
this.leaderDecider = leaderDecider;
|
||||||
|
this.currentWorkerId = currentWorkerId;
|
||||||
|
this.workerMetricStatsDAO = workerMetricStatsDAO;
|
||||||
|
this.workerMetricStatsExpirySeconds = workerMetricsExpirySeconds;
|
||||||
|
this.leaseRefresher = leaseRefresher;
|
||||||
|
this.stateMachineThreadPool = stateMachineThreadPool;
|
||||||
|
this.triggerStabilizer =
|
||||||
|
new MonitorTriggerStabilizer(timeProvider, callbackStabilizationInSeconds, callback, currentWorkerId);
|
||||||
|
}
|
||||||
|
|
||||||
|
public synchronized void startMonitor() {
|
||||||
|
if (Objects.isNull(scheduledFuture)) {
|
||||||
|
|
||||||
|
log.info("Starting migration ready monitor");
|
||||||
|
scheduledFuture = stateMachineThreadPool.scheduleWithFixedDelay(
|
||||||
|
this, MONITOR_INTERVAL_MILLIS, MONITOR_INTERVAL_MILLIS, TimeUnit.MILLISECONDS);
|
||||||
|
} else {
|
||||||
|
log.info("Ignoring monitor request, since it is already started");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Cancel the monitor. Once the method returns callback will not be invoked,
|
||||||
|
* but callback can be invoked reentrantly before this method returns.
|
||||||
|
*/
|
||||||
|
public synchronized void cancel() {
|
||||||
|
if (Objects.nonNull(scheduledFuture)) {
|
||||||
|
log.info("Cancelled migration ready monitor");
|
||||||
|
scheduledFuture.cancel(true);
|
||||||
|
scheduledFuture = null;
|
||||||
|
} else {
|
||||||
|
log.info("{} is currently not active", this);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public synchronized void run() {
|
||||||
|
try {
|
||||||
|
if (Thread.currentThread().isInterrupted()) {
|
||||||
|
log.info("{} cancelled, exiting...", this);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
if (!leaderDecider.isLeader(currentWorkerId)) {
|
||||||
|
log.debug("Not the leader, not performing migration ready check {}", this);
|
||||||
|
triggerStabilizer.reset();
|
||||||
|
lastKnownUniqueLeaseOwners.clear();
|
||||||
|
lastKnownWorkersWithActiveWorkerMetrics.clear();
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
triggerStabilizer.call(isReadyForUpgradeTo3x());
|
||||||
|
rateLimitedStatusLogger.log(() -> log.info("Monitor ran successfully {}", this));
|
||||||
|
} catch (final Throwable t) {
|
||||||
|
log.warn("{} failed, will retry after {}", this, MONITOR_INTERVAL_MILLIS, t);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String toString() {
|
||||||
|
return new StringBuilder("UpgradeReadyMonitor[")
|
||||||
|
.append("G=")
|
||||||
|
.append(gsiStatusReady)
|
||||||
|
.append(",W=")
|
||||||
|
.append(workerMetricsReady)
|
||||||
|
.append("]")
|
||||||
|
.toString();
|
||||||
|
}
|
||||||
|
|
||||||
|
private boolean isReadyForUpgradeTo3x() throws DependencyException {
|
||||||
|
final MetricsScope scope = MetricsUtil.createMetricsWithOperation(metricsFactory, METRICS_OPERATION);
|
||||||
|
try {
|
||||||
|
// If GSI is not ready, optimize to not check if worker metrics are being emitted
|
||||||
|
final boolean localGsiReadyStatus = leaseRefresher.isLeaseOwnerToLeaseKeyIndexActive();
|
||||||
|
if (localGsiReadyStatus != gsiStatusReady) {
|
||||||
|
gsiStatusReady = localGsiReadyStatus;
|
||||||
|
log.info("Gsi ready status changed to {}", gsiStatusReady);
|
||||||
|
} else {
|
||||||
|
log.debug("GsiReady status {}", gsiStatusReady);
|
||||||
|
}
|
||||||
|
return gsiStatusReady && areLeaseOwnersEmittingWorkerMetrics();
|
||||||
|
} finally {
|
||||||
|
scope.addData("GsiReadyStatus", gsiStatusReady ? 1 : 0, StandardUnit.COUNT, MetricsLevel.SUMMARY);
|
||||||
|
scope.addData(
|
||||||
|
"WorkerMetricsReadyStatus", workerMetricsReady ? 1 : 0, StandardUnit.COUNT, MetricsLevel.SUMMARY);
|
||||||
|
MetricsUtil.endScope(scope);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private boolean areLeaseOwnersEmittingWorkerMetrics() {
|
||||||
|
final CompletableFuture<List<Lease>> leaseListFuture = loadLeaseListAsync();
|
||||||
|
final CompletableFuture<List<WorkerMetricStats>> workerMetricsFuture = loadWorkerMetricStats();
|
||||||
|
|
||||||
|
final List<Lease> leaseList = leaseListFuture.join();
|
||||||
|
final Set<String> leaseOwners = getUniqueLeaseOwnersFromLeaseTable(leaseList);
|
||||||
|
final List<WorkerMetricStats> workerMetricStatsList = workerMetricsFuture.join();
|
||||||
|
final Set<String> workersWithActiveWorkerMetrics = getWorkersWithActiveWorkerMetricStats(workerMetricStatsList);
|
||||||
|
|
||||||
|
// Leases are not checked for expired condition because:
|
||||||
|
// If some worker has gone down and is not active, but has lease assigned to it, those leases
|
||||||
|
// maybe expired. Since the worker is down, it may not have worker-metrics, or worker-metrics may not be active,
|
||||||
|
// In that case, the migration condition is not considered to be met.
|
||||||
|
// However, those leases should be assigned to another worker and so the check in the next
|
||||||
|
// iteration could succeed. This is intentional to make sure all leases owners are accounted for
|
||||||
|
// and the old owner does not come back up without worker metrics and reacquires the lease.
|
||||||
|
final boolean localWorkerMetricsReady = leaseOwners.equals(workersWithActiveWorkerMetrics);
|
||||||
|
if (localWorkerMetricsReady != workerMetricsReady) {
|
||||||
|
workerMetricsReady = localWorkerMetricsReady;
|
||||||
|
log.info("WorkerMetricStats status changed to {}", workerMetricsReady);
|
||||||
|
log.info("Lease List {}", leaseList);
|
||||||
|
log.info("WorkerMetricStats {}", workerMetricStatsList);
|
||||||
|
} else {
|
||||||
|
log.debug("WorkerMetricStats ready status {}", workerMetricsReady);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (lastKnownUniqueLeaseOwners == null) {
|
||||||
|
log.info("Unique lease owners {}", leaseOwners);
|
||||||
|
} else if (!lastKnownUniqueLeaseOwners.equals(leaseOwners)) {
|
||||||
|
log.info("Unique lease owners changed to {}", leaseOwners);
|
||||||
|
}
|
||||||
|
lastKnownUniqueLeaseOwners = leaseOwners;
|
||||||
|
|
||||||
|
if (lastKnownWorkersWithActiveWorkerMetrics == null) {
|
||||||
|
log.info("Workers with active worker metric stats {}", workersWithActiveWorkerMetrics);
|
||||||
|
} else if (!lastKnownWorkersWithActiveWorkerMetrics.equals(workersWithActiveWorkerMetrics)) {
|
||||||
|
log.info("Workers with active worker metric stats changed {}", workersWithActiveWorkerMetrics);
|
||||||
|
}
|
||||||
|
lastKnownWorkersWithActiveWorkerMetrics = workersWithActiveWorkerMetrics;
|
||||||
|
|
||||||
|
return workerMetricsReady;
|
||||||
|
}
|
||||||
|
|
||||||
|
private Set<String> getUniqueLeaseOwnersFromLeaseTable(final List<Lease> leaseList) {
|
||||||
|
return leaseList.stream().map(Lease::leaseOwner).collect(Collectors.toSet());
|
||||||
|
}
|
||||||
|
|
||||||
|
private Set<String> getWorkersWithActiveWorkerMetricStats(final List<WorkerMetricStats> workerMetricStats) {
|
||||||
|
final long nowInSeconds = Duration.ofMillis(now(timeProvider)).getSeconds();
|
||||||
|
return workerMetricStats.stream()
|
||||||
|
.filter(metricStats -> isWorkerMetricStatsActive(metricStats, nowInSeconds))
|
||||||
|
.map(WorkerMetricStats::getWorkerId)
|
||||||
|
.collect(Collectors.toSet());
|
||||||
|
}
|
||||||
|
|
||||||
|
private boolean isWorkerMetricStatsActive(final WorkerMetricStats metricStats, final long nowInSeconds) {
|
||||||
|
return (metricStats.getLastUpdateTime() + workerMetricStatsExpirySeconds) > nowInSeconds;
|
||||||
|
}
|
||||||
|
|
||||||
|
private CompletableFuture<List<WorkerMetricStats>> loadWorkerMetricStats() {
|
||||||
|
return CompletableFuture.supplyAsync(() -> loadWithRetry(workerMetricStatsDAO::getAllWorkerMetricStats));
|
||||||
|
}
|
||||||
|
|
||||||
|
private CompletableFuture<List<Lease>> loadLeaseListAsync() {
|
||||||
|
return CompletableFuture.supplyAsync(() -> loadWithRetry(leaseRefresher::listLeases));
|
||||||
|
}
|
||||||
|
|
||||||
|
private <T> T loadWithRetry(final Callable<T> loadFunction) {
|
||||||
|
int retryAttempt = 0;
|
||||||
|
while (true) {
|
||||||
|
try {
|
||||||
|
return loadFunction.call();
|
||||||
|
} catch (final Exception e) {
|
||||||
|
if (retryAttempt < DDB_LOAD_RETRY_ATTEMPT) {
|
||||||
|
log.warn(
|
||||||
|
"Failed to load : {}, retrying",
|
||||||
|
loadFunction.getClass().getName(),
|
||||||
|
e);
|
||||||
|
retryAttempt++;
|
||||||
|
} else {
|
||||||
|
throw new CompletionException(e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private static long now(final Callable<Long> timeProvider) {
|
||||||
|
try {
|
||||||
|
return timeProvider.call();
|
||||||
|
} catch (final Exception e) {
|
||||||
|
log.debug("Time provider threw exception, using System.currentTimeMillis", e);
|
||||||
|
return System.currentTimeMillis();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Stabilize the monitor trigger before invoking the callback
|
||||||
|
* to ensure we are consistently seeing the trigger for a configured
|
||||||
|
* stabilizationDurationInMillis
|
||||||
|
*/
|
||||||
|
private static class MonitorTriggerStabilizer {
|
||||||
|
private final Callable<Long> timeProvider;
|
||||||
|
private final long stabilizationDurationInSeconds;
|
||||||
|
private final Runnable callback;
|
||||||
|
private final String currentWorkerId;
|
||||||
|
private final LogRateLimiter rateLimitedTriggerStatusLogger;
|
||||||
|
|
||||||
|
private long lastToggleTimeInMillis;
|
||||||
|
private boolean currentTriggerStatus;
|
||||||
|
|
||||||
|
public MonitorTriggerStabilizer(
|
||||||
|
final Callable<Long> timeProvider,
|
||||||
|
final long stabilizationDurationInSeconds,
|
||||||
|
final Runnable callback,
|
||||||
|
final String currentWorkerId) {
|
||||||
|
this.timeProvider = timeProvider;
|
||||||
|
this.stabilizationDurationInSeconds = stabilizationDurationInSeconds;
|
||||||
|
this.callback = callback;
|
||||||
|
this.currentWorkerId = currentWorkerId;
|
||||||
|
this.rateLimitedTriggerStatusLogger = new LogRateLimiter(LOG_INTERVAL_NANOS);
|
||||||
|
}
|
||||||
|
|
||||||
|
public void call(final boolean isMonitorTriggered) {
|
||||||
|
final long now = now(timeProvider);
|
||||||
|
if (currentTriggerStatus != isMonitorTriggered) {
|
||||||
|
log.info("Trigger status has changed to {}", isMonitorTriggered);
|
||||||
|
currentTriggerStatus = isMonitorTriggered;
|
||||||
|
lastToggleTimeInMillis = now;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (currentTriggerStatus) {
|
||||||
|
final long deltaSeconds =
|
||||||
|
Duration.ofMillis(now - lastToggleTimeInMillis).getSeconds();
|
||||||
|
if (deltaSeconds >= stabilizationDurationInSeconds) {
|
||||||
|
log.info("Trigger has been consistently true for {}s, invoking callback", deltaSeconds);
|
||||||
|
callback.run();
|
||||||
|
} else {
|
||||||
|
rateLimitedTriggerStatusLogger.log(() -> log.info(
|
||||||
|
"Trigger has been true for {}s, waiting for stabilization time of {}s",
|
||||||
|
deltaSeconds,
|
||||||
|
stabilizationDurationInSeconds));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public void reset() {
|
||||||
|
if (currentTriggerStatus) {
|
||||||
|
log.info("This worker {} is no longer the leader, reset current status", currentWorkerId);
|
||||||
|
}
|
||||||
|
currentTriggerStatus = false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@RequiredArgsConstructor
|
||||||
|
private static class LogRateLimiter {
|
||||||
|
private final long logIntervalInNanos;
|
||||||
|
|
||||||
|
private long nextLogTime = System.nanoTime();
|
||||||
|
|
||||||
|
public void log(final Runnable logger) {
|
||||||
|
final long now = System.nanoTime();
|
||||||
|
if (now >= nextLogTime) {
|
||||||
|
logger.run();
|
||||||
|
nextLogTime = now + logIntervalInNanos;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,231 @@
|
||||||
|
package software.amazon.kinesis.coordinator.migration;
|
||||||
|
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.HashMap;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Map;
|
||||||
|
import java.util.stream.Collectors;
|
||||||
|
|
||||||
|
import lombok.Getter;
|
||||||
|
import lombok.RequiredArgsConstructor;
|
||||||
|
import lombok.ToString;
|
||||||
|
import lombok.extern.slf4j.Slf4j;
|
||||||
|
import software.amazon.awssdk.services.dynamodb.model.AttributeAction;
|
||||||
|
import software.amazon.awssdk.services.dynamodb.model.AttributeValue;
|
||||||
|
import software.amazon.awssdk.services.dynamodb.model.AttributeValueUpdate;
|
||||||
|
import software.amazon.awssdk.services.dynamodb.model.ExpectedAttributeValue;
|
||||||
|
import software.amazon.kinesis.common.StackTraceUtils;
|
||||||
|
import software.amazon.kinesis.coordinator.CoordinatorState;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Data model of the Migration state. This is used to track the state related to migration
|
||||||
|
* from KCLv2.x to KCLv3.x.
|
||||||
|
*/
|
||||||
|
@Getter
|
||||||
|
@ToString(callSuper = true)
|
||||||
|
@Slf4j
|
||||||
|
public class MigrationState extends CoordinatorState {
|
||||||
|
/**
|
||||||
|
* Key value for the item in the CoordinatorState table
|
||||||
|
*/
|
||||||
|
public static final String MIGRATION_HASH_KEY = "Migration3.0";
|
||||||
|
/**
|
||||||
|
* Attribute name in migration state item, whose value is used during
|
||||||
|
* the KCL v3.x migration process to know whether the workers need to
|
||||||
|
* perform KCL v2.x compatible operations or can perform native KCL v3.x
|
||||||
|
* operations.
|
||||||
|
*/
|
||||||
|
public static final String CLIENT_VERSION_ATTRIBUTE_NAME = "cv";
|
||||||
|
|
||||||
|
public static final String MODIFIED_BY_ATTRIBUTE_NAME = "mb";
|
||||||
|
public static final String MODIFIED_TIMESTAMP_ATTRIBUTE_NAME = "mts";
|
||||||
|
public static final String HISTORY_ATTRIBUTE_NAME = "h";
|
||||||
|
private static final int MAX_HISTORY_ENTRIES = 10;
|
||||||
|
|
||||||
|
private ClientVersion clientVersion;
|
||||||
|
private String modifiedBy;
|
||||||
|
private long modifiedTimestamp;
|
||||||
|
private final List<HistoryEntry> history;
|
||||||
|
|
||||||
|
private MigrationState(
|
||||||
|
final String key,
|
||||||
|
final ClientVersion clientVersion,
|
||||||
|
final String modifiedBy,
|
||||||
|
final long modifiedTimestamp,
|
||||||
|
final List<HistoryEntry> historyEntries,
|
||||||
|
final Map<String, AttributeValue> others) {
|
||||||
|
setKey(key);
|
||||||
|
setAttributes(others);
|
||||||
|
this.clientVersion = clientVersion;
|
||||||
|
this.modifiedBy = modifiedBy;
|
||||||
|
this.modifiedTimestamp = modifiedTimestamp;
|
||||||
|
this.history = historyEntries;
|
||||||
|
}
|
||||||
|
|
||||||
|
public MigrationState(final String key, final String modifiedBy) {
|
||||||
|
this(
|
||||||
|
key,
|
||||||
|
ClientVersion.CLIENT_VERSION_INIT,
|
||||||
|
modifiedBy,
|
||||||
|
System.currentTimeMillis(),
|
||||||
|
new ArrayList<>(),
|
||||||
|
new HashMap<>());
|
||||||
|
}
|
||||||
|
|
||||||
|
public HashMap<String, AttributeValue> serialize() {
|
||||||
|
final HashMap<String, AttributeValue> result = new HashMap<>();
|
||||||
|
result.put(CLIENT_VERSION_ATTRIBUTE_NAME, AttributeValue.fromS(clientVersion.name()));
|
||||||
|
result.put(MODIFIED_BY_ATTRIBUTE_NAME, AttributeValue.fromS(modifiedBy));
|
||||||
|
result.put(MODIFIED_TIMESTAMP_ATTRIBUTE_NAME, AttributeValue.fromN(String.valueOf(modifiedTimestamp)));
|
||||||
|
|
||||||
|
if (!history.isEmpty()) {
|
||||||
|
final List<AttributeValue> historyList = new ArrayList<>();
|
||||||
|
for (final HistoryEntry entry : history) {
|
||||||
|
historyList.add(AttributeValue.builder().m(entry.serialize()).build());
|
||||||
|
}
|
||||||
|
result.put(
|
||||||
|
HISTORY_ATTRIBUTE_NAME,
|
||||||
|
AttributeValue.builder().l(historyList).build());
|
||||||
|
}
|
||||||
|
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
public static MigrationState deserialize(final String key, final HashMap<String, AttributeValue> attributes) {
|
||||||
|
if (!MIGRATION_HASH_KEY.equals(key)) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
try {
|
||||||
|
final HashMap<String, AttributeValue> mutableAttributes = new HashMap<>(attributes);
|
||||||
|
final ClientVersion clientVersion = ClientVersion.valueOf(
|
||||||
|
mutableAttributes.remove(CLIENT_VERSION_ATTRIBUTE_NAME).s());
|
||||||
|
final String modifiedBy =
|
||||||
|
mutableAttributes.remove(MODIFIED_BY_ATTRIBUTE_NAME).s();
|
||||||
|
final long modifiedTimestamp = Long.parseLong(
|
||||||
|
mutableAttributes.remove(MODIFIED_TIMESTAMP_ATTRIBUTE_NAME).n());
|
||||||
|
|
||||||
|
final List<HistoryEntry> historyList = new ArrayList<>();
|
||||||
|
if (attributes.containsKey(HISTORY_ATTRIBUTE_NAME)) {
|
||||||
|
mutableAttributes.remove(HISTORY_ATTRIBUTE_NAME).l().stream()
|
||||||
|
.map(historyEntry -> HistoryEntry.deserialize(historyEntry.m()))
|
||||||
|
.forEach(historyList::add);
|
||||||
|
}
|
||||||
|
final MigrationState migrationState = new MigrationState(
|
||||||
|
MIGRATION_HASH_KEY, clientVersion, modifiedBy, modifiedTimestamp, historyList, mutableAttributes);
|
||||||
|
|
||||||
|
if (!mutableAttributes.isEmpty()) {
|
||||||
|
log.info("Unknown attributes {} for state {}", mutableAttributes, migrationState);
|
||||||
|
}
|
||||||
|
return migrationState;
|
||||||
|
|
||||||
|
} catch (final Exception e) {
|
||||||
|
log.warn("Unable to deserialize state with key {} and attributes {}", key, attributes, e);
|
||||||
|
}
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
public Map<String, ExpectedAttributeValue> getDynamoClientVersionExpectation() {
|
||||||
|
return new HashMap<String, ExpectedAttributeValue>() {
|
||||||
|
{
|
||||||
|
put(
|
||||||
|
CLIENT_VERSION_ATTRIBUTE_NAME,
|
||||||
|
ExpectedAttributeValue.builder()
|
||||||
|
.value(AttributeValue.fromS(clientVersion.name()))
|
||||||
|
.build());
|
||||||
|
}
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
public MigrationState copy() {
|
||||||
|
return new MigrationState(
|
||||||
|
getKey(),
|
||||||
|
getClientVersion(),
|
||||||
|
getModifiedBy(),
|
||||||
|
getModifiedTimestamp(),
|
||||||
|
new ArrayList<>(getHistory()),
|
||||||
|
new HashMap<>(getAttributes()));
|
||||||
|
}
|
||||||
|
|
||||||
|
public MigrationState update(final ClientVersion clientVersion, final String modifiedBy) {
|
||||||
|
log.info(
|
||||||
|
"Migration state is being updated to {} current state {} caller {}",
|
||||||
|
clientVersion,
|
||||||
|
this,
|
||||||
|
StackTraceUtils.getPrintableStackTrace(Thread.currentThread().getStackTrace()));
|
||||||
|
addHistoryEntry(this.clientVersion, this.modifiedBy, this.modifiedTimestamp);
|
||||||
|
this.clientVersion = clientVersion;
|
||||||
|
this.modifiedBy = modifiedBy;
|
||||||
|
this.modifiedTimestamp = System.currentTimeMillis();
|
||||||
|
return this;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void addHistoryEntry(
|
||||||
|
final ClientVersion lastClientVersion, final String lastModifiedBy, final long lastModifiedTimestamp) {
|
||||||
|
history.add(0, new HistoryEntry(lastClientVersion, lastModifiedBy, lastModifiedTimestamp));
|
||||||
|
if (history.size() > MAX_HISTORY_ENTRIES) {
|
||||||
|
log.info("Limit {} reached, dropping history {}", MAX_HISTORY_ENTRIES, history.remove(history.size() - 1));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public Map<String, AttributeValueUpdate> getDynamoUpdate() {
|
||||||
|
final HashMap<String, AttributeValueUpdate> updates = new HashMap<>();
|
||||||
|
updates.put(
|
||||||
|
CLIENT_VERSION_ATTRIBUTE_NAME,
|
||||||
|
AttributeValueUpdate.builder()
|
||||||
|
.value(AttributeValue.fromS(clientVersion.name()))
|
||||||
|
.action(AttributeAction.PUT)
|
||||||
|
.build());
|
||||||
|
updates.put(
|
||||||
|
MODIFIED_BY_ATTRIBUTE_NAME,
|
||||||
|
AttributeValueUpdate.builder()
|
||||||
|
.value(AttributeValue.fromS(modifiedBy))
|
||||||
|
.action(AttributeAction.PUT)
|
||||||
|
.build());
|
||||||
|
updates.put(
|
||||||
|
MODIFIED_TIMESTAMP_ATTRIBUTE_NAME,
|
||||||
|
AttributeValueUpdate.builder()
|
||||||
|
.value(AttributeValue.fromN(String.valueOf(modifiedTimestamp)))
|
||||||
|
.action(AttributeAction.PUT)
|
||||||
|
.build());
|
||||||
|
if (!history.isEmpty()) {
|
||||||
|
updates.put(
|
||||||
|
HISTORY_ATTRIBUTE_NAME,
|
||||||
|
AttributeValueUpdate.builder()
|
||||||
|
.value(AttributeValue.fromL(
|
||||||
|
history.stream().map(HistoryEntry::toAv).collect(Collectors.toList())))
|
||||||
|
.action(AttributeAction.PUT)
|
||||||
|
.build());
|
||||||
|
}
|
||||||
|
return updates;
|
||||||
|
}
|
||||||
|
|
||||||
|
@RequiredArgsConstructor
|
||||||
|
@ToString
|
||||||
|
public static class HistoryEntry {
|
||||||
|
private final ClientVersion lastClientVersion;
|
||||||
|
private final String lastModifiedBy;
|
||||||
|
private final long lastModifiedTimestamp;
|
||||||
|
|
||||||
|
public AttributeValue toAv() {
|
||||||
|
return AttributeValue.fromM(serialize());
|
||||||
|
}
|
||||||
|
|
||||||
|
public Map<String, AttributeValue> serialize() {
|
||||||
|
return new HashMap<String, AttributeValue>() {
|
||||||
|
{
|
||||||
|
put(CLIENT_VERSION_ATTRIBUTE_NAME, AttributeValue.fromS(lastClientVersion.name()));
|
||||||
|
put(MODIFIED_BY_ATTRIBUTE_NAME, AttributeValue.fromS(lastModifiedBy));
|
||||||
|
put(MODIFIED_TIMESTAMP_ATTRIBUTE_NAME, AttributeValue.fromN(String.valueOf(lastModifiedTimestamp)));
|
||||||
|
}
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
public static HistoryEntry deserialize(final Map<String, AttributeValue> map) {
|
||||||
|
return new HistoryEntry(
|
||||||
|
ClientVersion.valueOf(map.get(CLIENT_VERSION_ATTRIBUTE_NAME).s()),
|
||||||
|
map.get(MODIFIED_BY_ATTRIBUTE_NAME).s(),
|
||||||
|
Long.parseLong(map.get(MODIFIED_TIMESTAMP_ATTRIBUTE_NAME).n()));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,66 @@
|
||||||
|
/*
|
||||||
|
* Copyright 2024 Amazon.com, Inc. or its affiliates.
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the
|
||||||
|
* "License"); you may not use this file except in compliance
|
||||||
|
* with the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
package software.amazon.kinesis.coordinator.migration;
|
||||||
|
|
||||||
|
import software.amazon.kinesis.leases.exceptions.DependencyException;
|
||||||
|
import software.amazon.kinesis.leases.exceptions.InvalidStateException;
|
||||||
|
|
||||||
|
/**
 * State machine that provides:
 * 1. Seamless upgrade from 2.x to 3.x - 3.x has introduced new algorithms that are not compatible with 2.x
 *    workers, so the state machine allows the worker to seamlessly run the 2.x functionality to be compatible
 *    with any 2.x worker in the fleet, and also seamlessly switch to 3.x functionality when all KCL workers
 *    are 3.x compliant.
 * 2. Instant rollbacks - Rollbacks are supported using the KCL Migration tool to revert back to 2.x
 *    functionality if the customer finds regressions in 3.x functionality.
 * 3. Instant roll-forwards - Once any issue has been mitigated, roll-forwards are supported instantly
 *    with the KCL Migration tool.
 */
public interface MigrationStateMachine {
    /**
     * Initialize the state machine by identifying the initial state when the KCL worker comes up for the first time.
     * @throws DependencyException When unable to identify the initial state.
     */
    void initialize() throws DependencyException;

    /**
     * Shutdown the state machine and perform the cleanup necessary for the worker to gracefully shutdown.
     */
    void shutdown();

    /**
     * Terminate the state machine when it reaches a terminal state, which is a successful upgrade
     * to v3.x.
     */
    void terminate();

    /**
     * Perform a transition from the current state to the given new ClientVersion.
     * @param nextClientVersion clientVersion of the new state the state machine must transition to
     * @param state the current MigrationState in dynamo
     * @throws InvalidStateException when the transition fails; this allows the state machine to stay
     *          in the current state until a valid transition is possible
     * @throws DependencyException when the transition fails due to a dependency on DDB failing in
     *          unexpected ways.
     */
    void transitionTo(final ClientVersion nextClientVersion, final MigrationState state)
            throws InvalidStateException, DependencyException;

    /**
     * Get the ClientVersion of the current state machine state.
     * @return ClientVersion of the current state machine state
     */
    ClientVersion getCurrentClientVersion();
}
|
||||||
|
|
@ -0,0 +1,254 @@
|
||||||
|
/*
|
||||||
|
* Copyright 2024 Amazon.com, Inc. or its affiliates.
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the
|
||||||
|
* "License"); you may not use this file except in compliance
|
||||||
|
* with the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
package software.amazon.kinesis.coordinator.migration;
|
||||||
|
|
||||||
|
import java.util.AbstractMap.SimpleEntry;
|
||||||
|
import java.util.Random;
|
||||||
|
import java.util.concurrent.Callable;
|
||||||
|
import java.util.concurrent.ScheduledExecutorService;
|
||||||
|
import java.util.concurrent.TimeUnit;
|
||||||
|
|
||||||
|
import lombok.Getter;
|
||||||
|
import lombok.extern.slf4j.Slf4j;
|
||||||
|
import software.amazon.awssdk.annotations.ThreadSafe;
|
||||||
|
import software.amazon.awssdk.services.cloudwatch.model.StandardUnit;
|
||||||
|
import software.amazon.kinesis.annotations.KinesisClientInternalApi;
|
||||||
|
import software.amazon.kinesis.coordinator.CoordinatorConfig.ClientVersionConfig;
|
||||||
|
import software.amazon.kinesis.coordinator.CoordinatorStateDAO;
|
||||||
|
import software.amazon.kinesis.coordinator.DynamicMigrationComponentsInitializer;
|
||||||
|
import software.amazon.kinesis.leases.exceptions.DependencyException;
|
||||||
|
import software.amazon.kinesis.metrics.MetricsFactory;
|
||||||
|
import software.amazon.kinesis.metrics.MetricsLevel;
|
||||||
|
import software.amazon.kinesis.metrics.MetricsScope;
|
||||||
|
import software.amazon.kinesis.metrics.MetricsUtil;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Implementation of {@link MigrationStateMachine}
|
||||||
|
*/
|
||||||
|
@KinesisClientInternalApi
|
||||||
|
@Getter
|
||||||
|
@Slf4j
|
||||||
|
@ThreadSafe
|
||||||
|
public class MigrationStateMachineImpl implements MigrationStateMachine {
|
||||||
|
public static final String FAULT_METRIC = "Fault";
|
||||||
|
public static final String METRICS_OPERATION = "Migration";
|
||||||
|
|
||||||
|
private static final long THREAD_POOL_SHUTDOWN_TIMEOUT_SECONDS = 5L;
|
||||||
|
|
||||||
|
private final MetricsFactory metricsFactory;
|
||||||
|
private final Callable<Long> timeProvider;
|
||||||
|
private final CoordinatorStateDAO coordinatorStateDAO;
|
||||||
|
private final ScheduledExecutorService stateMachineThreadPool;
|
||||||
|
private DynamicMigrationComponentsInitializer initializer;
|
||||||
|
private final ClientVersionConfig clientVersionConfig;
|
||||||
|
private final Random random;
|
||||||
|
private final String workerId;
|
||||||
|
private final long flipTo3XStabilizerTimeInSeconds;
|
||||||
|
private MigrationState startingMigrationState;
|
||||||
|
|
||||||
|
@Getter
|
||||||
|
private ClientVersion startingClientVersion;
|
||||||
|
|
||||||
|
private MigrationClientVersionState currentMigrationClientVersionState = new MigrationClientVersionState() {
|
||||||
|
@Override
|
||||||
|
public ClientVersion clientVersion() {
|
||||||
|
return ClientVersion.CLIENT_VERSION_INIT;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void enter(final ClientVersion fromClientVersion) {
|
||||||
|
log.info("Entered {}...", clientVersion());
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void leave() {
|
||||||
|
log.info("Left {}...", clientVersion());
|
||||||
|
}
|
||||||
|
};
|
||||||
|
private boolean terminated = false;
|
||||||
|
|
||||||
|
public MigrationStateMachineImpl(
|
||||||
|
final MetricsFactory metricsFactory,
|
||||||
|
final Callable<Long> timeProvider,
|
||||||
|
final CoordinatorStateDAO coordinatorStateDAO,
|
||||||
|
final ScheduledExecutorService stateMachineThreadPool,
|
||||||
|
final ClientVersionConfig clientVersionConfig,
|
||||||
|
final Random random,
|
||||||
|
final DynamicMigrationComponentsInitializer initializer,
|
||||||
|
final String workerId,
|
||||||
|
final long flipTo3XStabilizerTimeInSeconds) {
|
||||||
|
this.metricsFactory = metricsFactory;
|
||||||
|
this.timeProvider = timeProvider;
|
||||||
|
this.coordinatorStateDAO = coordinatorStateDAO;
|
||||||
|
this.stateMachineThreadPool = stateMachineThreadPool;
|
||||||
|
this.clientVersionConfig = clientVersionConfig;
|
||||||
|
this.random = random;
|
||||||
|
this.initializer = initializer;
|
||||||
|
this.workerId = workerId;
|
||||||
|
this.flipTo3XStabilizerTimeInSeconds = flipTo3XStabilizerTimeInSeconds;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void initialize() throws DependencyException {
|
||||||
|
if (startingClientVersion == null) {
|
||||||
|
log.info("Initializing MigrationStateMachine");
|
||||||
|
coordinatorStateDAO.initialize();
|
||||||
|
final MigrationClientVersionStateInitializer startingStateInitializer =
|
||||||
|
new MigrationClientVersionStateInitializer(
|
||||||
|
timeProvider, coordinatorStateDAO, clientVersionConfig, random, workerId);
|
||||||
|
final SimpleEntry<ClientVersion, MigrationState> dataForInitialization =
|
||||||
|
startingStateInitializer.getInitialState();
|
||||||
|
initializer.initialize(dataForInitialization.getKey());
|
||||||
|
transitionTo(dataForInitialization.getKey(), dataForInitialization.getValue());
|
||||||
|
startingClientVersion = dataForInitialization.getKey();
|
||||||
|
startingMigrationState = dataForInitialization.getValue();
|
||||||
|
log.info("MigrationStateMachine initial clientVersion {}", startingClientVersion);
|
||||||
|
} else {
|
||||||
|
log.info("MigrationStateMachine already initialized with clientVersion {}", startingClientVersion);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void shutdown() {
|
||||||
|
terminate();
|
||||||
|
if (!stateMachineThreadPool.isShutdown()) {
|
||||||
|
stateMachineThreadPool.shutdown();
|
||||||
|
try {
|
||||||
|
if (stateMachineThreadPool.awaitTermination(THREAD_POOL_SHUTDOWN_TIMEOUT_SECONDS, TimeUnit.SECONDS)) {
|
||||||
|
log.info(
|
||||||
|
"StateMachineThreadPool did not shutdown within {} seconds, forcefully shutting down",
|
||||||
|
THREAD_POOL_SHUTDOWN_TIMEOUT_SECONDS);
|
||||||
|
stateMachineThreadPool.shutdownNow();
|
||||||
|
}
|
||||||
|
} catch (final InterruptedException e) {
|
||||||
|
log.info("Interrupted when shutting down StateMachineThreadPool, forcefully shutting down");
|
||||||
|
stateMachineThreadPool.shutdownNow();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
log.info("Shutdown successfully");
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public synchronized void terminate() {
|
||||||
|
if (!terminated && currentMigrationClientVersionState != null) {
|
||||||
|
log.info("State machine is about to terminate");
|
||||||
|
currentMigrationClientVersionState.leave();
|
||||||
|
currentMigrationClientVersionState = null;
|
||||||
|
log.info("State machine reached a terminal state.");
|
||||||
|
terminated = true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public synchronized void transitionTo(final ClientVersion nextClientVersion, final MigrationState migrationState)
|
||||||
|
throws DependencyException {
|
||||||
|
if (terminated) {
|
||||||
|
throw new IllegalStateException(String.format(
|
||||||
|
"Cannot transition to %s after state machine is terminated, %s",
|
||||||
|
nextClientVersion.name(), migrationState));
|
||||||
|
}
|
||||||
|
|
||||||
|
final MigrationClientVersionState nextMigrationClientVersionState =
|
||||||
|
createMigrationClientVersionState(nextClientVersion, migrationState);
|
||||||
|
log.info(
|
||||||
|
"Attempting to transition from {} to {}",
|
||||||
|
currentMigrationClientVersionState.clientVersion(),
|
||||||
|
nextClientVersion);
|
||||||
|
currentMigrationClientVersionState.leave();
|
||||||
|
|
||||||
|
enter(nextMigrationClientVersionState);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Enter with retry. When entering the state machine for the first time, the caller has retry so exceptions
|
||||||
|
* will be re-thrown. Once the state machine has initialized all transitions will be an indefinite retry.
|
||||||
|
* It is possible the DDB state has changed by the time enter succeeds but that will occur as a new
|
||||||
|
* state transition after entering the state. Usually the failures are due to unexpected issues with
|
||||||
|
* DDB which will be transitional and will recover on a retry.
|
||||||
|
* @param nextMigrationClientVersionState the state to transition to
|
||||||
|
* @throws DependencyException If entering fails during state machine initialization.
|
||||||
|
*/
|
||||||
|
private void enter(final MigrationClientVersionState nextMigrationClientVersionState) throws DependencyException {
|
||||||
|
boolean success = false;
|
||||||
|
while (!success) {
|
||||||
|
try {
|
||||||
|
// Enter should never fail unless it is the starting state and fails to create the GSI,
|
||||||
|
// in which case it is an unrecoverable error that is bubbled up and KCL start up will fail.
|
||||||
|
nextMigrationClientVersionState.enter(currentMigrationClientVersionState.clientVersion());
|
||||||
|
|
||||||
|
currentMigrationClientVersionState = nextMigrationClientVersionState;
|
||||||
|
log.info("Successfully transitioned to {}", nextMigrationClientVersionState.clientVersion());
|
||||||
|
if (currentMigrationClientVersionState.clientVersion() == ClientVersion.CLIENT_VERSION_3X) {
|
||||||
|
terminate();
|
||||||
|
}
|
||||||
|
success = true;
|
||||||
|
} catch (final DependencyException e) {
|
||||||
|
if (currentMigrationClientVersionState.clientVersion() == ClientVersion.CLIENT_VERSION_INIT) {
|
||||||
|
throw e;
|
||||||
|
}
|
||||||
|
log.info(
|
||||||
|
"Transitioning from {} to {} failed, retrying after a minute",
|
||||||
|
currentMigrationClientVersionState.clientVersion(),
|
||||||
|
nextMigrationClientVersionState.clientVersion(),
|
||||||
|
e);
|
||||||
|
|
||||||
|
final MetricsScope scope = MetricsUtil.createMetricsWithOperation(metricsFactory, METRICS_OPERATION);
|
||||||
|
scope.addData(FAULT_METRIC, 1, StandardUnit.COUNT, MetricsLevel.SUMMARY);
|
||||||
|
MetricsUtil.endScope(scope);
|
||||||
|
|
||||||
|
try {
|
||||||
|
Thread.sleep(1000);
|
||||||
|
} catch (final InterruptedException ie) {
|
||||||
|
log.info("Interrupted while sleeping before retrying state machine transition", ie);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private MigrationClientVersionState createMigrationClientVersionState(
|
||||||
|
final ClientVersion clientVersion, final MigrationState migrationState) {
|
||||||
|
switch (clientVersion) {
|
||||||
|
case CLIENT_VERSION_2X:
|
||||||
|
return new MigrationClientVersion2xState(
|
||||||
|
this, coordinatorStateDAO, stateMachineThreadPool, initializer, random);
|
||||||
|
case CLIENT_VERSION_UPGRADE_FROM_2X:
|
||||||
|
return new MigrationClientVersionUpgradeFrom2xState(
|
||||||
|
this,
|
||||||
|
timeProvider,
|
||||||
|
coordinatorStateDAO,
|
||||||
|
stateMachineThreadPool,
|
||||||
|
initializer,
|
||||||
|
random,
|
||||||
|
migrationState,
|
||||||
|
flipTo3XStabilizerTimeInSeconds);
|
||||||
|
case CLIENT_VERSION_3X_WITH_ROLLBACK:
|
||||||
|
return new MigrationClientVersion3xWithRollbackState(
|
||||||
|
this, coordinatorStateDAO, stateMachineThreadPool, initializer, random);
|
||||||
|
case CLIENT_VERSION_3X:
|
||||||
|
return new MigrationClientVersion3xState(this, initializer);
|
||||||
|
}
|
||||||
|
throw new IllegalStateException(String.format("Unknown client version %s", clientVersion));
|
||||||
|
}
|
||||||
|
|
||||||
|
public ClientVersion getCurrentClientVersion() {
|
||||||
|
if (currentMigrationClientVersionState != null) {
|
||||||
|
return currentMigrationClientVersionState.clientVersion();
|
||||||
|
} else if (terminated) {
|
||||||
|
return ClientVersion.CLIENT_VERSION_3X;
|
||||||
|
}
|
||||||
|
throw new UnsupportedOperationException(
|
||||||
|
"No current state when state machine is either not initialized" + " or already terminated");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,270 @@
|
||||||
|
package software.amazon.kinesis.leader;
|
||||||
|
|
||||||
|
import java.time.Duration;
|
||||||
|
import java.time.Instant;
|
||||||
|
import java.util.AbstractMap;
|
||||||
|
import java.util.Optional;
|
||||||
|
import java.util.concurrent.TimeUnit;
|
||||||
|
import java.util.concurrent.atomic.AtomicBoolean;
|
||||||
|
|
||||||
|
import com.amazonaws.services.dynamodbv2.AcquireLockOptions;
|
||||||
|
import com.amazonaws.services.dynamodbv2.AmazonDynamoDBLockClient;
|
||||||
|
import com.amazonaws.services.dynamodbv2.GetLockOptions;
|
||||||
|
import com.amazonaws.services.dynamodbv2.LockItem;
|
||||||
|
import com.amazonaws.services.dynamodbv2.model.LockCurrentlyUnavailableException;
|
||||||
|
import com.google.common.annotations.VisibleForTesting;
|
||||||
|
import lombok.RequiredArgsConstructor;
|
||||||
|
import lombok.extern.slf4j.Slf4j;
|
||||||
|
import software.amazon.awssdk.services.cloudwatch.model.StandardUnit;
|
||||||
|
import software.amazon.awssdk.services.dynamodb.model.ResourceNotFoundException;
|
||||||
|
import software.amazon.kinesis.coordinator.CoordinatorStateDAO;
|
||||||
|
import software.amazon.kinesis.coordinator.LeaderDecider;
|
||||||
|
import software.amazon.kinesis.metrics.MetricsFactory;
|
||||||
|
import software.amazon.kinesis.metrics.MetricsLevel;
|
||||||
|
import software.amazon.kinesis.metrics.MetricsScope;
|
||||||
|
import software.amazon.kinesis.metrics.MetricsUtil;
|
||||||
|
|
||||||
|
import static java.util.Objects.isNull;
|
||||||
|
import static software.amazon.kinesis.coordinator.CoordinatorState.LEADER_HASH_KEY;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Implementation for LeaderDecider to elect leader using lock on dynamo db table. This class uses
|
||||||
|
* AmazonDynamoDBLockClient library to perform the leader election.
|
||||||
|
*/
|
||||||
|
@RequiredArgsConstructor
|
||||||
|
@Slf4j
|
||||||
|
public class DynamoDBLockBasedLeaderDecider implements LeaderDecider {
|
||||||
|
private static final Long DEFAULT_LEASE_DURATION_MILLIS =
|
||||||
|
Duration.ofMinutes(2).toMillis();
|
||||||
|
// Heartbeat frequency should be at-least 3 times smaller the lease duration according to LockClient documentation
|
||||||
|
private static final Long DEFAULT_HEARTBEAT_PERIOD_MILLIS =
|
||||||
|
Duration.ofSeconds(30).toMillis();
|
||||||
|
|
||||||
|
private final CoordinatorStateDAO coordinatorStateDao;
|
||||||
|
private final AmazonDynamoDBLockClient dynamoDBLockClient;
|
||||||
|
private final Long heartbeatPeriodMillis;
|
||||||
|
private final String workerId;
|
||||||
|
private final MetricsFactory metricsFactory;
|
||||||
|
|
||||||
|
private long lastCheckTimeInMillis = 0L;
|
||||||
|
private boolean lastIsLeaderResult = false;
|
||||||
|
private final AtomicBoolean isShutdown = new AtomicBoolean(false);
|
||||||
|
|
||||||
|
private long lastIsAnyLeaderElectedDDBReadTimeMillis = 0L;
|
||||||
|
private boolean lastIsAnyLeaderElectedResult = false;
|
||||||
|
/**
|
||||||
|
* Key value pair of LockItem to the time when it was first discovered.
|
||||||
|
* If a new LockItem fetched from ddb has different recordVersionNumber than the one in-memory,
|
||||||
|
* its considered as new LockItem, and the time when it was fetched is stored in memory to identify lockItem
|
||||||
|
* expiry. This is used only in the context of isAnyLeaderElected method.
|
||||||
|
*/
|
||||||
|
private AbstractMap.SimpleEntry<LockItem, Long> lastIsAnyLeaderCheckLockItemToFirstEncounterTime = null;
|
||||||
|
|
||||||
|
@VisibleForTesting
|
||||||
|
static DynamoDBLockBasedLeaderDecider create(
|
||||||
|
final CoordinatorStateDAO coordinatorStateDao,
|
||||||
|
final String workerId,
|
||||||
|
final Long leaseDuration,
|
||||||
|
final Long heartbeatPeriod,
|
||||||
|
final MetricsFactory metricsFactory) {
|
||||||
|
final AmazonDynamoDBLockClient dynamoDBLockClient = new AmazonDynamoDBLockClient(coordinatorStateDao
|
||||||
|
.getDDBLockClientOptionsBuilder()
|
||||||
|
.withTimeUnit(TimeUnit.MILLISECONDS)
|
||||||
|
.withLeaseDuration(leaseDuration)
|
||||||
|
.withHeartbeatPeriod(heartbeatPeriod)
|
||||||
|
.withCreateHeartbeatBackgroundThread(true)
|
||||||
|
.withOwnerName(workerId)
|
||||||
|
.build());
|
||||||
|
|
||||||
|
return new DynamoDBLockBasedLeaderDecider(
|
||||||
|
coordinatorStateDao, dynamoDBLockClient, heartbeatPeriod, workerId, metricsFactory);
|
||||||
|
}
|
||||||
|
|
||||||
|
public static DynamoDBLockBasedLeaderDecider create(
|
||||||
|
final CoordinatorStateDAO coordinatorStateDao, final String workerId, final MetricsFactory metricsFactory) {
|
||||||
|
return create(
|
||||||
|
coordinatorStateDao,
|
||||||
|
workerId,
|
||||||
|
DEFAULT_LEASE_DURATION_MILLIS,
|
||||||
|
DEFAULT_HEARTBEAT_PERIOD_MILLIS,
|
||||||
|
metricsFactory);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void initialize() {
|
||||||
|
log.info("Initializing DDB Lock based leader decider");
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Check the lockItem in storage and if the current worker is not leader worker, then tries to acquire lock and
|
||||||
|
* returns true if it was able to acquire lock else false.
|
||||||
|
* @param workerId ID of the worker
|
||||||
|
* @return true if current worker is leader else false.
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public synchronized Boolean isLeader(final String workerId) {
|
||||||
|
// if the decider has shutdown, then return false and don't try acquireLock anymore.
|
||||||
|
if (isShutdown.get()) {
|
||||||
|
publishIsLeaderMetrics(false);
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
// If the last time we tried to take lock and didnt get lock, don't try to take again for heartbeatPeriodMillis
|
||||||
|
// this is to avoid unnecessary calls to dynamoDB.
|
||||||
|
// Different modules in KCL can request for isLeader check within heartbeatPeriodMillis, and this optimization
|
||||||
|
// will help in those cases.
|
||||||
|
// In case the last call returned true, we want to check the source always to ensure the correctness of leader.
|
||||||
|
if (!lastIsLeaderResult && lastCheckTimeInMillis + heartbeatPeriodMillis > System.currentTimeMillis()) {
|
||||||
|
publishIsLeaderMetrics(lastIsLeaderResult);
|
||||||
|
return lastIsLeaderResult;
|
||||||
|
}
|
||||||
|
boolean response;
|
||||||
|
// Get the lockItem from storage (if present
|
||||||
|
final Optional<LockItem> lockItem = dynamoDBLockClient.getLock(LEADER_HASH_KEY, Optional.empty());
|
||||||
|
lockItem.ifPresent(item -> log.info("Worker : {} is the current leader.", item.getOwnerName()));
|
||||||
|
|
||||||
|
// If the lockItem is present and is expired, that means either current worker is not leader.
|
||||||
|
if (!lockItem.isPresent() || lockItem.get().isExpired()) {
|
||||||
|
try {
|
||||||
|
// Current worker does not hold the lock, try to acquireOne.
|
||||||
|
final Optional<LockItem> leaderLockItem =
|
||||||
|
dynamoDBLockClient.tryAcquireLock(AcquireLockOptions.builder(LEADER_HASH_KEY)
|
||||||
|
.withRefreshPeriod(heartbeatPeriodMillis)
|
||||||
|
.withTimeUnit(TimeUnit.MILLISECONDS)
|
||||||
|
.withShouldSkipBlockingWait(true)
|
||||||
|
.build());
|
||||||
|
leaderLockItem.ifPresent(item -> log.info("Worker : {} is new leader", item.getOwnerName()));
|
||||||
|
// if leaderLockItem optional is empty, that means the lock is not acquired by this worker.
|
||||||
|
response = leaderLockItem.isPresent();
|
||||||
|
} catch (final InterruptedException e) {
|
||||||
|
// Something bad happened, don't assume leadership and also release lock just in case the
|
||||||
|
// lock was granted and still interrupt happened.
|
||||||
|
releaseLeadershipIfHeld();
|
||||||
|
log.error("Acquiring lock was interrupted in between", e);
|
||||||
|
response = false;
|
||||||
|
|
||||||
|
} catch (final LockCurrentlyUnavailableException e) {
|
||||||
|
response = false;
|
||||||
|
}
|
||||||
|
|
||||||
|
} else {
|
||||||
|
response = lockItem.get().getOwnerName().equals(workerId);
|
||||||
|
}
|
||||||
|
|
||||||
|
lastCheckTimeInMillis = System.currentTimeMillis();
|
||||||
|
lastIsLeaderResult = response;
|
||||||
|
publishIsLeaderMetrics(response);
|
||||||
|
return response;
|
||||||
|
}
|
||||||
|
|
||||||
|
private void publishIsLeaderMetrics(final boolean response) {
|
||||||
|
final MetricsScope metricsScope =
|
||||||
|
MetricsUtil.createMetricsWithOperation(metricsFactory, METRIC_OPERATION_LEADER_DECIDER);
|
||||||
|
metricsScope.addData(
|
||||||
|
METRIC_OPERATION_LEADER_DECIDER_IS_LEADER, response ? 1 : 0, StandardUnit.COUNT, MetricsLevel.DETAILED);
|
||||||
|
MetricsUtil.endScope(metricsScope);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Releases the lock if held by current worker when this method is invoked.
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public void shutdown() {
|
||||||
|
if (!isShutdown.getAndSet(true)) {
|
||||||
|
releaseLeadershipIfHeld();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void releaseLeadershipIfHeld() {
|
||||||
|
try {
|
||||||
|
final Optional<LockItem> lockItem = dynamoDBLockClient.getLock(LEADER_HASH_KEY, Optional.empty());
|
||||||
|
if (lockItem.isPresent()
|
||||||
|
&& !lockItem.get().isExpired()
|
||||||
|
&& lockItem.get().getOwnerName().equals(workerId)) {
|
||||||
|
|
||||||
|
log.info(
|
||||||
|
"Current worker : {} holds the lock, releasing it.",
|
||||||
|
lockItem.get().getOwnerName());
|
||||||
|
// LockItem.close() will release the lock if current worker owns it else this call is no op.
|
||||||
|
lockItem.get().close();
|
||||||
|
}
|
||||||
|
} catch (final Exception e) {
|
||||||
|
log.error("Failed to complete releaseLeadershipIfHeld call.", e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns if any ACTIVE leader exists that is elected by the current implementation which can be outside the
|
||||||
|
* scope of this worker. That is leader elected by this implementation in any worker in fleet.
|
||||||
|
* DynamoDBLockClient does not provide an interface which can tell if an active lock exists or not, thus
|
||||||
|
* we need to put custom implementation.
|
||||||
|
* The implementation performs DDB get every heartbeatPeriodMillis to have low RCU consumption, which means that
|
||||||
|
* the leader could have been elected from the last time the check happened and before check happens again.
|
||||||
|
* The information returned from this method has eventual consistency (up to heartbeatPeriodMillis interval).
|
||||||
|
*
|
||||||
|
* @return true, if any leader is elected else false.
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public synchronized boolean isAnyLeaderElected() {
|
||||||
|
// Avoid going to ddb for every call and do it once every heartbeatPeriod to have low RCU usage.
|
||||||
|
if (Duration.between(
|
||||||
|
Instant.ofEpochMilli(lastIsAnyLeaderElectedDDBReadTimeMillis),
|
||||||
|
Instant.ofEpochMilli(System.currentTimeMillis()))
|
||||||
|
.toMillis()
|
||||||
|
> heartbeatPeriodMillis) {
|
||||||
|
final MetricsScope metricsScope = MetricsUtil.createMetricsWithOperation(
|
||||||
|
metricsFactory, this.getClass().getSimpleName() + ":isAnyLeaderElected");
|
||||||
|
final long startTime = System.currentTimeMillis();
|
||||||
|
try {
|
||||||
|
lastIsAnyLeaderElectedDDBReadTimeMillis = System.currentTimeMillis();
|
||||||
|
final Optional<LockItem> lockItem = dynamoDBLockClient.getLockFromDynamoDB(
|
||||||
|
GetLockOptions.builder(LEADER_HASH_KEY).build());
|
||||||
|
|
||||||
|
if (!lockItem.isPresent()) {
|
||||||
|
// There is no LockItem in the ddb table, that means no one is holding lock.
|
||||||
|
lastIsAnyLeaderElectedResult = false;
|
||||||
|
log.info("LockItem present : {}", false);
|
||||||
|
} else {
|
||||||
|
final LockItem ddbLockItem = lockItem.get();
|
||||||
|
if (isNull(lastIsAnyLeaderCheckLockItemToFirstEncounterTime)
|
||||||
|
|| !ddbLockItem
|
||||||
|
.getRecordVersionNumber()
|
||||||
|
.equals(lastIsAnyLeaderCheckLockItemToFirstEncounterTime
|
||||||
|
.getKey()
|
||||||
|
.getRecordVersionNumber())) {
|
||||||
|
// This is the first isAnyLeaderElected call, so we can't evaluate if the LockItem has expired
|
||||||
|
// or not yet so consider LOCK as ACTIVE.
|
||||||
|
// OR LockItem in ddb and in-memory LockItem have different RecordVersionNumber
|
||||||
|
// and thus the LOCK is still ACTIVE
|
||||||
|
lastIsAnyLeaderElectedResult = true;
|
||||||
|
lastIsAnyLeaderCheckLockItemToFirstEncounterTime =
|
||||||
|
new AbstractMap.SimpleEntry<>(ddbLockItem, lastIsAnyLeaderElectedDDBReadTimeMillis);
|
||||||
|
log.info(
|
||||||
|
"LockItem present : {}, and this is either first call OR lockItem has had "
|
||||||
|
+ "a heartbeat",
|
||||||
|
true);
|
||||||
|
} else {
|
||||||
|
// There is no change in the ddb lock item, so if the last update time is more than
|
||||||
|
// lease duration, the lock is expired else it is still ACTIVE,
|
||||||
|
lastIsAnyLeaderElectedResult = lastIsAnyLeaderCheckLockItemToFirstEncounterTime.getValue()
|
||||||
|
+ ddbLockItem.getLeaseDuration()
|
||||||
|
> lastIsAnyLeaderElectedDDBReadTimeMillis;
|
||||||
|
log.info("LockItem present : {}, and lease expiry: {}", true, lastIsAnyLeaderElectedResult);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} catch (final ResourceNotFoundException exception) {
|
||||||
|
log.info("Lock table does not exists...");
|
||||||
|
// If the table itself doesn't exist, there is no elected leader.
|
||||||
|
lastIsAnyLeaderElectedResult = false;
|
||||||
|
} finally {
|
||||||
|
metricsScope.addData(
|
||||||
|
"Latency",
|
||||||
|
System.currentTimeMillis() - startTime,
|
||||||
|
StandardUnit.MILLISECONDS,
|
||||||
|
MetricsLevel.DETAILED);
|
||||||
|
MetricsUtil.endScope(metricsScope);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return lastIsAnyLeaderElectedResult;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,79 @@
|
||||||
|
package software.amazon.kinesis.leader;
|
||||||
|
|
||||||
|
import lombok.extern.slf4j.Slf4j;
|
||||||
|
import software.amazon.awssdk.annotations.ThreadSafe;
|
||||||
|
import software.amazon.awssdk.services.cloudwatch.model.StandardUnit;
|
||||||
|
import software.amazon.kinesis.annotations.KinesisClientInternalApi;
|
||||||
|
import software.amazon.kinesis.coordinator.LeaderDecider;
|
||||||
|
import software.amazon.kinesis.metrics.MetricsFactory;
|
||||||
|
import software.amazon.kinesis.metrics.MetricsLevel;
|
||||||
|
import software.amazon.kinesis.metrics.MetricsScope;
|
||||||
|
import software.amazon.kinesis.metrics.MetricsUtil;
|
||||||
|
|
||||||
|
import static java.util.Objects.nonNull;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* MigrationAdaptiveLeaderDecider that wraps around the actual LeaderDecider which can dynamically
|
||||||
|
* change based on the MigrationStateMachine.
|
||||||
|
*/
|
||||||
|
@Slf4j
|
||||||
|
@KinesisClientInternalApi
|
||||||
|
@ThreadSafe
|
||||||
|
public class MigrationAdaptiveLeaderDecider implements LeaderDecider {
|
||||||
|
|
||||||
|
private final MetricsFactory metricsFactory;
|
||||||
|
private LeaderDecider currentLeaderDecider;
|
||||||
|
|
||||||
|
public MigrationAdaptiveLeaderDecider(final MetricsFactory metricsFactory) {
|
||||||
|
this.metricsFactory = metricsFactory;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public synchronized Boolean isLeader(final String workerId) {
|
||||||
|
if (currentLeaderDecider == null) {
|
||||||
|
throw new IllegalStateException("LeaderDecider uninitialized");
|
||||||
|
}
|
||||||
|
|
||||||
|
final MetricsScope scope =
|
||||||
|
MetricsUtil.createMetricsWithOperation(metricsFactory, METRIC_OPERATION_LEADER_DECIDER);
|
||||||
|
try {
|
||||||
|
publishSelectedLeaderDeciderMetrics(scope, currentLeaderDecider);
|
||||||
|
return currentLeaderDecider.isLeader(workerId);
|
||||||
|
} finally {
|
||||||
|
MetricsUtil.endScope(scope);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private static void publishSelectedLeaderDeciderMetrics(
|
||||||
|
final MetricsScope scope, final LeaderDecider leaderDecider) {
|
||||||
|
scope.addData(
|
||||||
|
String.format(leaderDecider.getClass().getSimpleName()), 1D, StandardUnit.COUNT, MetricsLevel.DETAILED);
|
||||||
|
}
|
||||||
|
|
||||||
|
public synchronized void updateLeaderDecider(final LeaderDecider leaderDecider) {
|
||||||
|
if (currentLeaderDecider != null) {
|
||||||
|
currentLeaderDecider.shutdown();
|
||||||
|
log.info(
|
||||||
|
"Updating leader decider dynamically from {} to {}",
|
||||||
|
this.currentLeaderDecider.getClass().getSimpleName(),
|
||||||
|
leaderDecider.getClass().getSimpleName());
|
||||||
|
} else {
|
||||||
|
log.info(
|
||||||
|
"Initializing dynamic leader decider with {}",
|
||||||
|
leaderDecider.getClass().getSimpleName());
|
||||||
|
}
|
||||||
|
currentLeaderDecider = leaderDecider;
|
||||||
|
currentLeaderDecider.initialize();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void shutdown() {
|
||||||
|
if (nonNull(currentLeaderDecider)) {
|
||||||
|
log.info("Shutting down current {}", currentLeaderDecider.getClass().getSimpleName());
|
||||||
|
currentLeaderDecider.shutdown();
|
||||||
|
currentLeaderDecider = null;
|
||||||
|
} else {
|
||||||
|
log.info("LeaderDecider has already been shutdown");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -81,8 +81,20 @@ public class DynamoUtils {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public static AttributeValue createAttributeValue(Double doubleValue) {
|
||||||
|
if (doubleValue == null) {
|
||||||
|
throw new IllegalArgumentException("Double attributeValues cannot be null.");
|
||||||
|
}
|
||||||
|
|
||||||
|
return AttributeValue.builder().n(doubleValue.toString()).build();
|
||||||
|
}
|
||||||
|
|
||||||
public static String safeGetString(Map<String, AttributeValue> dynamoRecord, String key) {
|
public static String safeGetString(Map<String, AttributeValue> dynamoRecord, String key) {
|
||||||
AttributeValue av = dynamoRecord.get(key);
|
AttributeValue av = dynamoRecord.get(key);
|
||||||
|
return safeGetString(av);
|
||||||
|
}
|
||||||
|
|
||||||
|
public static String safeGetString(AttributeValue av) {
|
||||||
if (av == null) {
|
if (av == null) {
|
||||||
return null;
|
return null;
|
||||||
} else {
|
} else {
|
||||||
|
|
@ -99,4 +111,13 @@ public class DynamoUtils {
|
||||||
return av.ss();
|
return av.ss();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public static Double safeGetDouble(Map<String, AttributeValue> dynamoRecord, String key) {
|
||||||
|
AttributeValue av = dynamoRecord.get(key);
|
||||||
|
if (av == null) {
|
||||||
|
return null;
|
||||||
|
} else {
|
||||||
|
return new Double(av.n());
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -103,26 +103,6 @@ public class KinesisShardDetector implements ShardDetector {
|
||||||
|
|
||||||
private static final Boolean THROW_RESOURCE_NOT_FOUND_EXCEPTION = true;
|
private static final Boolean THROW_RESOURCE_NOT_FOUND_EXCEPTION = true;
|
||||||
|
|
||||||
@Deprecated
|
|
||||||
public KinesisShardDetector(
|
|
||||||
KinesisAsyncClient kinesisClient,
|
|
||||||
String streamName,
|
|
||||||
long listShardsBackoffTimeInMillis,
|
|
||||||
int maxListShardsRetryAttempts,
|
|
||||||
long listShardsCacheAllowedAgeInSeconds,
|
|
||||||
int maxCacheMissesBeforeReload,
|
|
||||||
int cacheMissWarningModulus) {
|
|
||||||
this(
|
|
||||||
kinesisClient,
|
|
||||||
StreamIdentifier.singleStreamInstance(streamName),
|
|
||||||
listShardsBackoffTimeInMillis,
|
|
||||||
maxListShardsRetryAttempts,
|
|
||||||
listShardsCacheAllowedAgeInSeconds,
|
|
||||||
maxCacheMissesBeforeReload,
|
|
||||||
cacheMissWarningModulus,
|
|
||||||
LeaseManagementConfig.DEFAULT_REQUEST_TIMEOUT);
|
|
||||||
}
|
|
||||||
|
|
||||||
public KinesisShardDetector(
|
public KinesisShardDetector(
|
||||||
KinesisAsyncClient kinesisClient,
|
KinesisAsyncClient kinesisClient,
|
||||||
StreamIdentifier streamIdentifier,
|
StreamIdentifier streamIdentifier,
|
||||||
|
|
|
||||||
|
|
@ -46,7 +46,11 @@ import software.amazon.kinesis.retrieval.kpl.ExtendedSequenceNumber;
|
||||||
"lastCounterIncrementNanos",
|
"lastCounterIncrementNanos",
|
||||||
"childShardIds",
|
"childShardIds",
|
||||||
"pendingCheckpointState",
|
"pendingCheckpointState",
|
||||||
"isMarkedForLeaseSteal"
|
"isMarkedForLeaseSteal",
|
||||||
|
"throughputKBps",
|
||||||
|
"checkpointOwner",
|
||||||
|
"checkpointOwnerTimeoutTimestampMillis",
|
||||||
|
"isExpiredOrUnassigned"
|
||||||
})
|
})
|
||||||
@ToString
|
@ToString
|
||||||
public class Lease {
|
public class Lease {
|
||||||
|
|
@ -104,6 +108,33 @@ public class Lease {
|
||||||
@Setter
|
@Setter
|
||||||
private boolean isMarkedForLeaseSteal;
|
private boolean isMarkedForLeaseSteal;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* If true, this indicates that lease is ready to be immediately reassigned.
|
||||||
|
*/
|
||||||
|
@Setter
|
||||||
|
private boolean isExpiredOrUnassigned;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Throughput in Kbps for the lease.
|
||||||
|
*/
|
||||||
|
private Double throughputKBps;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Owner of the checkpoint. The attribute is used for graceful shutdowns to indicate the owner that
|
||||||
|
* is allowed to write the checkpoint.
|
||||||
|
*/
|
||||||
|
@Setter
|
||||||
|
private String checkpointOwner;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* This field is used for tracking when the shutdown was requested on the lease so we can expire it. This is
|
||||||
|
* deliberately not persisted in DynamoDB because leaseOwner are expected to transfer lease from itself to the
|
||||||
|
* next owner during shutdown. If the worker dies before shutdown the lease will just become expired then we can
|
||||||
|
* pick it up. If for some reason worker is not able to shut down and continues holding onto the lease
|
||||||
|
* this timeout will kick in and force a lease transfer.
|
||||||
|
*/
|
||||||
|
@Setter
|
||||||
|
private Long checkpointOwnerTimeoutTimestampMillis;
|
||||||
/**
|
/**
|
||||||
* Count of distinct lease holders between checkpoints.
|
* Count of distinct lease holders between checkpoints.
|
||||||
*/
|
*/
|
||||||
|
|
@ -242,6 +273,54 @@ public class Lease {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @return true if checkpoint owner is set. Indicating a requested shutdown.
|
||||||
|
*/
|
||||||
|
public boolean shutdownRequested() {
|
||||||
|
return checkpointOwner != null;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Check whether lease should be blocked on pending checkpoint. We DON'T block if
|
||||||
|
* - lease is expired (Expired lease should be assigned right away) OR
|
||||||
|
* ----- at this point we know lease is assigned -----
|
||||||
|
* - lease is shardEnd (No more processing possible) OR
|
||||||
|
* - lease is NOT requested for shutdown OR
|
||||||
|
* - lease shutdown expired
|
||||||
|
*
|
||||||
|
* @param currentTimeMillis current time in milliseconds
|
||||||
|
* @return true if lease is blocked on pending checkpoint
|
||||||
|
*/
|
||||||
|
public boolean blockedOnPendingCheckpoint(long currentTimeMillis) {
|
||||||
|
// using ORs and negate
|
||||||
|
return !(isExpiredOrUnassigned
|
||||||
|
|| ExtendedSequenceNumber.SHARD_END.equals(checkpoint)
|
||||||
|
|| !shutdownRequested()
|
||||||
|
// if shutdown requested then checkpointOwnerTimeoutTimestampMillis should present
|
||||||
|
|| currentTimeMillis - checkpointOwnerTimeoutTimestampMillis >= 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Check whether lease is eligible for graceful shutdown. It's eligible if
|
||||||
|
* - lease is still assigned (not expired) AND
|
||||||
|
* - lease is NOT shardEnd (No more processing possible AND
|
||||||
|
* - lease is NOT requested for shutdown
|
||||||
|
*
|
||||||
|
* @return true if lease is eligible for graceful shutdown
|
||||||
|
*/
|
||||||
|
public boolean isEligibleForGracefulShutdown() {
|
||||||
|
return !isExpiredOrUnassigned && !ExtendedSequenceNumber.SHARD_END.equals(checkpoint) && !shutdownRequested();
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Need to handle the case during graceful shutdown where leaseOwner isn't the current owner
|
||||||
|
*
|
||||||
|
* @return the actual owner
|
||||||
|
*/
|
||||||
|
public String actualOwner() {
|
||||||
|
return checkpointOwner == null ? leaseOwner : checkpointOwner;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @return true if lease is not currently owned
|
* @return true if lease is not currently owned
|
||||||
*/
|
*/
|
||||||
|
|
@ -343,6 +422,15 @@ public class Lease {
|
||||||
this.childShardIds.addAll(childShardIds);
|
this.childShardIds.addAll(childShardIds);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Sets throughputKbps.
|
||||||
|
*
|
||||||
|
* @param throughputKBps may not be null
|
||||||
|
*/
|
||||||
|
public void throughputKBps(double throughputKBps) {
|
||||||
|
this.throughputKBps = throughputKBps;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Set the hash range key for this shard.
|
* Set the hash range key for this shard.
|
||||||
* @param hashKeyRangeForLease
|
* @param hashKeyRangeForLease
|
||||||
|
|
@ -370,6 +458,8 @@ public class Lease {
|
||||||
* @return A deep copy of this object.
|
* @return A deep copy of this object.
|
||||||
*/
|
*/
|
||||||
public Lease copy() {
|
public Lease copy() {
|
||||||
return new Lease(this);
|
final Lease lease = new Lease(this);
|
||||||
|
lease.checkpointOwner(this.checkpointOwner);
|
||||||
|
return lease;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -20,6 +20,7 @@ import java.util.Collections;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.UUID;
|
import java.util.UUID;
|
||||||
|
|
||||||
|
import software.amazon.kinesis.coordinator.MigrationAdaptiveLeaseAssignmentModeProvider;
|
||||||
import software.amazon.kinesis.leases.dynamodb.DynamoDBLeaseCoordinator;
|
import software.amazon.kinesis.leases.dynamodb.DynamoDBLeaseCoordinator;
|
||||||
import software.amazon.kinesis.leases.exceptions.DependencyException;
|
import software.amazon.kinesis.leases.exceptions.DependencyException;
|
||||||
import software.amazon.kinesis.leases.exceptions.InvalidStateException;
|
import software.amazon.kinesis.leases.exceptions.InvalidStateException;
|
||||||
|
|
@ -38,11 +39,14 @@ public interface LeaseCoordinator {
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Start background LeaseHolder and LeaseTaker threads.
|
* Start background LeaseHolder and LeaseTaker threads.
|
||||||
|
* @param leaseAssignmentModeProvider provider of Lease Assignment mode to determine whether to start components
|
||||||
|
* for both V2 and V3 functionality or only V3 functionality
|
||||||
* @throws ProvisionedThroughputException If we can't talk to DynamoDB due to insufficient capacity.
|
* @throws ProvisionedThroughputException If we can't talk to DynamoDB due to insufficient capacity.
|
||||||
* @throws InvalidStateException If the lease table doesn't exist
|
* @throws InvalidStateException If the lease table doesn't exist
|
||||||
* @throws DependencyException If we encountered exception taking to DynamoDB
|
* @throws DependencyException If we encountered exception taking to DynamoDB
|
||||||
*/
|
*/
|
||||||
void start() throws DependencyException, InvalidStateException, ProvisionedThroughputException;
|
void start(final MigrationAdaptiveLeaseAssignmentModeProvider leaseAssignmentModeProvider)
|
||||||
|
throws DependencyException, InvalidStateException, ProvisionedThroughputException;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Runs a single iteration of the lease taker - used by integration tests.
|
* Runs a single iteration of the lease taker - used by integration tests.
|
||||||
|
|
@ -152,4 +156,9 @@ public interface LeaseCoordinator {
|
||||||
* @return LeaseCoordinator
|
* @return LeaseCoordinator
|
||||||
*/
|
*/
|
||||||
DynamoDBLeaseCoordinator initialLeaseTableReadCapacity(long readCapacity);
|
DynamoDBLeaseCoordinator initialLeaseTableReadCapacity(long readCapacity);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @return instance of {@link LeaseStatsRecorder}
|
||||||
|
*/
|
||||||
|
LeaseStatsRecorder leaseStatsRecorder();
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,20 @@
|
||||||
|
package software.amazon.kinesis.leases;
|
||||||
|
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
import software.amazon.kinesis.leases.exceptions.DependencyException;
|
||||||
|
import software.amazon.kinesis.leases.exceptions.InvalidStateException;
|
||||||
|
import software.amazon.kinesis.leases.exceptions.ProvisionedThroughputException;
|
||||||
|
|
||||||
|
public interface LeaseDiscoverer {
|
||||||
|
/**
|
||||||
|
* Identifies the leases that are assigned to the current worker but are not being tracked and processed by the
|
||||||
|
* current worker.
|
||||||
|
*
|
||||||
|
* @return list of leases assigned to worker which doesn't exist in {@param currentHeldLeaseKeys}
|
||||||
|
* @throws DependencyException if DynamoDB scan fails in an unexpected way
|
||||||
|
* @throws InvalidStateException if lease table does not exist
|
||||||
|
* @throws ProvisionedThroughputException if DynamoDB scan fails due to lack of capacity
|
||||||
|
*/
|
||||||
|
List<Lease> discoverNewLeases() throws ProvisionedThroughputException, InvalidStateException, DependencyException;
|
||||||
|
}
|
||||||
|
|
@ -16,7 +16,9 @@
|
||||||
package software.amazon.kinesis.leases;
|
package software.amazon.kinesis.leases;
|
||||||
|
|
||||||
import java.time.Duration;
|
import java.time.Duration;
|
||||||
|
import java.util.ArrayList;
|
||||||
import java.util.Collection;
|
import java.util.Collection;
|
||||||
|
import java.util.List;
|
||||||
import java.util.concurrent.ExecutorService;
|
import java.util.concurrent.ExecutorService;
|
||||||
import java.util.concurrent.SynchronousQueue;
|
import java.util.concurrent.SynchronousQueue;
|
||||||
import java.util.concurrent.ThreadFactory;
|
import java.util.concurrent.ThreadFactory;
|
||||||
|
|
@ -25,6 +27,7 @@ import java.util.concurrent.TimeUnit;
|
||||||
import java.util.function.Function;
|
import java.util.function.Function;
|
||||||
|
|
||||||
import com.google.common.util.concurrent.ThreadFactoryBuilder;
|
import com.google.common.util.concurrent.ThreadFactoryBuilder;
|
||||||
|
import lombok.Builder;
|
||||||
import lombok.Data;
|
import lombok.Data;
|
||||||
import lombok.NonNull;
|
import lombok.NonNull;
|
||||||
import lombok.experimental.Accessors;
|
import lombok.experimental.Accessors;
|
||||||
|
|
@ -34,14 +37,17 @@ import software.amazon.awssdk.services.dynamodb.DynamoDbAsyncClient;
|
||||||
import software.amazon.awssdk.services.dynamodb.model.BillingMode;
|
import software.amazon.awssdk.services.dynamodb.model.BillingMode;
|
||||||
import software.amazon.awssdk.services.dynamodb.model.Tag;
|
import software.amazon.awssdk.services.dynamodb.model.Tag;
|
||||||
import software.amazon.awssdk.services.kinesis.KinesisAsyncClient;
|
import software.amazon.awssdk.services.kinesis.KinesisAsyncClient;
|
||||||
|
import software.amazon.kinesis.common.DdbTableConfig;
|
||||||
import software.amazon.kinesis.common.InitialPositionInStream;
|
import software.amazon.kinesis.common.InitialPositionInStream;
|
||||||
import software.amazon.kinesis.common.InitialPositionInStreamExtended;
|
import software.amazon.kinesis.common.InitialPositionInStreamExtended;
|
||||||
import software.amazon.kinesis.common.LeaseCleanupConfig;
|
import software.amazon.kinesis.common.LeaseCleanupConfig;
|
||||||
import software.amazon.kinesis.common.StreamConfig;
|
import software.amazon.kinesis.common.StreamConfig;
|
||||||
import software.amazon.kinesis.leases.dynamodb.DynamoDBLeaseManagementFactory;
|
import software.amazon.kinesis.leases.dynamodb.DynamoDBLeaseManagementFactory;
|
||||||
|
import software.amazon.kinesis.leases.dynamodb.DynamoDBLeaseSerializer;
|
||||||
import software.amazon.kinesis.leases.dynamodb.TableCreatorCallback;
|
import software.amazon.kinesis.leases.dynamodb.TableCreatorCallback;
|
||||||
import software.amazon.kinesis.metrics.MetricsFactory;
|
import software.amazon.kinesis.metrics.MetricsFactory;
|
||||||
import software.amazon.kinesis.metrics.NullMetricsFactory;
|
import software.amazon.kinesis.metrics.NullMetricsFactory;
|
||||||
|
import software.amazon.kinesis.worker.metric.WorkerMetric;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Used by the KCL to configure lease management.
|
* Used by the KCL to configure lease management.
|
||||||
|
|
@ -209,6 +215,9 @@ public class LeaseManagementConfig {
|
||||||
|
|
||||||
private BillingMode billingMode = BillingMode.PAY_PER_REQUEST;
|
private BillingMode billingMode = BillingMode.PAY_PER_REQUEST;
|
||||||
|
|
||||||
|
private WorkerUtilizationAwareAssignmentConfig workerUtilizationAwareAssignmentConfig =
|
||||||
|
new WorkerUtilizationAwareAssignmentConfig();
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Whether to enable deletion protection on the DynamoDB lease table created by KCL. This does not update
|
* Whether to enable deletion protection on the DynamoDB lease table created by KCL. This does not update
|
||||||
* already existing tables.
|
* already existing tables.
|
||||||
|
|
@ -276,14 +285,17 @@ public class LeaseManagementConfig {
|
||||||
}
|
}
|
||||||
|
|
||||||
public LeaseManagementConfig(
|
public LeaseManagementConfig(
|
||||||
String tableName,
|
final String tableName,
|
||||||
DynamoDbAsyncClient dynamoDBClient,
|
final String applicationName,
|
||||||
KinesisAsyncClient kinesisClient,
|
final DynamoDbAsyncClient dynamoDBClient,
|
||||||
String workerIdentifier) {
|
final KinesisAsyncClient kinesisClient,
|
||||||
|
final String workerIdentifier) {
|
||||||
this.tableName = tableName;
|
this.tableName = tableName;
|
||||||
this.dynamoDBClient = dynamoDBClient;
|
this.dynamoDBClient = dynamoDBClient;
|
||||||
this.kinesisClient = kinesisClient;
|
this.kinesisClient = kinesisClient;
|
||||||
this.workerIdentifier = workerIdentifier;
|
this.workerIdentifier = workerIdentifier;
|
||||||
|
this.workerUtilizationAwareAssignmentConfig.workerMetricsTableConfig =
|
||||||
|
new WorkerMetricsTableConfig(applicationName);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -350,10 +362,18 @@ public class LeaseManagementConfig {
|
||||||
*/
|
*/
|
||||||
private TableCreatorCallback tableCreatorCallback = TableCreatorCallback.NOOP_TABLE_CREATOR_CALLBACK;
|
private TableCreatorCallback tableCreatorCallback = TableCreatorCallback.NOOP_TABLE_CREATOR_CALLBACK;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @deprecated never used and will be removed in future releases
|
||||||
|
*/
|
||||||
|
@Deprecated
|
||||||
private HierarchicalShardSyncer hierarchicalShardSyncer;
|
private HierarchicalShardSyncer hierarchicalShardSyncer;
|
||||||
|
|
||||||
private LeaseManagementFactory leaseManagementFactory;
|
private LeaseManagementFactory leaseManagementFactory;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @deprecated never used and will be removed in future releases
|
||||||
|
*/
|
||||||
|
@Deprecated
|
||||||
public HierarchicalShardSyncer hierarchicalShardSyncer() {
|
public HierarchicalShardSyncer hierarchicalShardSyncer() {
|
||||||
if (hierarchicalShardSyncer == null) {
|
if (hierarchicalShardSyncer == null) {
|
||||||
hierarchicalShardSyncer = new HierarchicalShardSyncer();
|
hierarchicalShardSyncer = new HierarchicalShardSyncer();
|
||||||
|
|
@ -361,39 +381,63 @@ public class LeaseManagementConfig {
|
||||||
return hierarchicalShardSyncer;
|
return hierarchicalShardSyncer;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Configuration class for controlling the graceful handoff of leases.
|
||||||
|
* This configuration allows tuning of the shutdown behavior during lease transfers.
|
||||||
|
* <p>
|
||||||
|
* It provides settings to control the timeout period for waiting on the record processor
|
||||||
|
* to shut down and an option to enable or disable graceful lease handoff.
|
||||||
|
* </p>
|
||||||
|
*/
|
||||||
|
@Data
|
||||||
|
@Builder
|
||||||
|
@Accessors(fluent = true)
|
||||||
|
public static class GracefulLeaseHandoffConfig {
|
||||||
|
/**
|
||||||
|
* The minimum amount of time (in milliseconds) to wait for the current shard's RecordProcessor
|
||||||
|
* to gracefully shut down before forcefully transferring the lease to the next owner.
|
||||||
|
* <p>
|
||||||
|
* If each call to {@code processRecords} is expected to run longer than the default value,
|
||||||
|
* it makes sense to set this to a higher value to ensure the RecordProcessor has enough
|
||||||
|
* time to complete its processing.
|
||||||
|
* </p>
|
||||||
|
* <p>
|
||||||
|
* Default value is 30,000 milliseconds (30 seconds).
|
||||||
|
* </p>
|
||||||
|
*/
|
||||||
|
@Builder.Default
|
||||||
|
private long gracefulLeaseHandoffTimeoutMillis = 30_000L;
|
||||||
|
/**
|
||||||
|
* Flag to enable or disable the graceful lease handoff mechanism.
|
||||||
|
* <p>
|
||||||
|
* When set to {@code true}, the KCL will attempt to gracefully transfer leases by
|
||||||
|
* allowing the shard's RecordProcessor sufficient time to complete processing before
|
||||||
|
* handing off the lease to another worker. When {@code false}, the lease will be
|
||||||
|
* handed off without waiting for the RecordProcessor to shut down gracefully. Note
|
||||||
|
* that checkpointing is expected to be implemented inside {@code shutdownRequested}
|
||||||
|
* for this feature to work end to end.
|
||||||
|
* </p>
|
||||||
|
* <p>
|
||||||
|
* Default value is {@code true}.
|
||||||
|
* </p>
|
||||||
|
*/
|
||||||
|
@Builder.Default
|
||||||
|
private boolean isGracefulLeaseHandoffEnabled = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
private GracefulLeaseHandoffConfig gracefulLeaseHandoffConfig =
|
||||||
|
GracefulLeaseHandoffConfig.builder().build();
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @deprecated This is no longer invoked, but {@code leaseManagementFactory(LeaseSerializer, boolean)}
|
||||||
|
* is invoked instead. Please remove implementation for this method as future
|
||||||
|
* releases will remove this API.
|
||||||
|
*/
|
||||||
@Deprecated
|
@Deprecated
|
||||||
public LeaseManagementFactory leaseManagementFactory() {
|
public LeaseManagementFactory leaseManagementFactory() {
|
||||||
if (leaseManagementFactory == null) {
|
if (leaseManagementFactory == null) {
|
||||||
Validate.notEmpty(streamName(), "Stream name is empty");
|
Validate.notEmpty(streamName(), "Stream name is empty");
|
||||||
leaseManagementFactory = new DynamoDBLeaseManagementFactory(
|
leaseManagementFactory(new DynamoDBLeaseSerializer(), false);
|
||||||
kinesisClient(),
|
|
||||||
streamName(),
|
|
||||||
dynamoDBClient(),
|
|
||||||
tableName(),
|
|
||||||
workerIdentifier(),
|
|
||||||
executorService(),
|
|
||||||
initialPositionInStream(),
|
|
||||||
failoverTimeMillis(),
|
|
||||||
epsilonMillis(),
|
|
||||||
maxLeasesForWorker(),
|
|
||||||
maxLeasesToStealAtOneTime(),
|
|
||||||
maxLeaseRenewalThreads(),
|
|
||||||
cleanupLeasesUponShardCompletion(),
|
|
||||||
ignoreUnexpectedChildShards(),
|
|
||||||
shardSyncIntervalMillis(),
|
|
||||||
consistentReads(),
|
|
||||||
listShardsBackoffTimeInMillis(),
|
|
||||||
maxListShardsRetryAttempts(),
|
|
||||||
maxCacheMissesBeforeReload(),
|
|
||||||
listShardsCacheAllowedAgeInSeconds(),
|
|
||||||
cacheMissWarningModulus(),
|
|
||||||
initialLeaseTableReadCapacity(),
|
|
||||||
initialLeaseTableWriteCapacity(),
|
|
||||||
hierarchicalShardSyncer(),
|
|
||||||
tableCreatorCallback(),
|
|
||||||
dynamoDbRequestTimeout(),
|
|
||||||
billingMode(),
|
|
||||||
tags());
|
|
||||||
}
|
}
|
||||||
return leaseManagementFactory;
|
return leaseManagementFactory;
|
||||||
}
|
}
|
||||||
|
|
@ -430,7 +474,6 @@ public class LeaseManagementConfig {
|
||||||
cacheMissWarningModulus(),
|
cacheMissWarningModulus(),
|
||||||
initialLeaseTableReadCapacity(),
|
initialLeaseTableReadCapacity(),
|
||||||
initialLeaseTableWriteCapacity(),
|
initialLeaseTableWriteCapacity(),
|
||||||
hierarchicalShardSyncer(),
|
|
||||||
tableCreatorCallback(),
|
tableCreatorCallback(),
|
||||||
dynamoDbRequestTimeout(),
|
dynamoDbRequestTimeout(),
|
||||||
billingMode(),
|
billingMode(),
|
||||||
|
|
@ -440,7 +483,9 @@ public class LeaseManagementConfig {
|
||||||
leaseSerializer,
|
leaseSerializer,
|
||||||
customShardDetectorProvider(),
|
customShardDetectorProvider(),
|
||||||
isMultiStreamingMode,
|
isMultiStreamingMode,
|
||||||
leaseCleanupConfig());
|
leaseCleanupConfig(),
|
||||||
|
workerUtilizationAwareAssignmentConfig(),
|
||||||
|
gracefulLeaseHandoffConfig);
|
||||||
}
|
}
|
||||||
return leaseManagementFactory;
|
return leaseManagementFactory;
|
||||||
}
|
}
|
||||||
|
|
@ -454,4 +499,90 @@ public class LeaseManagementConfig {
|
||||||
this.leaseManagementFactory = leaseManagementFactory;
|
this.leaseManagementFactory = leaseManagementFactory;
|
||||||
return this;
|
return this;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Data
|
||||||
|
@Accessors(fluent = true)
|
||||||
|
public static class WorkerUtilizationAwareAssignmentConfig {
|
||||||
|
/**
|
||||||
|
* This defines the frequency of capturing worker metric stats in memory. Default is 1s
|
||||||
|
*/
|
||||||
|
private long inMemoryWorkerMetricsCaptureFrequencyMillis =
|
||||||
|
Duration.ofSeconds(1L).toMillis();
|
||||||
|
/**
|
||||||
|
* This defines the frequency of reporting worker metric stats to storage. Default is 30s
|
||||||
|
*/
|
||||||
|
private long workerMetricsReporterFreqInMillis = Duration.ofSeconds(30).toMillis();
|
||||||
|
/**
|
||||||
|
* These are the no. of metrics that are persisted in storage in WorkerMetricStats ddb table.
|
||||||
|
*/
|
||||||
|
private int noOfPersistedMetricsPerWorkerMetrics = 10;
|
||||||
|
/**
|
||||||
|
* Option to disable workerMetrics to use in lease balancing.
|
||||||
|
*/
|
||||||
|
private boolean disableWorkerMetrics = false;
|
||||||
|
/**
|
||||||
|
* List of workerMetrics for the application.
|
||||||
|
*/
|
||||||
|
private List<WorkerMetric> workerMetricList = new ArrayList<>();
|
||||||
|
/**
|
||||||
|
* Max throughput per host KBps, default is unlimited.
|
||||||
|
*/
|
||||||
|
private double maxThroughputPerHostKBps = Double.MAX_VALUE;
|
||||||
|
/**
|
||||||
|
* Percentage of value to achieve critical dampening during this case
|
||||||
|
*/
|
||||||
|
private int dampeningPercentage = 60;
|
||||||
|
/**
|
||||||
|
* Percentage value used to trigger reBalance. If fleet has workers which are have metrics value more or less
|
||||||
|
* than 10% of fleet level average then reBalance is triggered.
|
||||||
|
* Leases are taken from workers with metrics value more than fleet level average. The load to take from these
|
||||||
|
* workers is determined by evaluating how far they are with respect to fleet level average.
|
||||||
|
*/
|
||||||
|
private int reBalanceThresholdPercentage = 10;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* The allowThroughputOvershoot flag determines whether leases should still be taken even if
|
||||||
|
* it causes the total assigned throughput to exceed the desired throughput to take for re-balance.
|
||||||
|
* Enabling this flag provides more flexibility for the LeaseAssignmentManager to explore additional
|
||||||
|
* assignment possibilities, which can lead to faster throughput convergence.
|
||||||
|
*/
|
||||||
|
private boolean allowThroughputOvershoot = true;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Duration after which workerMetricStats entry from WorkerMetricStats table will be cleaned up. When an entry's
|
||||||
|
* lastUpdateTime is older than staleWorkerMetricsEntryCleanupDuration from current time, entry will be removed
|
||||||
|
* from the table.
|
||||||
|
*/
|
||||||
|
private Duration staleWorkerMetricsEntryCleanupDuration = Duration.ofDays(1);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* configuration to configure how to create the WorkerMetricStats table, such as table name,
|
||||||
|
* billing mode, provisioned capacity. If no table name is specified, the table name will
|
||||||
|
* default to applicationName-WorkerMetricStats. If no billing more is chosen, default is
|
||||||
|
* On-Demand.
|
||||||
|
*/
|
||||||
|
private WorkerMetricsTableConfig workerMetricsTableConfig;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Frequency to perform worker variance balancing. This value is used with respect to the LAM frequency,
|
||||||
|
* that is every third (as default) iteration of LAM the worker variance balancing will be performed.
|
||||||
|
* Setting it to 1 will make varianceBalancing run on every iteration of LAM and 2 on every 2nd iteration
|
||||||
|
* and so on.
|
||||||
|
* NOTE: LAM frequency = failoverTimeMillis
|
||||||
|
*/
|
||||||
|
private int varianceBalancingFrequency = 3;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Alpha value used for calculating exponential moving average of worker's metricStats. Selecting
|
||||||
|
* higher alpha value gives more weightage to recent value and thus low smoothing effect on computed average
|
||||||
|
* and selecting smaller alpha values gives more weightage to past value and high smoothing effect.
|
||||||
|
*/
|
||||||
|
private double workerMetricsEMAAlpha = 0.5;
|
||||||
|
}
|
||||||
|
|
||||||
|
public static class WorkerMetricsTableConfig extends DdbTableConfig {
|
||||||
|
public WorkerMetricsTableConfig(final String applicationName) {
|
||||||
|
super(applicationName, "WorkerMetricStats");
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -15,9 +15,12 @@
|
||||||
|
|
||||||
package software.amazon.kinesis.leases;
|
package software.amazon.kinesis.leases;
|
||||||
|
|
||||||
|
import java.util.concurrent.ConcurrentMap;
|
||||||
|
|
||||||
import software.amazon.kinesis.common.StreamConfig;
|
import software.amazon.kinesis.common.StreamConfig;
|
||||||
import software.amazon.kinesis.coordinator.DeletedStreamListProvider;
|
import software.amazon.kinesis.coordinator.DeletedStreamListProvider;
|
||||||
import software.amazon.kinesis.leases.dynamodb.DynamoDBLeaseRefresher;
|
import software.amazon.kinesis.leases.dynamodb.DynamoDBLeaseRefresher;
|
||||||
|
import software.amazon.kinesis.lifecycle.ShardConsumer;
|
||||||
import software.amazon.kinesis.metrics.MetricsFactory;
|
import software.amazon.kinesis.metrics.MetricsFactory;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -26,10 +29,27 @@ import software.amazon.kinesis.metrics.MetricsFactory;
|
||||||
public interface LeaseManagementFactory {
|
public interface LeaseManagementFactory {
|
||||||
LeaseCoordinator createLeaseCoordinator(MetricsFactory metricsFactory);
|
LeaseCoordinator createLeaseCoordinator(MetricsFactory metricsFactory);
|
||||||
|
|
||||||
ShardSyncTaskManager createShardSyncTaskManager(MetricsFactory metricsFactory);
|
default LeaseCoordinator createLeaseCoordinator(
|
||||||
|
MetricsFactory metricsFactory, ConcurrentMap<ShardInfo, ShardConsumer> shardInfoShardConsumerMap) {
|
||||||
|
throw new UnsupportedOperationException("Not implemented");
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @deprecated This method is never invoked, please remove implementation of this method
|
||||||
|
* as it will be removed in future releases.
|
||||||
|
*/
|
||||||
|
@Deprecated
|
||||||
|
default ShardSyncTaskManager createShardSyncTaskManager(MetricsFactory metricsFactory) {
|
||||||
|
throw new UnsupportedOperationException("Deprecated");
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @deprecated This method is never invoked, please remove implementation of this method
|
||||||
|
* as it will be removed in future releases.
|
||||||
|
*/
|
||||||
|
@Deprecated
|
||||||
default ShardSyncTaskManager createShardSyncTaskManager(MetricsFactory metricsFactory, StreamConfig streamConfig) {
|
default ShardSyncTaskManager createShardSyncTaskManager(MetricsFactory metricsFactory, StreamConfig streamConfig) {
|
||||||
throw new UnsupportedOperationException();
|
throw new UnsupportedOperationException("Deprecated");
|
||||||
}
|
}
|
||||||
|
|
||||||
default ShardSyncTaskManager createShardSyncTaskManager(
|
default ShardSyncTaskManager createShardSyncTaskManager(
|
||||||
|
|
@ -41,10 +61,17 @@ public interface LeaseManagementFactory {
|
||||||
|
|
||||||
DynamoDBLeaseRefresher createLeaseRefresher();
|
DynamoDBLeaseRefresher createLeaseRefresher();
|
||||||
|
|
||||||
ShardDetector createShardDetector();
|
/**
|
||||||
|
* @deprecated This method is never invoked, please remove implementation of this method
|
||||||
|
* as it will be removed in future releases.
|
||||||
|
*/
|
||||||
|
@Deprecated
|
||||||
|
default ShardDetector createShardDetector() {
|
||||||
|
throw new UnsupportedOperationException("Deprecated");
|
||||||
|
}
|
||||||
|
|
||||||
default ShardDetector createShardDetector(StreamConfig streamConfig) {
|
default ShardDetector createShardDetector(StreamConfig streamConfig) {
|
||||||
throw new UnsupportedOperationException();
|
throw new UnsupportedOperationException("Not implemented");
|
||||||
}
|
}
|
||||||
|
|
||||||
LeaseCleanupManager createLeaseCleanupManager(MetricsFactory metricsFactory);
|
LeaseCleanupManager createLeaseCleanupManager(MetricsFactory metricsFactory);
|
||||||
|
|
|
||||||
|
|
@ -15,6 +15,9 @@
|
||||||
package software.amazon.kinesis.leases;
|
package software.amazon.kinesis.leases;
|
||||||
|
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
import java.util.Map;
|
||||||
|
import java.util.concurrent.ExecutorService;
|
||||||
|
import java.util.stream.Collectors;
|
||||||
|
|
||||||
import software.amazon.kinesis.common.StreamIdentifier;
|
import software.amazon.kinesis.common.StreamIdentifier;
|
||||||
import software.amazon.kinesis.leases.exceptions.DependencyException;
|
import software.amazon.kinesis.leases.exceptions.DependencyException;
|
||||||
|
|
@ -75,6 +78,37 @@ public interface LeaseRefresher {
|
||||||
*/
|
*/
|
||||||
boolean waitUntilLeaseTableExists(long secondsBetweenPolls, long timeoutSeconds) throws DependencyException;
|
boolean waitUntilLeaseTableExists(long secondsBetweenPolls, long timeoutSeconds) throws DependencyException;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Creates the LeaseOwnerToLeaseKey index on the lease table if it doesn't exist and returns the status of index.
|
||||||
|
*
|
||||||
|
* @return indexStatus status of the index.
|
||||||
|
* @throws DependencyException if storage's describe API fails in an unexpected way
|
||||||
|
*/
|
||||||
|
default String createLeaseOwnerToLeaseKeyIndexIfNotExists() throws DependencyException {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Blocks until the index exists by polling storage till either the index is ACTIVE or else timeout has
|
||||||
|
* happened.
|
||||||
|
*
|
||||||
|
* @param secondsBetweenPolls time to wait between polls in seconds
|
||||||
|
* @param timeoutSeconds total time to wait in seconds
|
||||||
|
*
|
||||||
|
* @return true if index on the table exists and is ACTIVE, false if timeout was reached
|
||||||
|
*/
|
||||||
|
default boolean waitUntilLeaseOwnerToLeaseKeyIndexExists(
|
||||||
|
final long secondsBetweenPolls, final long timeoutSeconds) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Check if leaseOwner GSI is ACTIVE
|
||||||
|
* @return true if index is active, false otherwise
|
||||||
|
* @throws DependencyException if storage's describe API fails in an unexpected way
|
||||||
|
*/
|
||||||
|
boolean isLeaseOwnerToLeaseKeyIndexActive() throws DependencyException;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* List all leases for a given stream synchronously.
|
* List all leases for a given stream synchronously.
|
||||||
*
|
*
|
||||||
|
|
@ -87,6 +121,24 @@ public interface LeaseRefresher {
|
||||||
List<Lease> listLeasesForStream(StreamIdentifier streamIdentifier)
|
List<Lease> listLeasesForStream(StreamIdentifier streamIdentifier)
|
||||||
throws DependencyException, InvalidStateException, ProvisionedThroughputException;
|
throws DependencyException, InvalidStateException, ProvisionedThroughputException;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* List all leases for a given workerIdentifier synchronously.
|
||||||
|
* Default implementation calls listLeases() and filters the results.
|
||||||
|
*
|
||||||
|
* @throws DependencyException if DynamoDB scan fails in an unexpected way
|
||||||
|
* @throws InvalidStateException if lease table does not exist
|
||||||
|
* @throws ProvisionedThroughputException if DynamoDB scan fails due to lack of capacity
|
||||||
|
*
|
||||||
|
* @return list of leases
|
||||||
|
*/
|
||||||
|
default List<String> listLeaseKeysForWorker(final String workerIdentifier)
|
||||||
|
throws DependencyException, InvalidStateException, ProvisionedThroughputException {
|
||||||
|
return listLeases().stream()
|
||||||
|
.filter(lease -> lease.leaseOwner().equals(workerIdentifier))
|
||||||
|
.map(Lease::leaseKey)
|
||||||
|
.collect(Collectors.toList());
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* List all objects in table synchronously.
|
* List all objects in table synchronously.
|
||||||
*
|
*
|
||||||
|
|
@ -98,6 +150,23 @@ public interface LeaseRefresher {
|
||||||
*/
|
*/
|
||||||
List<Lease> listLeases() throws DependencyException, InvalidStateException, ProvisionedThroughputException;
|
List<Lease> listLeases() throws DependencyException, InvalidStateException, ProvisionedThroughputException;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* List all leases from the storage parallely and deserialize into Lease objects. Returns the list of leaseKey
|
||||||
|
* that failed deserialize separately.
|
||||||
|
*
|
||||||
|
* @param threadPool threadpool to use for parallel scan
|
||||||
|
* @param parallelismFactor no. of parallel scans
|
||||||
|
* @return Pair of List of leases from the storage and List of items failed to deserialize
|
||||||
|
* @throws DependencyException if DynamoDB scan fails in an unexpected way
|
||||||
|
* @throws InvalidStateException if lease table does not exist
|
||||||
|
* @throws ProvisionedThroughputException if DynamoDB scan fails due to lack of capacity
|
||||||
|
*/
|
||||||
|
default Map.Entry<List<Lease>, List<String>> listLeasesParallely(
|
||||||
|
final ExecutorService threadPool, final int parallelismFactor)
|
||||||
|
throws DependencyException, InvalidStateException, ProvisionedThroughputException {
|
||||||
|
throw new UnsupportedOperationException("listLeasesParallely is not implemented");
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Create a new lease. Conditional on a lease not already existing with this shardId.
|
* Create a new lease. Conditional on a lease not already existing with this shardId.
|
||||||
*
|
*
|
||||||
|
|
@ -154,6 +223,47 @@ public interface LeaseRefresher {
|
||||||
boolean takeLease(Lease lease, String owner)
|
boolean takeLease(Lease lease, String owner)
|
||||||
throws DependencyException, InvalidStateException, ProvisionedThroughputException;
|
throws DependencyException, InvalidStateException, ProvisionedThroughputException;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Assigns given lease to newOwner owner by incrementing its leaseCounter and setting its owner field. Conditional
|
||||||
|
* on the leaseOwner in DynamoDB matching the leaseOwner of the input lease. Mutates the leaseCounter and owner of
|
||||||
|
* the passed-in lease object after updating DynamoDB.
|
||||||
|
*
|
||||||
|
* @param lease the lease to be assigned
|
||||||
|
* @param newOwner the new owner
|
||||||
|
*
|
||||||
|
* @return true if lease was successfully assigned, false otherwise
|
||||||
|
*
|
||||||
|
* @throws InvalidStateException if lease table does not exist
|
||||||
|
* @throws ProvisionedThroughputException if DynamoDB update fails due to lack of capacity
|
||||||
|
* @throws DependencyException if DynamoDB update fails in an unexpected way
|
||||||
|
*/
|
||||||
|
default boolean assignLease(final Lease lease, final String newOwner)
|
||||||
|
throws DependencyException, InvalidStateException, ProvisionedThroughputException {
|
||||||
|
|
||||||
|
throw new UnsupportedOperationException("assignLease is not implemented");
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Initiates a graceful handoff of the given lease to the specified new owner, allowing the current owner
|
||||||
|
* to complete its processing before transferring ownership.
|
||||||
|
* <p>
|
||||||
|
* This method updates the lease with the new owner information but ensures that the current owner
|
||||||
|
* is given time to gracefully finish its work (e.g., processing records) before the lease is reassigned.
|
||||||
|
* </p>
|
||||||
|
*
|
||||||
|
* @param lease the lease to be assigned
|
||||||
|
* @param newOwner the new owner
|
||||||
|
* @return true if a graceful handoff was successfully initiated
|
||||||
|
* @throws InvalidStateException if lease table does not exist
|
||||||
|
* @throws ProvisionedThroughputException if DynamoDB update fails due to lack of capacity
|
||||||
|
* @throws DependencyException if DynamoDB update fails in an unexpected way
|
||||||
|
*/
|
||||||
|
default boolean initiateGracefulLeaseHandoff(final Lease lease, final String newOwner)
|
||||||
|
throws DependencyException, InvalidStateException, ProvisionedThroughputException {
|
||||||
|
|
||||||
|
throw new UnsupportedOperationException("assignLeaseWithWait is not implemented");
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Evict the current owner of lease by setting owner to null. Conditional on the owner in DynamoDB matching the owner of
|
* Evict the current owner of lease by setting owner to null. Conditional on the owner in DynamoDB matching the owner of
|
||||||
* the input. Mutates the lease counter and owner of the passed-in lease object after updating the record in DynamoDB.
|
* the input. Mutates the lease counter and owner of the passed-in lease object after updating the record in DynamoDB.
|
||||||
|
|
|
||||||
|
|
@ -15,6 +15,7 @@
|
||||||
package software.amazon.kinesis.leases;
|
package software.amazon.kinesis.leases;
|
||||||
|
|
||||||
import java.util.Collection;
|
import java.util.Collection;
|
||||||
|
import java.util.Collections;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
|
||||||
import software.amazon.awssdk.services.dynamodb.model.AttributeDefinition;
|
import software.amazon.awssdk.services.dynamodb.model.AttributeDefinition;
|
||||||
|
|
@ -100,6 +101,15 @@ public interface LeaseSerializer {
|
||||||
*/
|
*/
|
||||||
Map<String, AttributeValueUpdate> getDynamoTakeLeaseUpdate(Lease lease, String newOwner);
|
Map<String, AttributeValueUpdate> getDynamoTakeLeaseUpdate(Lease lease, String newOwner);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @param lease lease that needs to be assigned
|
||||||
|
* @param newOwner newLeaseOwner
|
||||||
|
* @return the attribute value map that takes a lease for a new owner
|
||||||
|
*/
|
||||||
|
default Map<String, AttributeValueUpdate> getDynamoAssignLeaseUpdate(Lease lease, String newOwner) {
|
||||||
|
throw new UnsupportedOperationException("getDynamoAssignLeaseUpdate is not implemented");
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @param lease
|
* @param lease
|
||||||
* @return the attribute value map that voids a lease
|
* @return the attribute value map that voids a lease
|
||||||
|
|
@ -127,8 +137,22 @@ public interface LeaseSerializer {
|
||||||
*/
|
*/
|
||||||
Collection<KeySchemaElement> getKeySchema();
|
Collection<KeySchemaElement> getKeySchema();
|
||||||
|
|
||||||
|
default Collection<KeySchemaElement> getWorkerIdToLeaseKeyIndexKeySchema() {
|
||||||
|
return Collections.EMPTY_LIST;
|
||||||
|
}
|
||||||
|
|
||||||
|
default Collection<AttributeDefinition> getWorkerIdToLeaseKeyIndexAttributeDefinitions() {
|
||||||
|
return Collections.EMPTY_LIST;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @return attribute definitions for creating a DynamoDB table to store leases
|
* @return attribute definitions for creating a DynamoDB table to store leases
|
||||||
*/
|
*/
|
||||||
Collection<AttributeDefinition> getAttributeDefinitions();
|
Collection<AttributeDefinition> getAttributeDefinitions();
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @param lease
|
||||||
|
* @return the attribute value map that includes lease throughput
|
||||||
|
*/
|
||||||
|
Map<String, AttributeValueUpdate> getDynamoLeaseThroughputKbpsUpdate(Lease lease);
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,158 @@
|
||||||
|
package software.amazon.kinesis.leases;
|
||||||
|
|
||||||
|
import java.util.LinkedList;
|
||||||
|
import java.util.Map;
|
||||||
|
import java.util.Queue;
|
||||||
|
import java.util.concurrent.Callable;
|
||||||
|
import java.util.concurrent.ConcurrentHashMap;
|
||||||
|
import java.util.concurrent.ConcurrentLinkedQueue;
|
||||||
|
|
||||||
|
import lombok.Builder;
|
||||||
|
import lombok.Getter;
|
||||||
|
import lombok.NonNull;
|
||||||
|
import lombok.RequiredArgsConstructor;
|
||||||
|
import lombok.ToString;
|
||||||
|
import software.amazon.awssdk.annotations.ThreadSafe;
|
||||||
|
import software.amazon.kinesis.annotations.KinesisClientInternalApi;
|
||||||
|
import software.amazon.kinesis.utils.ExponentialMovingAverage;
|
||||||
|
|
||||||
|
import static java.util.Objects.isNull;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* This class records the stats for the leases.
|
||||||
|
* The stats are recorded in a thread safe queue, and the throughput is calculated by summing up the bytes and dividing
|
||||||
|
* by interval in seconds.
|
||||||
|
* This class is thread safe and backed by thread safe data structures.
|
||||||
|
*/
|
||||||
|
@RequiredArgsConstructor
|
||||||
|
@KinesisClientInternalApi
|
||||||
|
@ThreadSafe
|
||||||
|
public class LeaseStatsRecorder {
|
||||||
|
|
||||||
|
/**
|
||||||
|
* This default alpha is chosen based on the testing so far between simple average and moving average with 0.5.
|
||||||
|
* In the future, if one value does not fit all use cases, inject this via config.
|
||||||
|
*/
|
||||||
|
private static final double DEFAULT_ALPHA = 0.5;
|
||||||
|
|
||||||
|
public static final int BYTES_PER_KB = 1024;
|
||||||
|
|
||||||
|
private final Long renewerFrequencyInMillis;
|
||||||
|
private final Map<String, Queue<LeaseStats>> leaseStatsMap = new ConcurrentHashMap<>();
|
||||||
|
private final Map<String, ExponentialMovingAverage> leaseKeyToExponentialMovingAverageMap =
|
||||||
|
new ConcurrentHashMap<>();
|
||||||
|
private final Callable<Long> timeProviderInMillis;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* This method provides happens-before semantics (i.e., the action (access or removal) from a thread happens
|
||||||
|
* before the action from subsequent thread) for the stats recording in multithreaded environment.
|
||||||
|
*/
|
||||||
|
public void recordStats(@NonNull final LeaseStats leaseStats) {
|
||||||
|
final Queue<LeaseStats> leaseStatsQueue =
|
||||||
|
leaseStatsMap.computeIfAbsent(leaseStats.getLeaseKey(), lease -> new ConcurrentLinkedQueue<>());
|
||||||
|
leaseStatsQueue.add(leaseStats);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Calculates the throughput in KBps for the given leaseKey.
|
||||||
|
* Method first clears the items that are older than {@link #renewerFrequencyInMillis} from the queue and then
|
||||||
|
* calculates the throughput per second during {@link #renewerFrequencyInMillis} interval and then returns the
|
||||||
|
* ExponentialMovingAverage of the throughput. If method is called in quick succession with or without new stats
|
||||||
|
* the result can be different as ExponentialMovingAverage decays old values on every new call.
|
||||||
|
* This method is thread safe.
|
||||||
|
* @param leaseKey leaseKey for which stats are required
|
||||||
|
* @return throughput in Kbps, returns null if there is no stats available for the leaseKey.
|
||||||
|
*/
|
||||||
|
public Double getThroughputKBps(final String leaseKey) {
|
||||||
|
final Queue<LeaseStats> leaseStatsQueue = leaseStatsMap.get(leaseKey);
|
||||||
|
|
||||||
|
if (isNull(leaseStatsQueue)) {
|
||||||
|
// This means there is no entry for this leaseKey yet
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
filterExpiredEntries(leaseStatsQueue);
|
||||||
|
|
||||||
|
// Convert bytes into KB and divide by interval in second to get throughput per second.
|
||||||
|
final ExponentialMovingAverage exponentialMovingAverage = leaseKeyToExponentialMovingAverageMap.computeIfAbsent(
|
||||||
|
leaseKey, leaseId -> new ExponentialMovingAverage(DEFAULT_ALPHA));
|
||||||
|
|
||||||
|
// Specifically dividing by 1000.0 rather than using Duration class to get seconds, because Duration class
|
||||||
|
// implementation rounds off to seconds and precision is lost.
|
||||||
|
final double frequency = renewerFrequencyInMillis / 1000.0;
|
||||||
|
final double throughput = readQueue(leaseStatsQueue).stream()
|
||||||
|
.mapToDouble(LeaseStats::getBytes)
|
||||||
|
.sum()
|
||||||
|
/ BYTES_PER_KB
|
||||||
|
/ frequency;
|
||||||
|
exponentialMovingAverage.add(throughput);
|
||||||
|
return exponentialMovingAverage.getValue();
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Gets the currentTimeMillis and then iterates over the queue to get the stats with creation time less than
|
||||||
|
* currentTimeMillis.
|
||||||
|
* This is specifically done to avoid potential race between with high-frequency put thread blocking get thread.
|
||||||
|
*/
|
||||||
|
private Queue<LeaseStats> readQueue(final Queue<LeaseStats> leaseStatsQueue) {
|
||||||
|
final long currentTimeMillis = getCurrenTimeInMillis();
|
||||||
|
final Queue<LeaseStats> response = new LinkedList<>();
|
||||||
|
for (LeaseStats leaseStats : leaseStatsQueue) {
|
||||||
|
if (leaseStats.creationTimeMillis > currentTimeMillis) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
response.add(leaseStats);
|
||||||
|
}
|
||||||
|
return response;
|
||||||
|
}
|
||||||
|
|
||||||
|
private long getCurrenTimeInMillis() {
|
||||||
|
try {
|
||||||
|
return timeProviderInMillis.call();
|
||||||
|
} catch (final Exception e) {
|
||||||
|
// Fallback to using the System.currentTimeMillis if failed.
|
||||||
|
return System.currentTimeMillis();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private void filterExpiredEntries(final Queue<LeaseStats> leaseStatsQueue) {
|
||||||
|
final long currentTime = getCurrenTimeInMillis();
|
||||||
|
while (!leaseStatsQueue.isEmpty()) {
|
||||||
|
final LeaseStats leaseStats = leaseStatsQueue.peek();
|
||||||
|
if (isNull(leaseStats) || currentTime - leaseStats.getCreationTimeMillis() < renewerFrequencyInMillis) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
leaseStatsQueue.poll();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Clear the in-memory stats for the lease when a lease is reassigned (due to shut down or lease stealing)
|
||||||
|
* @param leaseKey leaseKey, for which stats are supposed to be clear.
|
||||||
|
*/
|
||||||
|
public void dropLeaseStats(final String leaseKey) {
|
||||||
|
leaseStatsMap.remove(leaseKey);
|
||||||
|
leaseKeyToExponentialMovingAverageMap.remove(leaseKey);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Builder
|
||||||
|
@Getter
|
||||||
|
@ToString
|
||||||
|
@KinesisClientInternalApi
|
||||||
|
public static final class LeaseStats {
|
||||||
|
/**
|
||||||
|
* Lease key for which this leaseStats object is created.
|
||||||
|
*/
|
||||||
|
private final String leaseKey;
|
||||||
|
/**
|
||||||
|
* Bytes that are processed for a lease
|
||||||
|
*/
|
||||||
|
private final long bytes;
|
||||||
|
/**
|
||||||
|
* Wall time in epoch millis at which this leaseStats object was created. This time is used to determine the
|
||||||
|
* expiry of the lease stats.
|
||||||
|
*/
|
||||||
|
@Builder.Default
|
||||||
|
private final long creationTimeMillis = System.currentTimeMillis();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -71,7 +71,7 @@ public class ShardSyncTaskManager {
|
||||||
/**
|
/**
|
||||||
* Constructor.
|
* Constructor.
|
||||||
*
|
*
|
||||||
* <p>NOTE: This constructor is deprecated and will be removed in a future release.</p>
|
* @deprecated This constructor is deprecated and will be removed in a future release.
|
||||||
*
|
*
|
||||||
* @param shardDetector
|
* @param shardDetector
|
||||||
* @param leaseRefresher
|
* @param leaseRefresher
|
||||||
|
|
@ -92,18 +92,16 @@ public class ShardSyncTaskManager {
|
||||||
long shardSyncIdleTimeMillis,
|
long shardSyncIdleTimeMillis,
|
||||||
ExecutorService executorService,
|
ExecutorService executorService,
|
||||||
MetricsFactory metricsFactory) {
|
MetricsFactory metricsFactory) {
|
||||||
this.shardDetector = shardDetector;
|
this(
|
||||||
this.leaseRefresher = leaseRefresher;
|
shardDetector,
|
||||||
this.initialPositionInStream = initialPositionInStream;
|
leaseRefresher,
|
||||||
this.cleanupLeasesUponShardCompletion = cleanupLeasesUponShardCompletion;
|
initialPositionInStream,
|
||||||
this.garbageCollectLeases = true;
|
cleanupLeasesUponShardCompletion,
|
||||||
this.ignoreUnexpectedChildShards = ignoreUnexpectedChildShards;
|
ignoreUnexpectedChildShards,
|
||||||
this.shardSyncIdleTimeMillis = shardSyncIdleTimeMillis;
|
shardSyncIdleTimeMillis,
|
||||||
this.executorService = executorService;
|
executorService,
|
||||||
this.hierarchicalShardSyncer = new HierarchicalShardSyncer();
|
new HierarchicalShardSyncer(),
|
||||||
this.metricsFactory = metricsFactory;
|
metricsFactory);
|
||||||
this.shardSyncRequestPending = new AtomicBoolean(false);
|
|
||||||
this.lock = new ReentrantLock();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
|
||||||
|
|
@ -19,6 +19,7 @@ import java.util.Collections;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
import java.util.UUID;
|
import java.util.UUID;
|
||||||
|
import java.util.concurrent.ConcurrentMap;
|
||||||
import java.util.concurrent.ExecutorService;
|
import java.util.concurrent.ExecutorService;
|
||||||
import java.util.concurrent.Executors;
|
import java.util.concurrent.Executors;
|
||||||
import java.util.concurrent.LinkedTransferQueue;
|
import java.util.concurrent.LinkedTransferQueue;
|
||||||
|
|
@ -30,13 +31,17 @@ import java.util.concurrent.TimeUnit;
|
||||||
import java.util.stream.Collectors;
|
import java.util.stream.Collectors;
|
||||||
|
|
||||||
import com.google.common.util.concurrent.ThreadFactoryBuilder;
|
import com.google.common.util.concurrent.ThreadFactoryBuilder;
|
||||||
|
import lombok.RequiredArgsConstructor;
|
||||||
import lombok.extern.slf4j.Slf4j;
|
import lombok.extern.slf4j.Slf4j;
|
||||||
import software.amazon.kinesis.annotations.KinesisClientInternalApi;
|
import software.amazon.kinesis.annotations.KinesisClientInternalApi;
|
||||||
|
import software.amazon.kinesis.coordinator.MigrationAdaptiveLeaseAssignmentModeProvider;
|
||||||
import software.amazon.kinesis.leases.Lease;
|
import software.amazon.kinesis.leases.Lease;
|
||||||
import software.amazon.kinesis.leases.LeaseCoordinator;
|
import software.amazon.kinesis.leases.LeaseCoordinator;
|
||||||
|
import software.amazon.kinesis.leases.LeaseDiscoverer;
|
||||||
import software.amazon.kinesis.leases.LeaseManagementConfig;
|
import software.amazon.kinesis.leases.LeaseManagementConfig;
|
||||||
import software.amazon.kinesis.leases.LeaseRefresher;
|
import software.amazon.kinesis.leases.LeaseRefresher;
|
||||||
import software.amazon.kinesis.leases.LeaseRenewer;
|
import software.amazon.kinesis.leases.LeaseRenewer;
|
||||||
|
import software.amazon.kinesis.leases.LeaseStatsRecorder;
|
||||||
import software.amazon.kinesis.leases.LeaseTaker;
|
import software.amazon.kinesis.leases.LeaseTaker;
|
||||||
import software.amazon.kinesis.leases.MultiStreamLease;
|
import software.amazon.kinesis.leases.MultiStreamLease;
|
||||||
import software.amazon.kinesis.leases.ShardInfo;
|
import software.amazon.kinesis.leases.ShardInfo;
|
||||||
|
|
@ -44,6 +49,8 @@ import software.amazon.kinesis.leases.exceptions.DependencyException;
|
||||||
import software.amazon.kinesis.leases.exceptions.InvalidStateException;
|
import software.amazon.kinesis.leases.exceptions.InvalidStateException;
|
||||||
import software.amazon.kinesis.leases.exceptions.LeasingException;
|
import software.amazon.kinesis.leases.exceptions.LeasingException;
|
||||||
import software.amazon.kinesis.leases.exceptions.ProvisionedThroughputException;
|
import software.amazon.kinesis.leases.exceptions.ProvisionedThroughputException;
|
||||||
|
import software.amazon.kinesis.lifecycle.LeaseGracefulShutdownHandler;
|
||||||
|
import software.amazon.kinesis.lifecycle.ShardConsumer;
|
||||||
import software.amazon.kinesis.metrics.MetricsFactory;
|
import software.amazon.kinesis.metrics.MetricsFactory;
|
||||||
import software.amazon.kinesis.metrics.MetricsLevel;
|
import software.amazon.kinesis.metrics.MetricsLevel;
|
||||||
import software.amazon.kinesis.metrics.MetricsScope;
|
import software.amazon.kinesis.metrics.MetricsScope;
|
||||||
|
|
@ -70,115 +77,34 @@ public class DynamoDBLeaseCoordinator implements LeaseCoordinator {
|
||||||
.setNameFormat("LeaseRenewer-%04d")
|
.setNameFormat("LeaseRenewer-%04d")
|
||||||
.setDaemon(true)
|
.setDaemon(true)
|
||||||
.build();
|
.build();
|
||||||
|
private static final ThreadFactory LEASE_DISCOVERY_THREAD_FACTORY = new ThreadFactoryBuilder()
|
||||||
|
.setNameFormat("LeaseDiscovery-%04d")
|
||||||
|
.setDaemon(true)
|
||||||
|
.build();
|
||||||
|
|
||||||
private final LeaseRenewer leaseRenewer;
|
private final LeaseRenewer leaseRenewer;
|
||||||
private final LeaseTaker leaseTaker;
|
private final LeaseTaker leaseTaker;
|
||||||
|
private final LeaseDiscoverer leaseDiscoverer;
|
||||||
private final long renewerIntervalMillis;
|
private final long renewerIntervalMillis;
|
||||||
private final long takerIntervalMillis;
|
private final long takerIntervalMillis;
|
||||||
|
private final long leaseDiscovererIntervalMillis;
|
||||||
private final ExecutorService leaseRenewalThreadpool;
|
private final ExecutorService leaseRenewalThreadpool;
|
||||||
|
private final ExecutorService leaseDiscoveryThreadPool;
|
||||||
private final LeaseRefresher leaseRefresher;
|
private final LeaseRefresher leaseRefresher;
|
||||||
|
private final LeaseStatsRecorder leaseStatsRecorder;
|
||||||
|
private final LeaseGracefulShutdownHandler leaseGracefulShutdownHandler;
|
||||||
private long initialLeaseTableReadCapacity;
|
private long initialLeaseTableReadCapacity;
|
||||||
private long initialLeaseTableWriteCapacity;
|
private long initialLeaseTableWriteCapacity;
|
||||||
protected final MetricsFactory metricsFactory;
|
protected final MetricsFactory metricsFactory;
|
||||||
|
|
||||||
private final Object shutdownLock = new Object();
|
private final Object shutdownLock = new Object();
|
||||||
|
private final LeaseManagementConfig.WorkerUtilizationAwareAssignmentConfig workerUtilizationAwareAssignmentConfig;
|
||||||
private ScheduledExecutorService leaseCoordinatorThreadPool;
|
private ScheduledExecutorService leaseCoordinatorThreadPool;
|
||||||
|
private ScheduledFuture<?> leaseDiscoveryFuture;
|
||||||
private ScheduledFuture<?> takerFuture;
|
private ScheduledFuture<?> takerFuture;
|
||||||
|
|
||||||
private volatile boolean running = false;
|
private volatile boolean running = false;
|
||||||
|
|
||||||
/**
|
|
||||||
* Constructor.
|
|
||||||
*
|
|
||||||
* <p>NOTE: This constructor is deprecated and will be removed in a future release.</p>
|
|
||||||
*
|
|
||||||
* @param leaseRefresher
|
|
||||||
* LeaseRefresher instance to use
|
|
||||||
* @param workerIdentifier
|
|
||||||
* Identifies the worker (e.g. useful to track lease ownership)
|
|
||||||
* @param leaseDurationMillis
|
|
||||||
* Duration of a lease
|
|
||||||
* @param epsilonMillis
|
|
||||||
* Allow for some variance when calculating lease expirations
|
|
||||||
* @param maxLeasesForWorker
|
|
||||||
* Max leases this Worker can handle at a time
|
|
||||||
* @param maxLeasesToStealAtOneTime
|
|
||||||
* Steal up to these many leases at a time (for load balancing)
|
|
||||||
* @param metricsFactory
|
|
||||||
* Used to publish metrics about lease operations
|
|
||||||
*/
|
|
||||||
@Deprecated
|
|
||||||
public DynamoDBLeaseCoordinator(
|
|
||||||
final LeaseRefresher leaseRefresher,
|
|
||||||
final String workerIdentifier,
|
|
||||||
final long leaseDurationMillis,
|
|
||||||
final long epsilonMillis,
|
|
||||||
final int maxLeasesForWorker,
|
|
||||||
final int maxLeasesToStealAtOneTime,
|
|
||||||
final int maxLeaseRenewerThreadCount,
|
|
||||||
final MetricsFactory metricsFactory) {
|
|
||||||
this(
|
|
||||||
leaseRefresher,
|
|
||||||
workerIdentifier,
|
|
||||||
leaseDurationMillis,
|
|
||||||
epsilonMillis,
|
|
||||||
maxLeasesForWorker,
|
|
||||||
maxLeasesToStealAtOneTime,
|
|
||||||
maxLeaseRenewerThreadCount,
|
|
||||||
TableConstants.DEFAULT_INITIAL_LEASE_TABLE_READ_CAPACITY,
|
|
||||||
TableConstants.DEFAULT_INITIAL_LEASE_TABLE_WRITE_CAPACITY,
|
|
||||||
metricsFactory);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Constructor.
|
|
||||||
*
|
|
||||||
* @param leaseRefresher
|
|
||||||
* LeaseRefresher instance to use
|
|
||||||
* @param workerIdentifier
|
|
||||||
* Identifies the worker (e.g. useful to track lease ownership)
|
|
||||||
* @param leaseDurationMillis
|
|
||||||
* Duration of a lease
|
|
||||||
* @param epsilonMillis
|
|
||||||
* Allow for some variance when calculating lease expirations
|
|
||||||
* @param maxLeasesForWorker
|
|
||||||
* Max leases this Worker can handle at a time
|
|
||||||
* @param maxLeasesToStealAtOneTime
|
|
||||||
* Steal up to these many leases at a time (for load balancing)
|
|
||||||
* @param initialLeaseTableReadCapacity
|
|
||||||
* Initial dynamodb lease table read iops if creating the lease table
|
|
||||||
* @param initialLeaseTableWriteCapacity
|
|
||||||
* Initial dynamodb lease table write iops if creating the lease table
|
|
||||||
* @param metricsFactory
|
|
||||||
* Used to publish metrics about lease operations
|
|
||||||
*/
|
|
||||||
@Deprecated
|
|
||||||
public DynamoDBLeaseCoordinator(
|
|
||||||
final LeaseRefresher leaseRefresher,
|
|
||||||
final String workerIdentifier,
|
|
||||||
final long leaseDurationMillis,
|
|
||||||
final long epsilonMillis,
|
|
||||||
final int maxLeasesForWorker,
|
|
||||||
final int maxLeasesToStealAtOneTime,
|
|
||||||
final int maxLeaseRenewerThreadCount,
|
|
||||||
final long initialLeaseTableReadCapacity,
|
|
||||||
final long initialLeaseTableWriteCapacity,
|
|
||||||
final MetricsFactory metricsFactory) {
|
|
||||||
this(
|
|
||||||
leaseRefresher,
|
|
||||||
workerIdentifier,
|
|
||||||
leaseDurationMillis,
|
|
||||||
LeaseManagementConfig.DEFAULT_ENABLE_PRIORITY_LEASE_ASSIGNMENT,
|
|
||||||
epsilonMillis,
|
|
||||||
maxLeasesForWorker,
|
|
||||||
maxLeasesToStealAtOneTime,
|
|
||||||
maxLeaseRenewerThreadCount,
|
|
||||||
TableConstants.DEFAULT_INITIAL_LEASE_TABLE_READ_CAPACITY,
|
|
||||||
TableConstants.DEFAULT_INITIAL_LEASE_TABLE_WRITE_CAPACITY,
|
|
||||||
metricsFactory);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Constructor.
|
* Constructor.
|
||||||
*
|
*
|
||||||
|
|
@ -214,17 +140,35 @@ public class DynamoDBLeaseCoordinator implements LeaseCoordinator {
|
||||||
final int maxLeaseRenewerThreadCount,
|
final int maxLeaseRenewerThreadCount,
|
||||||
final long initialLeaseTableReadCapacity,
|
final long initialLeaseTableReadCapacity,
|
||||||
final long initialLeaseTableWriteCapacity,
|
final long initialLeaseTableWriteCapacity,
|
||||||
final MetricsFactory metricsFactory) {
|
final MetricsFactory metricsFactory,
|
||||||
|
final LeaseManagementConfig.WorkerUtilizationAwareAssignmentConfig workerUtilizationAwareAssignmentConfig,
|
||||||
|
final LeaseManagementConfig.GracefulLeaseHandoffConfig gracefulLeaseHandoffConfig,
|
||||||
|
final ConcurrentMap<ShardInfo, ShardConsumer> shardInfoShardConsumerMap) {
|
||||||
this.leaseRefresher = leaseRefresher;
|
this.leaseRefresher = leaseRefresher;
|
||||||
this.leaseRenewalThreadpool = getLeaseRenewalExecutorService(maxLeaseRenewerThreadCount);
|
this.leaseRenewalThreadpool = createExecutorService(maxLeaseRenewerThreadCount, LEASE_RENEWAL_THREAD_FACTORY);
|
||||||
this.leaseTaker = new DynamoDBLeaseTaker(leaseRefresher, workerIdentifier, leaseDurationMillis, metricsFactory)
|
this.leaseTaker = new DynamoDBLeaseTaker(leaseRefresher, workerIdentifier, leaseDurationMillis, metricsFactory)
|
||||||
.withMaxLeasesForWorker(maxLeasesForWorker)
|
.withMaxLeasesForWorker(maxLeasesForWorker)
|
||||||
.withMaxLeasesToStealAtOneTime(maxLeasesToStealAtOneTime)
|
.withMaxLeasesToStealAtOneTime(maxLeasesToStealAtOneTime)
|
||||||
.withEnablePriorityLeaseAssignment(enablePriorityLeaseAssignment);
|
.withEnablePriorityLeaseAssignment(enablePriorityLeaseAssignment);
|
||||||
this.leaseRenewer = new DynamoDBLeaseRenewer(
|
|
||||||
leaseRefresher, workerIdentifier, leaseDurationMillis, leaseRenewalThreadpool, metricsFactory);
|
|
||||||
this.renewerIntervalMillis = getRenewerTakerIntervalMillis(leaseDurationMillis, epsilonMillis);
|
this.renewerIntervalMillis = getRenewerTakerIntervalMillis(leaseDurationMillis, epsilonMillis);
|
||||||
this.takerIntervalMillis = (leaseDurationMillis + epsilonMillis) * 2;
|
this.takerIntervalMillis = (leaseDurationMillis + epsilonMillis) * 2;
|
||||||
|
// Should run once every leaseDurationMillis to identify new leases before expiry.
|
||||||
|
this.leaseDiscovererIntervalMillis = leaseDurationMillis - epsilonMillis;
|
||||||
|
this.leaseStatsRecorder = new LeaseStatsRecorder(renewerIntervalMillis, System::currentTimeMillis);
|
||||||
|
this.leaseGracefulShutdownHandler = LeaseGracefulShutdownHandler.create(
|
||||||
|
gracefulLeaseHandoffConfig.gracefulLeaseHandoffTimeoutMillis(), shardInfoShardConsumerMap, this);
|
||||||
|
this.leaseRenewer = new DynamoDBLeaseRenewer(
|
||||||
|
leaseRefresher,
|
||||||
|
workerIdentifier,
|
||||||
|
leaseDurationMillis,
|
||||||
|
leaseRenewalThreadpool,
|
||||||
|
metricsFactory,
|
||||||
|
leaseStatsRecorder,
|
||||||
|
leaseGracefulShutdownHandler::enqueueShutdown);
|
||||||
|
this.leaseDiscoveryThreadPool =
|
||||||
|
createExecutorService(maxLeaseRenewerThreadCount, LEASE_DISCOVERY_THREAD_FACTORY);
|
||||||
|
this.leaseDiscoverer = new DynamoDBLeaseDiscoverer(
|
||||||
|
this.leaseRefresher, this.leaseRenewer, metricsFactory, workerIdentifier, leaseDiscoveryThreadPool);
|
||||||
if (initialLeaseTableReadCapacity <= 0) {
|
if (initialLeaseTableReadCapacity <= 0) {
|
||||||
throw new IllegalArgumentException("readCapacity should be >= 1");
|
throw new IllegalArgumentException("readCapacity should be >= 1");
|
||||||
}
|
}
|
||||||
|
|
@ -234,6 +178,7 @@ public class DynamoDBLeaseCoordinator implements LeaseCoordinator {
|
||||||
}
|
}
|
||||||
this.initialLeaseTableWriteCapacity = initialLeaseTableWriteCapacity;
|
this.initialLeaseTableWriteCapacity = initialLeaseTableWriteCapacity;
|
||||||
this.metricsFactory = metricsFactory;
|
this.metricsFactory = metricsFactory;
|
||||||
|
this.workerUtilizationAwareAssignmentConfig = workerUtilizationAwareAssignmentConfig;
|
||||||
|
|
||||||
log.info(
|
log.info(
|
||||||
"With failover time {} ms and epsilon {} ms, LeaseCoordinator will renew leases every {} ms, take"
|
"With failover time {} ms and epsilon {} ms, LeaseCoordinator will renew leases every {} ms, take"
|
||||||
|
|
@ -246,11 +191,49 @@ public class DynamoDBLeaseCoordinator implements LeaseCoordinator {
|
||||||
maxLeasesToStealAtOneTime);
|
maxLeasesToStealAtOneTime);
|
||||||
}
|
}
|
||||||
|
|
||||||
private class TakerRunnable implements Runnable {
|
@RequiredArgsConstructor
|
||||||
|
private class LeaseDiscoveryRunnable implements Runnable {
|
||||||
|
private final MigrationAdaptiveLeaseAssignmentModeProvider leaseAssignmentModeProvider;
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void run() {
|
public void run() {
|
||||||
try {
|
try {
|
||||||
|
// LeaseDiscoverer is run in WORKER_UTILIZATION_AWARE_ASSIGNMENT mode only
|
||||||
|
synchronized (shutdownLock) {
|
||||||
|
if (!leaseAssignmentModeProvider
|
||||||
|
.getLeaseAssignmentMode()
|
||||||
|
.equals(
|
||||||
|
MigrationAdaptiveLeaseAssignmentModeProvider.LeaseAssignmentMode
|
||||||
|
.WORKER_UTILIZATION_AWARE_ASSIGNMENT)) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
if (running) {
|
||||||
|
leaseRenewer.addLeasesToRenew(leaseDiscoverer.discoverNewLeases());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} catch (Exception e) {
|
||||||
|
log.error("Failed to execute lease discovery", e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@RequiredArgsConstructor
|
||||||
|
private class TakerRunnable implements Runnable {
|
||||||
|
private final MigrationAdaptiveLeaseAssignmentModeProvider leaseAssignmentModeProvider;
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void run() {
|
||||||
|
try {
|
||||||
|
// LeaseTaker is run in DEFAULT_LEASE_COUNT_BASED_ASSIGNMENT mode only
|
||||||
|
synchronized (shutdownLock) {
|
||||||
|
if (!leaseAssignmentModeProvider
|
||||||
|
.getLeaseAssignmentMode()
|
||||||
|
.equals(
|
||||||
|
MigrationAdaptiveLeaseAssignmentModeProvider.LeaseAssignmentMode
|
||||||
|
.DEFAULT_LEASE_COUNT_BASED_ASSIGNMENT)) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
}
|
||||||
runLeaseTaker();
|
runLeaseTaker();
|
||||||
} catch (LeasingException e) {
|
} catch (LeasingException e) {
|
||||||
log.error("LeasingException encountered in lease taking thread", e);
|
log.error("LeasingException encountered in lease taking thread", e);
|
||||||
|
|
@ -290,18 +273,35 @@ public class DynamoDBLeaseCoordinator implements LeaseCoordinator {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void start() throws DependencyException, InvalidStateException, ProvisionedThroughputException {
|
public void start(final MigrationAdaptiveLeaseAssignmentModeProvider leaseAssignmentModeProvider)
|
||||||
|
throws DependencyException, InvalidStateException, ProvisionedThroughputException {
|
||||||
leaseRenewer.initialize();
|
leaseRenewer.initialize();
|
||||||
|
// At max, we need 3 threads - lease renewer, lease taker, lease discoverer - to run without contention.
|
||||||
|
leaseCoordinatorThreadPool = Executors.newScheduledThreadPool(3, LEASE_COORDINATOR_THREAD_FACTORY);
|
||||||
|
|
||||||
// 2 because we know we'll have at most 2 concurrent tasks at a time.
|
// During migration to KCLv3.x from KCLv2.x, lease assignment mode can change dynamically, so
|
||||||
leaseCoordinatorThreadPool = Executors.newScheduledThreadPool(2, LEASE_COORDINATOR_THREAD_FACTORY);
|
// both lease assignment algorithms will be started but only one will execute based on
|
||||||
|
// leaseAssignmentModeProvider.getLeaseAssignmentMode(). However for new applications starting in
|
||||||
// Taker runs with fixed DELAY because we want it to run slower in the event of performance degredation.
|
// KCLv3.x or applications successfully migrated to KCLv3.x, lease assignment mode will not
|
||||||
|
// change dynamically and will always be WORKER_UTILIZATION_AWARE_ASSIGNMENT, therefore
|
||||||
|
// don't initialize KCLv2.x lease assignment algorithm components that are not needed.
|
||||||
|
if (leaseAssignmentModeProvider.dynamicModeChangeSupportNeeded()) {
|
||||||
|
// Taker runs with fixed DELAY because we want it to run slower in the event of performance degradation.
|
||||||
takerFuture = leaseCoordinatorThreadPool.scheduleWithFixedDelay(
|
takerFuture = leaseCoordinatorThreadPool.scheduleWithFixedDelay(
|
||||||
new TakerRunnable(), 0L, takerIntervalMillis, TimeUnit.MILLISECONDS);
|
new TakerRunnable(leaseAssignmentModeProvider), 0L, takerIntervalMillis, TimeUnit.MILLISECONDS);
|
||||||
// Renewer runs at fixed INTERVAL because we want it to run at the same rate in the event of degredation.
|
}
|
||||||
|
|
||||||
|
leaseDiscoveryFuture = leaseCoordinatorThreadPool.scheduleAtFixedRate(
|
||||||
|
new LeaseDiscoveryRunnable(leaseAssignmentModeProvider),
|
||||||
|
0L,
|
||||||
|
leaseDiscovererIntervalMillis,
|
||||||
|
TimeUnit.MILLISECONDS);
|
||||||
|
|
||||||
|
// Renewer runs at fixed INTERVAL because we want it to run at the same rate in the event of degradation.
|
||||||
leaseCoordinatorThreadPool.scheduleAtFixedRate(
|
leaseCoordinatorThreadPool.scheduleAtFixedRate(
|
||||||
new RenewerRunnable(), 0L, renewerIntervalMillis, TimeUnit.MILLISECONDS);
|
new RenewerRunnable(), 0L, renewerIntervalMillis, TimeUnit.MILLISECONDS);
|
||||||
|
|
||||||
|
leaseGracefulShutdownHandler.start();
|
||||||
running = true;
|
running = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -383,6 +383,8 @@ public class DynamoDBLeaseCoordinator implements LeaseCoordinator {
|
||||||
}
|
}
|
||||||
|
|
||||||
leaseRenewalThreadpool.shutdownNow();
|
leaseRenewalThreadpool.shutdownNow();
|
||||||
|
leaseCoordinatorThreadPool.shutdownNow();
|
||||||
|
leaseGracefulShutdownHandler.stop();
|
||||||
synchronized (shutdownLock) {
|
synchronized (shutdownLock) {
|
||||||
leaseRenewer.clearCurrentlyHeldLeases();
|
leaseRenewer.clearCurrentlyHeldLeases();
|
||||||
running = false;
|
running = false;
|
||||||
|
|
@ -393,6 +395,10 @@ public class DynamoDBLeaseCoordinator implements LeaseCoordinator {
|
||||||
public void stopLeaseTaker() {
|
public void stopLeaseTaker() {
|
||||||
if (takerFuture != null) {
|
if (takerFuture != null) {
|
||||||
takerFuture.cancel(false);
|
takerFuture.cancel(false);
|
||||||
|
leaseDiscoveryFuture.cancel(false);
|
||||||
|
// the method is called in worker graceful shutdown. We want to stop any further lease shutdown
|
||||||
|
// so we don't interrupt worker shutdown.
|
||||||
|
leaseGracefulShutdownHandler.stop();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -418,20 +424,15 @@ public class DynamoDBLeaseCoordinator implements LeaseCoordinator {
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns executor service that should be used for lease renewal.
|
* Returns executor service for given ThreadFactory.
|
||||||
* @param maximumPoolSize Maximum allowed thread pool size
|
* @param maximumPoolSize Maximum allowed thread pool size
|
||||||
* @return Executor service that should be used for lease renewal.
|
* @return Executor service
|
||||||
*/
|
*/
|
||||||
private static ExecutorService getLeaseRenewalExecutorService(int maximumPoolSize) {
|
private static ExecutorService createExecutorService(final int maximumPoolSize, final ThreadFactory threadFactory) {
|
||||||
int coreLeaseCount = Math.max(maximumPoolSize / 4, 2);
|
int coreLeaseCount = Math.max(maximumPoolSize / 4, 2);
|
||||||
|
|
||||||
return new ThreadPoolExecutor(
|
return new ThreadPoolExecutor(
|
||||||
coreLeaseCount,
|
coreLeaseCount, maximumPoolSize, 60, TimeUnit.SECONDS, new LinkedTransferQueue<>(), threadFactory);
|
||||||
maximumPoolSize,
|
|
||||||
60,
|
|
||||||
TimeUnit.SECONDS,
|
|
||||||
new LinkedTransferQueue<>(),
|
|
||||||
LEASE_RENEWAL_THREAD_FACTORY);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
|
@ -472,6 +473,8 @@ public class DynamoDBLeaseCoordinator implements LeaseCoordinator {
|
||||||
* {@inheritDoc}
|
* {@inheritDoc}
|
||||||
*
|
*
|
||||||
* <p>NOTE: This method is deprecated. Please set the initial capacity through the constructor.</p>
|
* <p>NOTE: This method is deprecated. Please set the initial capacity through the constructor.</p>
|
||||||
|
*
|
||||||
|
* This is a method of the public lease coordinator interface.
|
||||||
*/
|
*/
|
||||||
@Override
|
@Override
|
||||||
@Deprecated
|
@Deprecated
|
||||||
|
|
@ -487,6 +490,8 @@ public class DynamoDBLeaseCoordinator implements LeaseCoordinator {
|
||||||
* {@inheritDoc}
|
* {@inheritDoc}
|
||||||
*
|
*
|
||||||
* <p>NOTE: This method is deprecated. Please set the initial capacity through the constructor.</p>
|
* <p>NOTE: This method is deprecated. Please set the initial capacity through the constructor.</p>
|
||||||
|
*
|
||||||
|
* This is a method of the public lease coordinator interface.
|
||||||
*/
|
*/
|
||||||
@Override
|
@Override
|
||||||
@Deprecated
|
@Deprecated
|
||||||
|
|
@ -497,4 +502,9 @@ public class DynamoDBLeaseCoordinator implements LeaseCoordinator {
|
||||||
initialLeaseTableWriteCapacity = writeCapacity;
|
initialLeaseTableWriteCapacity = writeCapacity;
|
||||||
return this;
|
return this;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public LeaseStatsRecorder leaseStatsRecorder() {
|
||||||
|
return leaseStatsRecorder;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,120 @@
|
||||||
|
package software.amazon.kinesis.leases.dynamodb;
|
||||||
|
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Objects;
|
||||||
|
import java.util.Set;
|
||||||
|
import java.util.concurrent.CompletableFuture;
|
||||||
|
import java.util.concurrent.ExecutorService;
|
||||||
|
import java.util.stream.Collectors;
|
||||||
|
|
||||||
|
import lombok.RequiredArgsConstructor;
|
||||||
|
import lombok.extern.slf4j.Slf4j;
|
||||||
|
import software.amazon.kinesis.leases.Lease;
|
||||||
|
import software.amazon.kinesis.leases.LeaseDiscoverer;
|
||||||
|
import software.amazon.kinesis.leases.LeaseRefresher;
|
||||||
|
import software.amazon.kinesis.leases.LeaseRenewer;
|
||||||
|
import software.amazon.kinesis.leases.exceptions.DependencyException;
|
||||||
|
import software.amazon.kinesis.leases.exceptions.InvalidStateException;
|
||||||
|
import software.amazon.kinesis.leases.exceptions.ProvisionedThroughputException;
|
||||||
|
import software.amazon.kinesis.metrics.MetricsFactory;
|
||||||
|
import software.amazon.kinesis.metrics.MetricsLevel;
|
||||||
|
import software.amazon.kinesis.metrics.MetricsScope;
|
||||||
|
import software.amazon.kinesis.metrics.MetricsUtil;
|
||||||
|
|
||||||
|
import static java.util.Objects.isNull;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* An implementation of {@link LeaseDiscoverer}, it uses {@link LeaseRefresher} to query
|
||||||
|
* {@link DynamoDBLeaseRefresher#LEASE_OWNER_TO_LEASE_KEY_INDEX_NAME } and find the leases assigned
|
||||||
|
* to current worker and then filter and returns the leases that have not started processing (looks at
|
||||||
|
* {@link LeaseRenewer#getCurrentlyHeldLeases()} to find out which leases are currently held leases).
|
||||||
|
*/
|
||||||
|
@Slf4j
|
||||||
|
@RequiredArgsConstructor
|
||||||
|
public class DynamoDBLeaseDiscoverer implements LeaseDiscoverer {
|
||||||
|
|
||||||
|
private final LeaseRefresher leaseRefresher;
|
||||||
|
private final LeaseRenewer leaseRenewer;
|
||||||
|
private final MetricsFactory metricsFactory;
|
||||||
|
private final String workerIdentifier;
|
||||||
|
private final ExecutorService executorService;
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public List<Lease> discoverNewLeases()
|
||||||
|
throws ProvisionedThroughputException, InvalidStateException, DependencyException {
|
||||||
|
final MetricsScope metricsScope = MetricsUtil.createMetricsWithOperation(metricsFactory, "LeaseDiscovery");
|
||||||
|
long startTime = System.currentTimeMillis();
|
||||||
|
boolean success = false;
|
||||||
|
try {
|
||||||
|
final Set<String> currentHeldLeaseKeys =
|
||||||
|
leaseRenewer.getCurrentlyHeldLeases().keySet();
|
||||||
|
|
||||||
|
final long listLeaseKeysForWorkerStartTime = System.currentTimeMillis();
|
||||||
|
final List<String> leaseKeys = leaseRefresher.listLeaseKeysForWorker(workerIdentifier);
|
||||||
|
MetricsUtil.addLatency(
|
||||||
|
metricsScope, "ListLeaseKeysForWorker", listLeaseKeysForWorkerStartTime, MetricsLevel.DETAILED);
|
||||||
|
|
||||||
|
final List<String> newLeaseKeys = leaseKeys.stream()
|
||||||
|
.filter(leaseKey -> !currentHeldLeaseKeys.contains(leaseKey))
|
||||||
|
.collect(Collectors.toList());
|
||||||
|
|
||||||
|
final long fetchNewLeasesStartTime = System.currentTimeMillis();
|
||||||
|
final List<CompletableFuture<Lease>> completableFutures = newLeaseKeys.stream()
|
||||||
|
.map(leaseKey ->
|
||||||
|
CompletableFuture.supplyAsync(() -> fetchLease(leaseKey, metricsScope), executorService))
|
||||||
|
.collect(Collectors.toList());
|
||||||
|
|
||||||
|
final List<Lease> newLeases = completableFutures.stream()
|
||||||
|
.map(CompletableFuture::join)
|
||||||
|
.filter(Objects::nonNull)
|
||||||
|
.collect(Collectors.toList());
|
||||||
|
|
||||||
|
log.info(
|
||||||
|
"New leases assigned to worker : {}, count : {}, leases : {}",
|
||||||
|
workerIdentifier,
|
||||||
|
newLeases.size(),
|
||||||
|
newLeases.stream().map(Lease::leaseKey).collect(Collectors.toList()));
|
||||||
|
|
||||||
|
MetricsUtil.addLatency(metricsScope, "FetchNewLeases", fetchNewLeasesStartTime, MetricsLevel.DETAILED);
|
||||||
|
|
||||||
|
success = true;
|
||||||
|
MetricsUtil.addCount(metricsScope, "NewLeasesDiscovered", newLeases.size(), MetricsLevel.DETAILED);
|
||||||
|
return newLeases;
|
||||||
|
} finally {
|
||||||
|
MetricsUtil.addWorkerIdentifier(metricsScope, workerIdentifier);
|
||||||
|
MetricsUtil.addSuccessAndLatency(metricsScope, success, startTime, MetricsLevel.SUMMARY);
|
||||||
|
MetricsUtil.endScope(metricsScope);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private Lease fetchLease(final String leaseKey, final MetricsScope metricsScope) {
|
||||||
|
try {
|
||||||
|
final Lease lease = leaseRefresher.getLease(leaseKey);
|
||||||
|
if (isNull(lease)) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
// GSI is eventually consistent thus, validate that the fetched lease is indeed assigned to this
|
||||||
|
// worker, if not just pass in this run.
|
||||||
|
if (!lease.leaseOwner().equals(workerIdentifier)) {
|
||||||
|
MetricsUtil.addCount(metricsScope, "OwnerMismatch", 1, MetricsLevel.DETAILED);
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
// if checkpointOwner is not null, it means that the lease is still pending shutdown for the last owner.
|
||||||
|
// Don't add the lease to the in-memory map yet.
|
||||||
|
if (lease.checkpointOwner() != null) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
// when a new lease is discovered, set the lastCounterIncrementNanos to current time as the time
|
||||||
|
// when it has become visible, on next renewer interval this will be updated by LeaseRenewer to
|
||||||
|
// correct time.
|
||||||
|
lease.lastCounterIncrementNanos(System.nanoTime());
|
||||||
|
return lease;
|
||||||
|
} catch (final Exception e) {
|
||||||
|
// if getLease on some lease key fail, continue and fetch other leases, the one failed will
|
||||||
|
// be fetched in the next iteration or will be reassigned if stayed idle for long.
|
||||||
|
MetricsUtil.addCount(metricsScope, "GetLease:Error", 1, MetricsLevel.SUMMARY);
|
||||||
|
log.error("GetLease failed for leaseKey : {}", leaseKey, e);
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
File diff suppressed because it is too large
Load diff
File diff suppressed because it is too large
Load diff
|
|
@ -14,6 +14,8 @@
|
||||||
*/
|
*/
|
||||||
package software.amazon.kinesis.leases.dynamodb;
|
package software.amazon.kinesis.leases.dynamodb;
|
||||||
|
|
||||||
|
import java.math.BigDecimal;
|
||||||
|
import java.math.RoundingMode;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.Collection;
|
import java.util.Collection;
|
||||||
import java.util.HashMap;
|
import java.util.HashMap;
|
||||||
|
|
@ -26,8 +28,10 @@ import java.util.concurrent.ConcurrentNavigableMap;
|
||||||
import java.util.concurrent.ConcurrentSkipListMap;
|
import java.util.concurrent.ConcurrentSkipListMap;
|
||||||
import java.util.concurrent.ExecutionException;
|
import java.util.concurrent.ExecutionException;
|
||||||
import java.util.concurrent.ExecutorService;
|
import java.util.concurrent.ExecutorService;
|
||||||
|
import java.util.concurrent.Executors;
|
||||||
import java.util.concurrent.Future;
|
import java.util.concurrent.Future;
|
||||||
import java.util.concurrent.TimeUnit;
|
import java.util.concurrent.TimeUnit;
|
||||||
|
import java.util.function.Consumer;
|
||||||
|
|
||||||
import lombok.NonNull;
|
import lombok.NonNull;
|
||||||
import lombok.RequiredArgsConstructor;
|
import lombok.RequiredArgsConstructor;
|
||||||
|
|
@ -39,6 +43,7 @@ import software.amazon.kinesis.common.StreamIdentifier;
|
||||||
import software.amazon.kinesis.leases.Lease;
|
import software.amazon.kinesis.leases.Lease;
|
||||||
import software.amazon.kinesis.leases.LeaseRefresher;
|
import software.amazon.kinesis.leases.LeaseRefresher;
|
||||||
import software.amazon.kinesis.leases.LeaseRenewer;
|
import software.amazon.kinesis.leases.LeaseRenewer;
|
||||||
|
import software.amazon.kinesis.leases.LeaseStatsRecorder;
|
||||||
import software.amazon.kinesis.leases.MultiStreamLease;
|
import software.amazon.kinesis.leases.MultiStreamLease;
|
||||||
import software.amazon.kinesis.leases.exceptions.DependencyException;
|
import software.amazon.kinesis.leases.exceptions.DependencyException;
|
||||||
import software.amazon.kinesis.leases.exceptions.InvalidStateException;
|
import software.amazon.kinesis.leases.exceptions.InvalidStateException;
|
||||||
|
|
@ -48,21 +53,32 @@ import software.amazon.kinesis.metrics.MetricsLevel;
|
||||||
import software.amazon.kinesis.metrics.MetricsScope;
|
import software.amazon.kinesis.metrics.MetricsScope;
|
||||||
import software.amazon.kinesis.metrics.MetricsUtil;
|
import software.amazon.kinesis.metrics.MetricsUtil;
|
||||||
|
|
||||||
|
import static java.util.Objects.nonNull;
|
||||||
|
import static software.amazon.kinesis.leases.LeaseStatsRecorder.BYTES_PER_KB;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* An implementation of {@link LeaseRenewer} that uses DynamoDB via {@link LeaseRefresher}.
|
* An implementation of {@link LeaseRenewer} that uses DynamoDB via {@link LeaseRefresher}.
|
||||||
*/
|
*/
|
||||||
@Slf4j
|
@Slf4j
|
||||||
@KinesisClientInternalApi
|
@KinesisClientInternalApi
|
||||||
public class DynamoDBLeaseRenewer implements LeaseRenewer {
|
public class DynamoDBLeaseRenewer implements LeaseRenewer {
|
||||||
|
|
||||||
|
/**
|
||||||
|
* 6 digit after decimal gives the granularity of 0.001 byte per second.
|
||||||
|
*/
|
||||||
|
private static final int DEFAULT_THROUGHPUT_DIGIT_AFTER_DECIMAL = 6;
|
||||||
|
|
||||||
private static final int RENEWAL_RETRIES = 2;
|
private static final int RENEWAL_RETRIES = 2;
|
||||||
private static final String RENEW_ALL_LEASES_DIMENSION = "RenewAllLeases";
|
private static final String RENEW_ALL_LEASES_DIMENSION = "RenewAllLeases";
|
||||||
|
private static final String LEASE_RENEWER_INITIALIZE = "LeaseRenewerInitialize";
|
||||||
|
|
||||||
private final LeaseRefresher leaseRefresher;
|
private final LeaseRefresher leaseRefresher;
|
||||||
private final String workerIdentifier;
|
private final String workerIdentifier;
|
||||||
private final long leaseDurationNanos;
|
private final long leaseDurationNanos;
|
||||||
private final ExecutorService executorService;
|
private final ExecutorService executorService;
|
||||||
private final MetricsFactory metricsFactory;
|
private final MetricsFactory metricsFactory;
|
||||||
|
private final LeaseStatsRecorder leaseStatsRecorder;
|
||||||
|
private final Consumer<Lease> leaseGracefulShutdownCallback;
|
||||||
private final ConcurrentNavigableMap<String, Lease> ownedLeases = new ConcurrentSkipListMap<>();
|
private final ConcurrentNavigableMap<String, Lease> ownedLeases = new ConcurrentSkipListMap<>();
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -82,12 +98,16 @@ public class DynamoDBLeaseRenewer implements LeaseRenewer {
|
||||||
final String workerIdentifier,
|
final String workerIdentifier,
|
||||||
final long leaseDurationMillis,
|
final long leaseDurationMillis,
|
||||||
final ExecutorService executorService,
|
final ExecutorService executorService,
|
||||||
final MetricsFactory metricsFactory) {
|
final MetricsFactory metricsFactory,
|
||||||
|
final LeaseStatsRecorder leaseStatsRecorder,
|
||||||
|
final Consumer<Lease> leaseGracefulShutdownCallback) {
|
||||||
this.leaseRefresher = leaseRefresher;
|
this.leaseRefresher = leaseRefresher;
|
||||||
this.workerIdentifier = workerIdentifier;
|
this.workerIdentifier = workerIdentifier;
|
||||||
this.leaseDurationNanos = TimeUnit.MILLISECONDS.toNanos(leaseDurationMillis);
|
this.leaseDurationNanos = TimeUnit.MILLISECONDS.toNanos(leaseDurationMillis);
|
||||||
this.executorService = executorService;
|
this.executorService = executorService;
|
||||||
this.metricsFactory = metricsFactory;
|
this.metricsFactory = metricsFactory;
|
||||||
|
this.leaseStatsRecorder = leaseStatsRecorder;
|
||||||
|
this.leaseGracefulShutdownCallback = leaseGracefulShutdownCallback;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -187,11 +207,21 @@ public class DynamoDBLeaseRenewer implements LeaseRenewer {
|
||||||
// ShutdownException).
|
// ShutdownException).
|
||||||
boolean isLeaseExpired = lease.isExpired(leaseDurationNanos, System.nanoTime());
|
boolean isLeaseExpired = lease.isExpired(leaseDurationNanos, System.nanoTime());
|
||||||
if (renewEvenIfExpired || !isLeaseExpired) {
|
if (renewEvenIfExpired || !isLeaseExpired) {
|
||||||
|
final Double throughputPerKBps = this.leaseStatsRecorder.getThroughputKBps(leaseKey);
|
||||||
|
if (nonNull(throughputPerKBps)) {
|
||||||
|
lease.throughputKBps(BigDecimal.valueOf(throughputPerKBps)
|
||||||
|
.setScale(DEFAULT_THROUGHPUT_DIGIT_AFTER_DECIMAL, RoundingMode.HALF_UP)
|
||||||
|
.doubleValue());
|
||||||
|
}
|
||||||
renewedLease = leaseRefresher.renewLease(lease);
|
renewedLease = leaseRefresher.renewLease(lease);
|
||||||
}
|
}
|
||||||
if (renewedLease) {
|
if (renewedLease) {
|
||||||
lease.lastCounterIncrementNanos(System.nanoTime());
|
lease.lastCounterIncrementNanos(System.nanoTime());
|
||||||
}
|
}
|
||||||
|
if (lease.shutdownRequested()) {
|
||||||
|
// the underlying function will dedup
|
||||||
|
leaseGracefulShutdownCallback.accept(lease.copy());
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (renewedLease) {
|
if (renewedLease) {
|
||||||
|
|
@ -391,6 +421,12 @@ public class DynamoDBLeaseRenewer implements LeaseRenewer {
|
||||||
* every time we acquire a lease, it gets a new concurrency token.
|
* every time we acquire a lease, it gets a new concurrency token.
|
||||||
*/
|
*/
|
||||||
authoritativeLease.concurrencyToken(UUID.randomUUID());
|
authoritativeLease.concurrencyToken(UUID.randomUUID());
|
||||||
|
if (nonNull(lease.throughputKBps())) {
|
||||||
|
leaseStatsRecorder.recordStats(LeaseStatsRecorder.LeaseStats.builder()
|
||||||
|
.leaseKey(lease.leaseKey())
|
||||||
|
.bytes(Math.round(lease.throughputKBps() * BYTES_PER_KB)) // Convert KB to Bytes
|
||||||
|
.build());
|
||||||
|
}
|
||||||
ownedLeases.put(authoritativeLease.leaseKey(), authoritativeLease);
|
ownedLeases.put(authoritativeLease.leaseKey(), authoritativeLease);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -409,6 +445,7 @@ public class DynamoDBLeaseRenewer implements LeaseRenewer {
|
||||||
*/
|
*/
|
||||||
@Override
|
@Override
|
||||||
public void dropLease(Lease lease) {
|
public void dropLease(Lease lease) {
|
||||||
|
leaseStatsRecorder.dropLeaseStats(lease.leaseKey());
|
||||||
ownedLeases.remove(lease.leaseKey());
|
ownedLeases.remove(lease.leaseKey());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -417,15 +454,27 @@ public class DynamoDBLeaseRenewer implements LeaseRenewer {
|
||||||
*/
|
*/
|
||||||
@Override
|
@Override
|
||||||
public void initialize() throws DependencyException, InvalidStateException, ProvisionedThroughputException {
|
public void initialize() throws DependencyException, InvalidStateException, ProvisionedThroughputException {
|
||||||
Collection<Lease> leases = leaseRefresher.listLeases();
|
final MetricsScope scope = MetricsUtil.createMetricsWithOperation(metricsFactory, LEASE_RENEWER_INITIALIZE);
|
||||||
List<Lease> myLeases = new LinkedList<>();
|
final ExecutorService singleThreadExecutorService = Executors.newSingleThreadExecutor();
|
||||||
|
boolean success = false;
|
||||||
|
try {
|
||||||
|
final Map.Entry<List<Lease>, List<String>> response =
|
||||||
|
leaseRefresher.listLeasesParallely(singleThreadExecutorService, 1);
|
||||||
|
|
||||||
|
if (!response.getValue().isEmpty()) {
|
||||||
|
log.warn("List of leaseKeys failed to deserialize : {} ", response.getValue());
|
||||||
|
}
|
||||||
|
|
||||||
|
final List<Lease> myLeases = new LinkedList<>();
|
||||||
boolean renewEvenIfExpired = true;
|
boolean renewEvenIfExpired = true;
|
||||||
|
|
||||||
for (Lease lease : leases) {
|
for (Lease lease : response.getKey()) {
|
||||||
if (workerIdentifier.equals(lease.leaseOwner())) {
|
if (workerIdentifier.equals(lease.leaseOwner())) {
|
||||||
log.info(" Worker {} found lease {}", workerIdentifier, lease);
|
log.info(" Worker {} found lease {}", workerIdentifier, lease);
|
||||||
// Okay to renew even if lease is expired, because we start with an empty list and we add the lease to
|
// Okay to renew even if lease is expired, because we start with an empty list and we add the lease
|
||||||
// our list only after a successful renew. So we don't need to worry about the edge case where we could
|
// to
|
||||||
|
// our list only after a successful renew. So we don't need to worry about the edge case where we
|
||||||
|
// could
|
||||||
// continue renewing a lease after signaling a lease loss to the application.
|
// continue renewing a lease after signaling a lease loss to the application.
|
||||||
|
|
||||||
if (renewLease(lease, renewEvenIfExpired)) {
|
if (renewLease(lease, renewEvenIfExpired)) {
|
||||||
|
|
@ -437,6 +486,16 @@ public class DynamoDBLeaseRenewer implements LeaseRenewer {
|
||||||
}
|
}
|
||||||
|
|
||||||
addLeasesToRenew(myLeases);
|
addLeasesToRenew(myLeases);
|
||||||
|
success = true;
|
||||||
|
} catch (final Exception e) {
|
||||||
|
// It's ok to swollow exception here fail to discover all leases here, as the assignment logic takes
|
||||||
|
// care of reassignment if some lease is expired.
|
||||||
|
log.warn("LeaseRefresher failed in initialization during renewing of pre assigned leases", e);
|
||||||
|
} finally {
|
||||||
|
singleThreadExecutorService.shutdown();
|
||||||
|
MetricsUtil.addCount(scope, "Fault", success ? 0 : 1, MetricsLevel.DETAILED);
|
||||||
|
MetricsUtil.endScope(scope);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private void verifyNotNull(Object object, String message) {
|
private void verifyNotNull(Object object, String message) {
|
||||||
|
|
|
||||||
|
|
@ -44,11 +44,8 @@ import software.amazon.kinesis.retrieval.kpl.ExtendedSequenceNumber;
|
||||||
*/
|
*/
|
||||||
@KinesisClientInternalApi
|
@KinesisClientInternalApi
|
||||||
public class DynamoDBLeaseSerializer implements LeaseSerializer {
|
public class DynamoDBLeaseSerializer implements LeaseSerializer {
|
||||||
private static final String LEASE_KEY_KEY = "leaseKey";
|
|
||||||
private static final String LEASE_OWNER_KEY = "leaseOwner";
|
|
||||||
private static final String LEASE_COUNTER_KEY = "leaseCounter";
|
private static final String LEASE_COUNTER_KEY = "leaseCounter";
|
||||||
private static final String OWNER_SWITCHES_KEY = "ownerSwitchesSinceCheckpoint";
|
private static final String OWNER_SWITCHES_KEY = "ownerSwitchesSinceCheckpoint";
|
||||||
private static final String CHECKPOINT_SEQUENCE_NUMBER_KEY = "checkpoint";
|
|
||||||
private static final String CHECKPOINT_SUBSEQUENCE_NUMBER_KEY = "checkpointSubSequenceNumber";
|
private static final String CHECKPOINT_SUBSEQUENCE_NUMBER_KEY = "checkpointSubSequenceNumber";
|
||||||
private static final String PENDING_CHECKPOINT_SEQUENCE_KEY = "pendingCheckpoint";
|
private static final String PENDING_CHECKPOINT_SEQUENCE_KEY = "pendingCheckpoint";
|
||||||
private static final String PENDING_CHECKPOINT_SUBSEQUENCE_KEY = "pendingCheckpointSubSequenceNumber";
|
private static final String PENDING_CHECKPOINT_SUBSEQUENCE_KEY = "pendingCheckpointSubSequenceNumber";
|
||||||
|
|
@ -57,6 +54,11 @@ public class DynamoDBLeaseSerializer implements LeaseSerializer {
|
||||||
private static final String CHILD_SHARD_IDS_KEY = "childShardIds";
|
private static final String CHILD_SHARD_IDS_KEY = "childShardIds";
|
||||||
private static final String STARTING_HASH_KEY = "startingHashKey";
|
private static final String STARTING_HASH_KEY = "startingHashKey";
|
||||||
private static final String ENDING_HASH_KEY = "endingHashKey";
|
private static final String ENDING_HASH_KEY = "endingHashKey";
|
||||||
|
private static final String THROUGHOUT_PUT_KBPS = "throughputKBps";
|
||||||
|
private static final String CHECKPOINT_SEQUENCE_NUMBER_KEY = "checkpoint";
|
||||||
|
static final String CHECKPOINT_OWNER = "checkpointOwner";
|
||||||
|
static final String LEASE_OWNER_KEY = "leaseOwner";
|
||||||
|
static final String LEASE_KEY_KEY = "leaseKey";
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public Map<String, AttributeValue> toDynamoRecord(final Lease lease) {
|
public Map<String, AttributeValue> toDynamoRecord(final Lease lease) {
|
||||||
|
|
@ -110,6 +112,13 @@ public class DynamoDBLeaseSerializer implements LeaseSerializer {
|
||||||
lease.hashKeyRangeForLease().serializedEndingHashKey()));
|
lease.hashKeyRangeForLease().serializedEndingHashKey()));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (lease.throughputKBps() != null) {
|
||||||
|
result.put(THROUGHOUT_PUT_KBPS, DynamoUtils.createAttributeValue(lease.throughputKBps()));
|
||||||
|
}
|
||||||
|
|
||||||
|
if (lease.checkpointOwner() != null) {
|
||||||
|
result.put(CHECKPOINT_OWNER, DynamoUtils.createAttributeValue(lease.checkpointOwner()));
|
||||||
|
}
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -146,6 +155,14 @@ public class DynamoDBLeaseSerializer implements LeaseSerializer {
|
||||||
leaseToUpdate.hashKeyRange(HashKeyRangeForLease.deserialize(startingHashKey, endingHashKey));
|
leaseToUpdate.hashKeyRange(HashKeyRangeForLease.deserialize(startingHashKey, endingHashKey));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (DynamoUtils.safeGetDouble(dynamoRecord, THROUGHOUT_PUT_KBPS) != null) {
|
||||||
|
leaseToUpdate.throughputKBps(DynamoUtils.safeGetDouble(dynamoRecord, THROUGHOUT_PUT_KBPS));
|
||||||
|
}
|
||||||
|
|
||||||
|
if (DynamoUtils.safeGetString(dynamoRecord, CHECKPOINT_OWNER) != null) {
|
||||||
|
leaseToUpdate.checkpointOwner(DynamoUtils.safeGetString(dynamoRecord, CHECKPOINT_OWNER));
|
||||||
|
}
|
||||||
|
|
||||||
return leaseToUpdate;
|
return leaseToUpdate;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -181,18 +198,9 @@ public class DynamoDBLeaseSerializer implements LeaseSerializer {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public Map<String, ExpectedAttributeValue> getDynamoLeaseOwnerExpectation(final Lease lease) {
|
public Map<String, ExpectedAttributeValue> getDynamoLeaseOwnerExpectation(final Lease lease) {
|
||||||
Map<String, ExpectedAttributeValue> result = new HashMap<>();
|
final Map<String, ExpectedAttributeValue> result = new HashMap<>();
|
||||||
|
result.put(LEASE_OWNER_KEY, buildExpectedAttributeValueIfExistsOrValue(lease.leaseOwner()));
|
||||||
ExpectedAttributeValue.Builder eavBuilder = ExpectedAttributeValue.builder();
|
result.put(CHECKPOINT_OWNER, buildExpectedAttributeValueIfExistsOrValue(lease.checkpointOwner()));
|
||||||
|
|
||||||
if (lease.leaseOwner() == null) {
|
|
||||||
eavBuilder = eavBuilder.exists(false);
|
|
||||||
} else {
|
|
||||||
eavBuilder = eavBuilder.value(DynamoUtils.createAttributeValue(lease.leaseOwner()));
|
|
||||||
}
|
|
||||||
|
|
||||||
result.put(LEASE_OWNER_KEY, eavBuilder.build());
|
|
||||||
|
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -247,9 +255,17 @@ public class DynamoDBLeaseSerializer implements LeaseSerializer {
|
||||||
.value(DynamoUtils.createAttributeValue(owner))
|
.value(DynamoUtils.createAttributeValue(owner))
|
||||||
.action(AttributeAction.PUT)
|
.action(AttributeAction.PUT)
|
||||||
.build());
|
.build());
|
||||||
|
// this method is currently used by assignLease and takeLease. In both case we want the checkpoint owner to be
|
||||||
|
// deleted as this is a fresh assignment
|
||||||
|
result.put(
|
||||||
|
CHECKPOINT_OWNER,
|
||||||
|
AttributeValueUpdate.builder().action(AttributeAction.DELETE).build());
|
||||||
|
|
||||||
String oldOwner = lease.leaseOwner();
|
String oldOwner = lease.leaseOwner();
|
||||||
if (oldOwner != null && !oldOwner.equals(owner)) {
|
String checkpointOwner = lease.checkpointOwner();
|
||||||
|
// if checkpoint owner is not null, this update is supposed to remove the checkpoint owner
|
||||||
|
// and transfer the lease ownership to the leaseOwner so incrementing the owner switch key
|
||||||
|
if (oldOwner != null && !oldOwner.equals(owner) || (checkpointOwner != null && checkpointOwner.equals(owner))) {
|
||||||
result.put(
|
result.put(
|
||||||
OWNER_SWITCHES_KEY,
|
OWNER_SWITCHES_KEY,
|
||||||
AttributeValueUpdate.builder()
|
AttributeValueUpdate.builder()
|
||||||
|
|
@ -261,18 +277,38 @@ public class DynamoDBLeaseSerializer implements LeaseSerializer {
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* AssignLease performs the PUT action on the LeaseOwner and ADD (1) action on the leaseCounter.
|
||||||
|
* @param lease lease that needs to be assigned
|
||||||
|
* @param newOwner newLeaseOwner
|
||||||
|
* @return Map of AttributeName to update operation
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public Map<String, AttributeValueUpdate> getDynamoAssignLeaseUpdate(final Lease lease, final String newOwner) {
|
||||||
|
Map<String, AttributeValueUpdate> result = getDynamoTakeLeaseUpdate(lease, newOwner);
|
||||||
|
|
||||||
|
result.put(LEASE_COUNTER_KEY, getAttributeValueUpdateForAdd());
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public Map<String, AttributeValueUpdate> getDynamoEvictLeaseUpdate(final Lease lease) {
|
public Map<String, AttributeValueUpdate> getDynamoEvictLeaseUpdate(final Lease lease) {
|
||||||
Map<String, AttributeValueUpdate> result = new HashMap<>();
|
final Map<String, AttributeValueUpdate> result = new HashMap<>();
|
||||||
AttributeValue value = null;
|
// if checkpointOwner is not null, it means lease handoff is initiated. In this case we just remove the
|
||||||
|
// checkpoint owner so the next owner (leaseOwner) can pick up the lease without waiting for assignment.
|
||||||
|
// Otherwise, remove the leaseOwner
|
||||||
|
if (lease.checkpointOwner() == null) {
|
||||||
result.put(
|
result.put(
|
||||||
LEASE_OWNER_KEY,
|
LEASE_OWNER_KEY,
|
||||||
AttributeValueUpdate.builder()
|
AttributeValueUpdate.builder()
|
||||||
.value(value)
|
|
||||||
.action(AttributeAction.DELETE)
|
.action(AttributeAction.DELETE)
|
||||||
.build());
|
.build());
|
||||||
|
}
|
||||||
|
// We always want to remove checkpointOwner, it's ok even if it's null
|
||||||
|
result.put(
|
||||||
|
CHECKPOINT_OWNER,
|
||||||
|
AttributeValueUpdate.builder().action(AttributeAction.DELETE).build());
|
||||||
|
result.put(LEASE_COUNTER_KEY, getAttributeValueUpdateForAdd());
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -394,4 +430,58 @@ public class DynamoDBLeaseSerializer implements LeaseSerializer {
|
||||||
|
|
||||||
return definitions;
|
return definitions;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Collection<KeySchemaElement> getWorkerIdToLeaseKeyIndexKeySchema() {
|
||||||
|
final List<KeySchemaElement> keySchema = new ArrayList<>();
|
||||||
|
keySchema.add(KeySchemaElement.builder()
|
||||||
|
.attributeName(LEASE_OWNER_KEY)
|
||||||
|
.keyType(KeyType.HASH)
|
||||||
|
.build());
|
||||||
|
keySchema.add(KeySchemaElement.builder()
|
||||||
|
.attributeName(LEASE_KEY_KEY)
|
||||||
|
.keyType(KeyType.RANGE)
|
||||||
|
.build());
|
||||||
|
return keySchema;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Collection<AttributeDefinition> getWorkerIdToLeaseKeyIndexAttributeDefinitions() {
|
||||||
|
final List<AttributeDefinition> definitions = new ArrayList<>();
|
||||||
|
definitions.add(AttributeDefinition.builder()
|
||||||
|
.attributeName(LEASE_OWNER_KEY)
|
||||||
|
.attributeType(ScalarAttributeType.S)
|
||||||
|
.build());
|
||||||
|
definitions.add(AttributeDefinition.builder()
|
||||||
|
.attributeName(LEASE_KEY_KEY)
|
||||||
|
.attributeType(ScalarAttributeType.S)
|
||||||
|
.build());
|
||||||
|
return definitions;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Map<String, AttributeValueUpdate> getDynamoLeaseThroughputKbpsUpdate(Lease lease) {
|
||||||
|
final Map<String, AttributeValueUpdate> result = new HashMap<>();
|
||||||
|
final AttributeValueUpdate avu = AttributeValueUpdate.builder()
|
||||||
|
.value(DynamoUtils.createAttributeValue(lease.throughputKBps()))
|
||||||
|
.action(AttributeAction.PUT)
|
||||||
|
.build();
|
||||||
|
result.put(THROUGHOUT_PUT_KBPS, avu);
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
private static ExpectedAttributeValue buildExpectedAttributeValueIfExistsOrValue(String value) {
|
||||||
|
return value == null
|
||||||
|
? ExpectedAttributeValue.builder().exists(false).build()
|
||||||
|
: ExpectedAttributeValue.builder()
|
||||||
|
.value(DynamoUtils.createAttributeValue(value))
|
||||||
|
.build();
|
||||||
|
}
|
||||||
|
|
||||||
|
private static AttributeValueUpdate getAttributeValueUpdateForAdd() {
|
||||||
|
return AttributeValueUpdate.builder()
|
||||||
|
.value(DynamoUtils.createAttributeValue(1L))
|
||||||
|
.action(AttributeAction.ADD)
|
||||||
|
.build();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -106,15 +106,6 @@ public class DynamoDBLeaseTaker implements LeaseTaker {
|
||||||
return this;
|
return this;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* @deprecated Misspelled method, use {@link DynamoDBLeaseTaker#withVeryOldLeaseDurationNanosMultiplier(int)}
|
|
||||||
*/
|
|
||||||
@Deprecated
|
|
||||||
public DynamoDBLeaseTaker withVeryOldLeaseDurationNanosMultipler(long veryOldLeaseDurationNanosMultipler) {
|
|
||||||
this.veryOldLeaseDurationNanosMultiplier = (int) veryOldLeaseDurationNanosMultipler;
|
|
||||||
return this;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Overrides the default very old lease duration nanos multiplier to increase the threshold for taking very old leases.
|
* Overrides the default very old lease duration nanos multiplier to increase the threshold for taking very old leases.
|
||||||
* Setting this to a higher value than 3 will increase the threshold for very old lease taking.
|
* Setting this to a higher value than 3 will increase the threshold for very old lease taking.
|
||||||
|
|
|
||||||
|
|
@ -266,7 +266,8 @@ class ConsumerStates {
|
||||||
argument.idleTimeInMilliseconds(),
|
argument.idleTimeInMilliseconds(),
|
||||||
argument.aggregatorUtil(),
|
argument.aggregatorUtil(),
|
||||||
argument.metricsFactory(),
|
argument.metricsFactory(),
|
||||||
argument.schemaRegistryDecoder());
|
argument.schemaRegistryDecoder(),
|
||||||
|
argument.leaseCoordinator().leaseStatsRecorder());
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
|
@ -336,7 +337,8 @@ class ConsumerStates {
|
||||||
argument.shardRecordProcessor(),
|
argument.shardRecordProcessor(),
|
||||||
argument.recordProcessorCheckpointer(),
|
argument.recordProcessorCheckpointer(),
|
||||||
consumer.shutdownNotification(),
|
consumer.shutdownNotification(),
|
||||||
argument.shardInfo());
|
argument.shardInfo(),
|
||||||
|
consumer.shardConsumerArgument().leaseCoordinator());
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,213 @@
|
||||||
|
package software.amazon.kinesis.lifecycle;
|
||||||
|
|
||||||
|
import java.util.concurrent.ConcurrentHashMap;
|
||||||
|
import java.util.concurrent.ConcurrentMap;
|
||||||
|
import java.util.concurrent.Executors;
|
||||||
|
import java.util.concurrent.ScheduledExecutorService;
|
||||||
|
import java.util.concurrent.TimeUnit;
|
||||||
|
import java.util.function.Supplier;
|
||||||
|
|
||||||
|
import com.google.common.util.concurrent.ThreadFactoryBuilder;
|
||||||
|
import lombok.Data;
|
||||||
|
import lombok.RequiredArgsConstructor;
|
||||||
|
import lombok.extern.slf4j.Slf4j;
|
||||||
|
import software.amazon.kinesis.annotations.KinesisClientInternalApi;
|
||||||
|
import software.amazon.kinesis.leases.Lease;
|
||||||
|
import software.amazon.kinesis.leases.LeaseCoordinator;
|
||||||
|
import software.amazon.kinesis.leases.ShardInfo;
|
||||||
|
import software.amazon.kinesis.leases.dynamodb.DynamoDBLeaseCoordinator;
|
||||||
|
import software.amazon.kinesis.leases.exceptions.DependencyException;
|
||||||
|
import software.amazon.kinesis.leases.exceptions.InvalidStateException;
|
||||||
|
import software.amazon.kinesis.leases.exceptions.ProvisionedThroughputException;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* This class handles the graceful shutdown of shard consumers. When a lease is requested for shutdown, it will be
|
||||||
|
* enqueued from the lease renewal thread which will call the shard consumer of the lease to enqueue a shutdown request.
|
||||||
|
* The class monitors those leases and check if the shutdown is properly completed.
|
||||||
|
* If the shard consumer doesn't shut down within the given timeout, it will trigger a lease transfer.
|
||||||
|
*/
|
||||||
|
@Slf4j
|
||||||
|
@RequiredArgsConstructor
|
||||||
|
@KinesisClientInternalApi
|
||||||
|
public class LeaseGracefulShutdownHandler {
|
||||||
|
|
||||||
|
// Arbitrary number to run a similar frequency as the scheduler based on shardConsumerDispatchPollIntervalMillis
|
||||||
|
// which is how fast scheduler triggers state change. It's ok to add few extra second delay to call shutdown since
|
||||||
|
// the leases should still be processing by the current owner so there should not be processing delay due to this.
|
||||||
|
private static final long SHUTDOWN_CHECK_INTERVAL_MILLIS = 2000;
|
||||||
|
|
||||||
|
private final long shutdownTimeoutMillis;
|
||||||
|
private final ConcurrentMap<ShardInfo, ShardConsumer> shardInfoShardConsumerMap;
|
||||||
|
private final LeaseCoordinator leaseCoordinator;
|
||||||
|
private final Supplier<Long> currentTimeSupplier;
|
||||||
|
private final ConcurrentMap<ShardInfo, LeasePendingShutdown> shardInfoLeasePendingShutdownMap =
|
||||||
|
new ConcurrentHashMap<>();
|
||||||
|
private final ScheduledExecutorService executorService;
|
||||||
|
|
||||||
|
private volatile boolean isRunning = false;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Factory method to create a new instance of LeaseGracefulShutdownHandler.
|
||||||
|
*
|
||||||
|
* @param shutdownTimeoutMillis Timeout for graceful shutdown of shard consumers.
|
||||||
|
* @param shardInfoShardConsumerMap Map of shard info to shard consumer instances.
|
||||||
|
* @param leaseCoordinator Lease coordinator instance to access lease information.
|
||||||
|
* @return A new instance of LeaseGracefulShutdownHandler.
|
||||||
|
*/
|
||||||
|
public static LeaseGracefulShutdownHandler create(
|
||||||
|
long shutdownTimeoutMillis,
|
||||||
|
ConcurrentMap<ShardInfo, ShardConsumer> shardInfoShardConsumerMap,
|
||||||
|
LeaseCoordinator leaseCoordinator) {
|
||||||
|
return new LeaseGracefulShutdownHandler(
|
||||||
|
shutdownTimeoutMillis,
|
||||||
|
shardInfoShardConsumerMap,
|
||||||
|
leaseCoordinator,
|
||||||
|
System::currentTimeMillis,
|
||||||
|
Executors.newSingleThreadScheduledExecutor(new ThreadFactoryBuilder()
|
||||||
|
.setNameFormat("LeaseGracefulShutdown-%04d")
|
||||||
|
.setDaemon(true)
|
||||||
|
.build()));
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Starts the shard consumer shutdown handler thread.
|
||||||
|
*/
|
||||||
|
public void start() {
|
||||||
|
if (!isRunning) {
|
||||||
|
log.info("Starting graceful lease handoff thread.");
|
||||||
|
executorService.scheduleAtFixedRate(
|
||||||
|
this::monitorGracefulShutdownLeases, 0, SHUTDOWN_CHECK_INTERVAL_MILLIS, TimeUnit.MILLISECONDS);
|
||||||
|
isRunning = true;
|
||||||
|
} else {
|
||||||
|
log.info("Graceful lease handoff thread already running, no need to start.");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Stops the shard consumer shutdown handler thread.
|
||||||
|
*/
|
||||||
|
public void stop() {
|
||||||
|
if (isRunning) {
|
||||||
|
log.info("Stopping graceful lease handoff thread.");
|
||||||
|
executorService.shutdown();
|
||||||
|
isRunning = false;
|
||||||
|
} else {
|
||||||
|
log.info("Graceful lease handoff thread already stopped.");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Enqueue a shutdown request for the given lease if the lease has requested shutdown and the shard consumer
|
||||||
|
* is not already shutdown.
|
||||||
|
*
|
||||||
|
* @param lease The lease to enqueue a shutdown request for.
|
||||||
|
*/
|
||||||
|
public void enqueueShutdown(Lease lease) {
|
||||||
|
if (lease == null || !lease.shutdownRequested() || !isRunning) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
final ShardInfo shardInfo = DynamoDBLeaseCoordinator.convertLeaseToAssignment(lease);
|
||||||
|
final ShardConsumer consumer = shardInfoShardConsumerMap.get(shardInfo);
|
||||||
|
if (consumer == null || consumer.isShutdown()) {
|
||||||
|
shardInfoLeasePendingShutdownMap.remove(shardInfo);
|
||||||
|
} else {
|
||||||
|
// there could be change shard get enqueued after getting removed. This should be okay because
|
||||||
|
// this enqueue will be no-op and will be removed again because the shardConsumer associated with the
|
||||||
|
// shardInfo is shutdown by then.
|
||||||
|
shardInfoLeasePendingShutdownMap.computeIfAbsent(shardInfo, key -> {
|
||||||
|
log.info("Calling graceful shutdown for lease {}", lease.leaseKey());
|
||||||
|
LeasePendingShutdown leasePendingShutdown = new LeasePendingShutdown(lease, consumer);
|
||||||
|
initiateShutdown(leasePendingShutdown);
|
||||||
|
return leasePendingShutdown;
|
||||||
|
});
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Wait for shutdown to complete or transfer ownership of lease to the next owner if timeout is met.
|
||||||
|
*/
|
||||||
|
private void monitorGracefulShutdownLeases() {
|
||||||
|
String leaseKey = null;
|
||||||
|
try {
|
||||||
|
for (ConcurrentMap.Entry<ShardInfo, LeasePendingShutdown> entry :
|
||||||
|
shardInfoLeasePendingShutdownMap.entrySet()) {
|
||||||
|
final LeasePendingShutdown leasePendingShutdown = entry.getValue();
|
||||||
|
final ShardInfo shardInfo = entry.getKey();
|
||||||
|
leaseKey = leasePendingShutdown.lease.leaseKey();
|
||||||
|
|
||||||
|
if (leasePendingShutdown.shardConsumer.isShutdown()
|
||||||
|
|| shardInfoShardConsumerMap.get(shardInfo) == null
|
||||||
|
|| leaseCoordinator.getCurrentlyHeldLease(leaseKey) == null) {
|
||||||
|
logTimeoutMessage(leasePendingShutdown);
|
||||||
|
shardInfoLeasePendingShutdownMap.remove(shardInfo);
|
||||||
|
} else if (getCurrentTimeMillis() >= leasePendingShutdown.timeoutTimestampMillis
|
||||||
|
&& !leasePendingShutdown.leaseTransferCalled) {
|
||||||
|
try {
|
||||||
|
log.info(
|
||||||
|
"Timeout {} millisecond reached waiting for lease {} to graceful handoff."
|
||||||
|
+ " Attempting to transfer the lease to {}",
|
||||||
|
shutdownTimeoutMillis,
|
||||||
|
leaseKey,
|
||||||
|
leasePendingShutdown.lease.leaseOwner());
|
||||||
|
transferLeaseIfOwner(leasePendingShutdown);
|
||||||
|
} catch (DependencyException | InvalidStateException | ProvisionedThroughputException e) {
|
||||||
|
log.warn("Failed to transfer lease for key {}. Will retry", leaseKey, e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} catch (Exception e) {
|
||||||
|
log.error("Error in graceful shutdown for lease {}", leaseKey, e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private void initiateShutdown(LeasePendingShutdown tracker) {
|
||||||
|
tracker.shardConsumer.gracefulShutdown(null);
|
||||||
|
tracker.shutdownRequested = true;
|
||||||
|
tracker.timeoutTimestampMillis = getCurrentTimeMillis() + shutdownTimeoutMillis;
|
||||||
|
}
|
||||||
|
|
||||||
|
private void logTimeoutMessage(LeasePendingShutdown leasePendingShutdown) {
|
||||||
|
if (leasePendingShutdown.leaseTransferCalled) {
|
||||||
|
final long timeElapsedSinceShutdownInitiated =
|
||||||
|
getCurrentTimeMillis() - leasePendingShutdown.timeoutTimestampMillis + shutdownTimeoutMillis;
|
||||||
|
log.info(
|
||||||
|
"Lease {} took {} milliseconds to complete the shutdown. "
|
||||||
|
+ "Consider tuning the GracefulLeaseHandoffTimeoutMillis to prevent timeouts, "
|
||||||
|
+ "if necessary.",
|
||||||
|
leasePendingShutdown.lease.leaseKey(),
|
||||||
|
timeElapsedSinceShutdownInitiated);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private void transferLeaseIfOwner(LeasePendingShutdown leasePendingShutdown)
|
||||||
|
throws ProvisionedThroughputException, InvalidStateException, DependencyException {
|
||||||
|
final Lease lease = leasePendingShutdown.lease;
|
||||||
|
if (leaseCoordinator.workerIdentifier().equals(lease.checkpointOwner())) {
|
||||||
|
// assignLease will increment the leaseCounter which will cause the heartbeat to stop on the current owner
|
||||||
|
// for the lease
|
||||||
|
leaseCoordinator.leaseRefresher().assignLease(lease, lease.leaseOwner());
|
||||||
|
} else {
|
||||||
|
// the worker ID check is just for sanity. We don't expect it to be different from the current worker.
|
||||||
|
log.error(
|
||||||
|
"Lease {} checkpoint owner mismatch found {} but it should be {}",
|
||||||
|
lease.leaseKey(),
|
||||||
|
lease.checkpointOwner(),
|
||||||
|
leaseCoordinator.workerIdentifier());
|
||||||
|
}
|
||||||
|
// mark it true because we don't want to enter the method again because update is not possible anymore.
|
||||||
|
leasePendingShutdown.leaseTransferCalled = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
private long getCurrentTimeMillis() {
|
||||||
|
return currentTimeSupplier.get();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Data
|
||||||
|
private static class LeasePendingShutdown {
|
||||||
|
final Lease lease;
|
||||||
|
final ShardConsumer shardConsumer;
|
||||||
|
long timeoutTimestampMillis;
|
||||||
|
boolean shutdownRequested = false;
|
||||||
|
boolean leaseTransferCalled = false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -24,6 +24,7 @@ import software.amazon.awssdk.services.kinesis.model.Shard;
|
||||||
import software.amazon.kinesis.annotations.KinesisClientInternalApi;
|
import software.amazon.kinesis.annotations.KinesisClientInternalApi;
|
||||||
import software.amazon.kinesis.checkpoint.ShardRecordProcessorCheckpointer;
|
import software.amazon.kinesis.checkpoint.ShardRecordProcessorCheckpointer;
|
||||||
import software.amazon.kinesis.common.StreamIdentifier;
|
import software.amazon.kinesis.common.StreamIdentifier;
|
||||||
|
import software.amazon.kinesis.leases.LeaseStatsRecorder;
|
||||||
import software.amazon.kinesis.leases.ShardDetector;
|
import software.amazon.kinesis.leases.ShardDetector;
|
||||||
import software.amazon.kinesis.leases.ShardInfo;
|
import software.amazon.kinesis.leases.ShardInfo;
|
||||||
import software.amazon.kinesis.lifecycle.events.ProcessRecordsInput;
|
import software.amazon.kinesis.lifecycle.events.ProcessRecordsInput;
|
||||||
|
|
@ -65,6 +66,7 @@ public class ProcessTask implements ConsumerTask {
|
||||||
private final AggregatorUtil aggregatorUtil;
|
private final AggregatorUtil aggregatorUtil;
|
||||||
private final String shardInfoId;
|
private final String shardInfoId;
|
||||||
private final SchemaRegistryDecoder schemaRegistryDecoder;
|
private final SchemaRegistryDecoder schemaRegistryDecoder;
|
||||||
|
private final LeaseStatsRecorder leaseStatsRecorder;
|
||||||
|
|
||||||
public ProcessTask(
|
public ProcessTask(
|
||||||
@NonNull ShardInfo shardInfo,
|
@NonNull ShardInfo shardInfo,
|
||||||
|
|
@ -79,7 +81,8 @@ public class ProcessTask implements ConsumerTask {
|
||||||
long idleTimeInMilliseconds,
|
long idleTimeInMilliseconds,
|
||||||
@NonNull AggregatorUtil aggregatorUtil,
|
@NonNull AggregatorUtil aggregatorUtil,
|
||||||
@NonNull MetricsFactory metricsFactory,
|
@NonNull MetricsFactory metricsFactory,
|
||||||
SchemaRegistryDecoder schemaRegistryDecoder) {
|
SchemaRegistryDecoder schemaRegistryDecoder,
|
||||||
|
@NonNull LeaseStatsRecorder leaseStatsRecorder) {
|
||||||
this.shardInfo = shardInfo;
|
this.shardInfo = shardInfo;
|
||||||
this.shardInfoId = ShardInfo.getLeaseKey(shardInfo);
|
this.shardInfoId = ShardInfo.getLeaseKey(shardInfo);
|
||||||
this.shardRecordProcessor = shardRecordProcessor;
|
this.shardRecordProcessor = shardRecordProcessor;
|
||||||
|
|
@ -91,6 +94,7 @@ public class ProcessTask implements ConsumerTask {
|
||||||
this.idleTimeInMilliseconds = idleTimeInMilliseconds;
|
this.idleTimeInMilliseconds = idleTimeInMilliseconds;
|
||||||
this.metricsFactory = metricsFactory;
|
this.metricsFactory = metricsFactory;
|
||||||
this.schemaRegistryDecoder = schemaRegistryDecoder;
|
this.schemaRegistryDecoder = schemaRegistryDecoder;
|
||||||
|
this.leaseStatsRecorder = leaseStatsRecorder;
|
||||||
|
|
||||||
if (!skipShardSyncAtWorkerInitializationIfLeasesExist) {
|
if (!skipShardSyncAtWorkerInitializationIfLeasesExist) {
|
||||||
this.shard = shardDetector.shard(shardInfo.shardId());
|
this.shard = shardDetector.shard(shardInfo.shardId());
|
||||||
|
|
@ -173,6 +177,7 @@ public class ProcessTask implements ConsumerTask {
|
||||||
recordProcessorCheckpointer.largestPermittedCheckpointValue()));
|
recordProcessorCheckpointer.largestPermittedCheckpointValue()));
|
||||||
|
|
||||||
if (shouldCallProcessRecords(records)) {
|
if (shouldCallProcessRecords(records)) {
|
||||||
|
publishLeaseStats(records);
|
||||||
callProcessRecords(processRecordsInput, records);
|
callProcessRecords(processRecordsInput, records);
|
||||||
}
|
}
|
||||||
success = true;
|
success = true;
|
||||||
|
|
@ -197,6 +202,15 @@ public class ProcessTask implements ConsumerTask {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private void publishLeaseStats(final List<KinesisClientRecord> records) {
|
||||||
|
leaseStatsRecorder.recordStats(LeaseStatsRecorder.LeaseStats.builder()
|
||||||
|
.bytes(records.stream()
|
||||||
|
.mapToInt(record -> record.data().limit())
|
||||||
|
.sum())
|
||||||
|
.leaseKey(ShardInfo.getLeaseKey(shardInfo))
|
||||||
|
.build());
|
||||||
|
}
|
||||||
|
|
||||||
private List<KinesisClientRecord> deaggregateAnyKplRecords(List<KinesisClientRecord> records) {
|
private List<KinesisClientRecord> deaggregateAnyKplRecords(List<KinesisClientRecord> records) {
|
||||||
if (shard == null) {
|
if (shard == null) {
|
||||||
return aggregatorUtil.deaggregate(records);
|
return aggregatorUtil.deaggregate(records);
|
||||||
|
|
|
||||||
|
|
@ -21,7 +21,6 @@ import java.util.concurrent.CompletableFuture;
|
||||||
import java.util.concurrent.ExecutionException;
|
import java.util.concurrent.ExecutionException;
|
||||||
import java.util.concurrent.ExecutorService;
|
import java.util.concurrent.ExecutorService;
|
||||||
import java.util.concurrent.RejectedExecutionException;
|
import java.util.concurrent.RejectedExecutionException;
|
||||||
import java.util.function.Function;
|
|
||||||
|
|
||||||
import com.google.common.annotations.VisibleForTesting;
|
import com.google.common.annotations.VisibleForTesting;
|
||||||
import lombok.AccessLevel;
|
import lombok.AccessLevel;
|
||||||
|
|
@ -35,8 +34,6 @@ import software.amazon.kinesis.exceptions.internal.BlockedOnParentShardException
|
||||||
import software.amazon.kinesis.leases.ShardInfo;
|
import software.amazon.kinesis.leases.ShardInfo;
|
||||||
import software.amazon.kinesis.lifecycle.events.ProcessRecordsInput;
|
import software.amazon.kinesis.lifecycle.events.ProcessRecordsInput;
|
||||||
import software.amazon.kinesis.lifecycle.events.TaskExecutionListenerInput;
|
import software.amazon.kinesis.lifecycle.events.TaskExecutionListenerInput;
|
||||||
import software.amazon.kinesis.metrics.MetricsCollectingTaskDecorator;
|
|
||||||
import software.amazon.kinesis.metrics.MetricsFactory;
|
|
||||||
import software.amazon.kinesis.retrieval.RecordsPublisher;
|
import software.amazon.kinesis.retrieval.RecordsPublisher;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -59,12 +56,6 @@ public class ShardConsumer {
|
||||||
@NonNull
|
@NonNull
|
||||||
private final Optional<Long> logWarningForTaskAfterMillis;
|
private final Optional<Long> logWarningForTaskAfterMillis;
|
||||||
|
|
||||||
/**
|
|
||||||
* @deprecated unused; to be removed in a "major" version bump
|
|
||||||
*/
|
|
||||||
@Deprecated
|
|
||||||
private final Function<ConsumerTask, ConsumerTask> taskMetricsDecorator;
|
|
||||||
|
|
||||||
private final int bufferSize;
|
private final int bufferSize;
|
||||||
private final TaskExecutionListener taskExecutionListener;
|
private final TaskExecutionListener taskExecutionListener;
|
||||||
private final String streamIdentifier;
|
private final String streamIdentifier;
|
||||||
|
|
@ -95,27 +86,6 @@ public class ShardConsumer {
|
||||||
|
|
||||||
private ProcessRecordsInput shardEndProcessRecordsInput;
|
private ProcessRecordsInput shardEndProcessRecordsInput;
|
||||||
|
|
||||||
@Deprecated
|
|
||||||
public ShardConsumer(
|
|
||||||
RecordsPublisher recordsPublisher,
|
|
||||||
ExecutorService executorService,
|
|
||||||
ShardInfo shardInfo,
|
|
||||||
Optional<Long> logWarningForTaskAfterMillis,
|
|
||||||
ShardConsumerArgument shardConsumerArgument,
|
|
||||||
TaskExecutionListener taskExecutionListener) {
|
|
||||||
this(
|
|
||||||
recordsPublisher,
|
|
||||||
executorService,
|
|
||||||
shardInfo,
|
|
||||||
logWarningForTaskAfterMillis,
|
|
||||||
shardConsumerArgument,
|
|
||||||
ConsumerStates.INITIAL_STATE,
|
|
||||||
ShardConsumer.metricsWrappingFunction(shardConsumerArgument.metricsFactory()),
|
|
||||||
8,
|
|
||||||
taskExecutionListener,
|
|
||||||
LifecycleConfig.DEFAULT_READ_TIMEOUTS_TO_IGNORE);
|
|
||||||
}
|
|
||||||
|
|
||||||
public ShardConsumer(
|
public ShardConsumer(
|
||||||
RecordsPublisher recordsPublisher,
|
RecordsPublisher recordsPublisher,
|
||||||
ExecutorService executorService,
|
ExecutorService executorService,
|
||||||
|
|
@ -131,36 +101,11 @@ public class ShardConsumer {
|
||||||
logWarningForTaskAfterMillis,
|
logWarningForTaskAfterMillis,
|
||||||
shardConsumerArgument,
|
shardConsumerArgument,
|
||||||
ConsumerStates.INITIAL_STATE,
|
ConsumerStates.INITIAL_STATE,
|
||||||
ShardConsumer.metricsWrappingFunction(shardConsumerArgument.metricsFactory()),
|
|
||||||
8,
|
8,
|
||||||
taskExecutionListener,
|
taskExecutionListener,
|
||||||
readTimeoutsToIgnoreBeforeWarning);
|
readTimeoutsToIgnoreBeforeWarning);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Deprecated
|
|
||||||
public ShardConsumer(
|
|
||||||
RecordsPublisher recordsPublisher,
|
|
||||||
ExecutorService executorService,
|
|
||||||
ShardInfo shardInfo,
|
|
||||||
Optional<Long> logWarningForTaskAfterMillis,
|
|
||||||
ShardConsumerArgument shardConsumerArgument,
|
|
||||||
ConsumerState initialState,
|
|
||||||
Function<ConsumerTask, ConsumerTask> taskMetricsDecorator,
|
|
||||||
int bufferSize,
|
|
||||||
TaskExecutionListener taskExecutionListener) {
|
|
||||||
this(
|
|
||||||
recordsPublisher,
|
|
||||||
executorService,
|
|
||||||
shardInfo,
|
|
||||||
logWarningForTaskAfterMillis,
|
|
||||||
shardConsumerArgument,
|
|
||||||
initialState,
|
|
||||||
taskMetricsDecorator,
|
|
||||||
bufferSize,
|
|
||||||
taskExecutionListener,
|
|
||||||
LifecycleConfig.DEFAULT_READ_TIMEOUTS_TO_IGNORE);
|
|
||||||
}
|
|
||||||
|
|
||||||
//
|
//
|
||||||
// TODO: Make bufferSize configurable
|
// TODO: Make bufferSize configurable
|
||||||
//
|
//
|
||||||
|
|
@ -171,7 +116,6 @@ public class ShardConsumer {
|
||||||
Optional<Long> logWarningForTaskAfterMillis,
|
Optional<Long> logWarningForTaskAfterMillis,
|
||||||
ShardConsumerArgument shardConsumerArgument,
|
ShardConsumerArgument shardConsumerArgument,
|
||||||
ConsumerState initialState,
|
ConsumerState initialState,
|
||||||
Function<ConsumerTask, ConsumerTask> taskMetricsDecorator,
|
|
||||||
int bufferSize,
|
int bufferSize,
|
||||||
TaskExecutionListener taskExecutionListener,
|
TaskExecutionListener taskExecutionListener,
|
||||||
int readTimeoutsToIgnoreBeforeWarning) {
|
int readTimeoutsToIgnoreBeforeWarning) {
|
||||||
|
|
@ -183,7 +127,6 @@ public class ShardConsumer {
|
||||||
this.logWarningForTaskAfterMillis = logWarningForTaskAfterMillis;
|
this.logWarningForTaskAfterMillis = logWarningForTaskAfterMillis;
|
||||||
this.taskExecutionListener = taskExecutionListener;
|
this.taskExecutionListener = taskExecutionListener;
|
||||||
this.currentState = initialState;
|
this.currentState = initialState;
|
||||||
this.taskMetricsDecorator = taskMetricsDecorator;
|
|
||||||
subscriber = new ShardConsumerSubscriber(
|
subscriber = new ShardConsumerSubscriber(
|
||||||
recordsPublisher, executorService, bufferSize, this, readTimeoutsToIgnoreBeforeWarning);
|
recordsPublisher, executorService, bufferSize, this, readTimeoutsToIgnoreBeforeWarning);
|
||||||
this.bufferSize = bufferSize;
|
this.bufferSize = bufferSize;
|
||||||
|
|
@ -484,17 +427,18 @@ public class ShardConsumer {
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Requests the shutdown of the this ShardConsumer. This should give the record processor a chance to checkpoint
|
* Requests the shutdown of the ShardConsumer. This should give the record processor a chance to checkpoint
|
||||||
* before being shutdown.
|
* before being shutdown.
|
||||||
*
|
*
|
||||||
* @param shutdownNotification
|
* @param shutdownNotification used to signal that the record processor has been given the chance to shut down.
|
||||||
* used to signal that the record processor has been given the chance to shutdown.
|
|
||||||
*/
|
*/
|
||||||
public void gracefulShutdown(ShutdownNotification shutdownNotification) {
|
public void gracefulShutdown(ShutdownNotification shutdownNotification) {
|
||||||
if (subscriber != null) {
|
if (subscriber != null) {
|
||||||
subscriber.cancel();
|
subscriber.cancel();
|
||||||
}
|
}
|
||||||
|
if (shutdownNotification != null) {
|
||||||
this.shutdownNotification = shutdownNotification;
|
this.shutdownNotification = shutdownNotification;
|
||||||
|
}
|
||||||
markForShutdown(ShutdownReason.REQUESTED);
|
markForShutdown(ShutdownReason.REQUESTED);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -542,21 +486,4 @@ public class ShardConsumer {
|
||||||
return shutdownReason != null;
|
return shutdownReason != null;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* Default task wrapping function for metrics
|
|
||||||
*
|
|
||||||
* @param metricsFactory
|
|
||||||
* the factory used for reporting metrics
|
|
||||||
* @return a function that will wrap the task with a metrics reporter
|
|
||||||
*/
|
|
||||||
private static Function<ConsumerTask, ConsumerTask> metricsWrappingFunction(MetricsFactory metricsFactory) {
|
|
||||||
return (task) -> {
|
|
||||||
if (task == null) {
|
|
||||||
return null;
|
|
||||||
} else {
|
|
||||||
return new MetricsCollectingTaskDecorator(task, metricsFactory);
|
|
||||||
}
|
|
||||||
};
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -18,7 +18,12 @@ import lombok.AccessLevel;
|
||||||
import lombok.RequiredArgsConstructor;
|
import lombok.RequiredArgsConstructor;
|
||||||
import lombok.extern.slf4j.Slf4j;
|
import lombok.extern.slf4j.Slf4j;
|
||||||
import software.amazon.kinesis.annotations.KinesisClientInternalApi;
|
import software.amazon.kinesis.annotations.KinesisClientInternalApi;
|
||||||
|
import software.amazon.kinesis.leases.Lease;
|
||||||
|
import software.amazon.kinesis.leases.LeaseCoordinator;
|
||||||
import software.amazon.kinesis.leases.ShardInfo;
|
import software.amazon.kinesis.leases.ShardInfo;
|
||||||
|
import software.amazon.kinesis.leases.exceptions.DependencyException;
|
||||||
|
import software.amazon.kinesis.leases.exceptions.InvalidStateException;
|
||||||
|
import software.amazon.kinesis.leases.exceptions.ProvisionedThroughputException;
|
||||||
import software.amazon.kinesis.lifecycle.events.ShutdownRequestedInput;
|
import software.amazon.kinesis.lifecycle.events.ShutdownRequestedInput;
|
||||||
import software.amazon.kinesis.processor.RecordProcessorCheckpointer;
|
import software.amazon.kinesis.processor.RecordProcessorCheckpointer;
|
||||||
import software.amazon.kinesis.processor.ShardRecordProcessor;
|
import software.amazon.kinesis.processor.ShardRecordProcessor;
|
||||||
|
|
@ -33,23 +38,41 @@ public class ShutdownNotificationTask implements ConsumerTask {
|
||||||
private final ShardRecordProcessor shardRecordProcessor;
|
private final ShardRecordProcessor shardRecordProcessor;
|
||||||
private final RecordProcessorCheckpointer recordProcessorCheckpointer;
|
private final RecordProcessorCheckpointer recordProcessorCheckpointer;
|
||||||
private final ShutdownNotification shutdownNotification;
|
private final ShutdownNotification shutdownNotification;
|
||||||
// TODO: remove if not used
|
|
||||||
private final ShardInfo shardInfo;
|
private final ShardInfo shardInfo;
|
||||||
|
private final LeaseCoordinator leaseCoordinator;
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public TaskResult call() {
|
public TaskResult call() {
|
||||||
|
final String leaseKey = ShardInfo.getLeaseKey(shardInfo);
|
||||||
|
final Lease currentShardLease = leaseCoordinator.getCurrentlyHeldLease(leaseKey);
|
||||||
try {
|
try {
|
||||||
try {
|
try {
|
||||||
shardRecordProcessor.shutdownRequested(ShutdownRequestedInput.builder()
|
shardRecordProcessor.shutdownRequested(ShutdownRequestedInput.builder()
|
||||||
.checkpointer(recordProcessorCheckpointer)
|
.checkpointer(recordProcessorCheckpointer)
|
||||||
.build());
|
.build());
|
||||||
|
attemptLeaseTransfer(currentShardLease);
|
||||||
} catch (Exception ex) {
|
} catch (Exception ex) {
|
||||||
return new TaskResult(ex);
|
return new TaskResult(ex);
|
||||||
}
|
}
|
||||||
|
|
||||||
return new TaskResult(null);
|
return new TaskResult(null);
|
||||||
} finally {
|
} finally {
|
||||||
|
if (shutdownNotification != null) {
|
||||||
shutdownNotification.shutdownNotificationComplete();
|
shutdownNotification.shutdownNotificationComplete();
|
||||||
|
} else {
|
||||||
|
// shutdownNotification is null if this is a shard level graceful shutdown instead of a worker level
|
||||||
|
// one. We need to drop lease like what's done in the shutdownNotificationComplete so we can
|
||||||
|
// transition to next state.
|
||||||
|
leaseCoordinator.dropLease(currentShardLease);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private void attemptLeaseTransfer(Lease lease)
|
||||||
|
throws ProvisionedThroughputException, InvalidStateException, DependencyException {
|
||||||
|
if (lease != null && lease.shutdownRequested()) {
|
||||||
|
if (leaseCoordinator.workerIdentifier().equals(lease.checkpointOwner())) {
|
||||||
|
leaseCoordinator.leaseRefresher().assignLease(lease, lease.leaseOwner());
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -164,7 +164,6 @@ public class ShutdownTask implements ConsumerTask {
|
||||||
} else {
|
} else {
|
||||||
throwOnApplicationException(leaseKey, leaseLostAction, scope, startTime);
|
throwOnApplicationException(leaseKey, leaseLostAction, scope, startTime);
|
||||||
}
|
}
|
||||||
|
|
||||||
log.debug("Shutting down retrieval strategy for shard {}.", leaseKey);
|
log.debug("Shutting down retrieval strategy for shard {}.", leaseKey);
|
||||||
recordsPublisher.shutdown();
|
recordsPublisher.shutdown();
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -49,7 +49,7 @@ public class RetrievalConfig {
|
||||||
*/
|
*/
|
||||||
public static final String KINESIS_CLIENT_LIB_USER_AGENT = "amazon-kinesis-client-library-java";
|
public static final String KINESIS_CLIENT_LIB_USER_AGENT = "amazon-kinesis-client-library-java";
|
||||||
|
|
||||||
public static final String KINESIS_CLIENT_LIB_USER_AGENT_VERSION = "2.6.1-SNAPSHOT";
|
public static final String KINESIS_CLIENT_LIB_USER_AGENT_VERSION = "3.0.0";
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Client used to make calls to Kinesis for records retrieval
|
* Client used to make calls to Kinesis for records retrieval
|
||||||
|
|
|
||||||
|
|
@ -26,6 +26,7 @@ import lombok.NonNull;
|
||||||
import lombok.Setter;
|
import lombok.Setter;
|
||||||
import lombok.ToString;
|
import lombok.ToString;
|
||||||
import lombok.experimental.Accessors;
|
import lombok.experimental.Accessors;
|
||||||
|
import lombok.extern.slf4j.Slf4j;
|
||||||
import software.amazon.awssdk.services.kinesis.KinesisAsyncClient;
|
import software.amazon.awssdk.services.kinesis.KinesisAsyncClient;
|
||||||
import software.amazon.awssdk.services.kinesis.model.GetRecordsRequest;
|
import software.amazon.awssdk.services.kinesis.model.GetRecordsRequest;
|
||||||
import software.amazon.kinesis.retrieval.DataFetcherProviderConfig;
|
import software.amazon.kinesis.retrieval.DataFetcherProviderConfig;
|
||||||
|
|
@ -38,12 +39,15 @@ import software.amazon.kinesis.retrieval.RetrievalSpecificConfig;
|
||||||
@Setter
|
@Setter
|
||||||
@ToString
|
@ToString
|
||||||
@EqualsAndHashCode
|
@EqualsAndHashCode
|
||||||
|
@Slf4j
|
||||||
public class PollingConfig implements RetrievalSpecificConfig {
|
public class PollingConfig implements RetrievalSpecificConfig {
|
||||||
|
|
||||||
public static final Duration DEFAULT_REQUEST_TIMEOUT = Duration.ofSeconds(30);
|
public static final Duration DEFAULT_REQUEST_TIMEOUT = Duration.ofSeconds(30);
|
||||||
|
|
||||||
public static final int DEFAULT_MAX_RECORDS = 10000;
|
public static final int DEFAULT_MAX_RECORDS = 10000;
|
||||||
|
|
||||||
|
public static final long MIN_IDLE_MILLIS_BETWEEN_READS = 200L;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Configurable functional interface to override the existing DataFetcher.
|
* Configurable functional interface to override the existing DataFetcher.
|
||||||
*/
|
*/
|
||||||
|
|
@ -138,9 +142,18 @@ public class PollingConfig implements RetrievalSpecificConfig {
|
||||||
/**
|
/**
|
||||||
* Set the value for how long the ShardConsumer should sleep in between calls to
|
* Set the value for how long the ShardConsumer should sleep in between calls to
|
||||||
* {@link KinesisAsyncClient#getRecords(GetRecordsRequest)}. If this is not specified here the value provided in
|
* {@link KinesisAsyncClient#getRecords(GetRecordsRequest)}. If this is not specified here the value provided in
|
||||||
* {@link RecordsFetcherFactory} will be used.
|
* {@link RecordsFetcherFactory} will be used. Cannot set value below MIN_IDLE_MILLIS_BETWEEN_READS.
|
||||||
*/
|
*/
|
||||||
public PollingConfig idleTimeBetweenReadsInMillis(long idleTimeBetweenReadsInMillis) {
|
public PollingConfig idleTimeBetweenReadsInMillis(long idleTimeBetweenReadsInMillis) {
|
||||||
|
if (idleTimeBetweenReadsInMillis < MIN_IDLE_MILLIS_BETWEEN_READS) {
|
||||||
|
log.warn(
|
||||||
|
"idleTimeBetweenReadsInMillis must be greater than or equal to {} but current value is {}."
|
||||||
|
+ " Defaulting to minimum {}.",
|
||||||
|
MIN_IDLE_MILLIS_BETWEEN_READS,
|
||||||
|
idleTimeBetweenReadsInMillis,
|
||||||
|
MIN_IDLE_MILLIS_BETWEEN_READS);
|
||||||
|
idleTimeBetweenReadsInMillis = MIN_IDLE_MILLIS_BETWEEN_READS;
|
||||||
|
}
|
||||||
usePollingConfigIdleTimeValue = true;
|
usePollingConfigIdleTimeValue = true;
|
||||||
this.idleTimeBetweenReadsInMillis = idleTimeBetweenReadsInMillis;
|
this.idleTimeBetweenReadsInMillis = idleTimeBetweenReadsInMillis;
|
||||||
return this;
|
return this;
|
||||||
|
|
|
||||||
|
|
@ -61,6 +61,7 @@ import software.amazon.kinesis.retrieval.RecordsDeliveryAck;
|
||||||
import software.amazon.kinesis.retrieval.RecordsPublisher;
|
import software.amazon.kinesis.retrieval.RecordsPublisher;
|
||||||
import software.amazon.kinesis.retrieval.RecordsRetrieved;
|
import software.amazon.kinesis.retrieval.RecordsRetrieved;
|
||||||
import software.amazon.kinesis.retrieval.RetryableRetrievalException;
|
import software.amazon.kinesis.retrieval.RetryableRetrievalException;
|
||||||
|
import software.amazon.kinesis.retrieval.ThrottlingReporter;
|
||||||
import software.amazon.kinesis.retrieval.kpl.ExtendedSequenceNumber;
|
import software.amazon.kinesis.retrieval.kpl.ExtendedSequenceNumber;
|
||||||
|
|
||||||
import static software.amazon.kinesis.common.DiagnosticUtils.takeDelayedDeliveryActionIfRequired;
|
import static software.amazon.kinesis.common.DiagnosticUtils.takeDelayedDeliveryActionIfRequired;
|
||||||
|
|
@ -109,6 +110,7 @@ public class PrefetchRecordsPublisher implements RecordsPublisher {
|
||||||
private boolean wasReset = false;
|
private boolean wasReset = false;
|
||||||
private Instant lastEventDeliveryTime = Instant.EPOCH;
|
private Instant lastEventDeliveryTime = Instant.EPOCH;
|
||||||
private final RequestDetails lastSuccessfulRequestDetails = new RequestDetails();
|
private final RequestDetails lastSuccessfulRequestDetails = new RequestDetails();
|
||||||
|
private final ThrottlingReporter throttlingReporter;
|
||||||
|
|
||||||
@Data
|
@Data
|
||||||
@Accessors(fluent = true)
|
@Accessors(fluent = true)
|
||||||
|
|
@ -233,6 +235,7 @@ public class PrefetchRecordsPublisher implements RecordsPublisher {
|
||||||
@NonNull final MetricsFactory metricsFactory,
|
@NonNull final MetricsFactory metricsFactory,
|
||||||
@NonNull final String operation,
|
@NonNull final String operation,
|
||||||
@NonNull final String shardId,
|
@NonNull final String shardId,
|
||||||
|
final ThrottlingReporter throttlingReporter,
|
||||||
final long awaitTerminationTimeoutMillis) {
|
final long awaitTerminationTimeoutMillis) {
|
||||||
this.getRecordsRetrievalStrategy = getRecordsRetrievalStrategy;
|
this.getRecordsRetrievalStrategy = getRecordsRetrievalStrategy;
|
||||||
this.maxRecordsPerCall = maxRecordsPerCall;
|
this.maxRecordsPerCall = maxRecordsPerCall;
|
||||||
|
|
@ -248,6 +251,7 @@ public class PrefetchRecordsPublisher implements RecordsPublisher {
|
||||||
this.idleMillisBetweenCalls = idleMillisBetweenCalls;
|
this.idleMillisBetweenCalls = idleMillisBetweenCalls;
|
||||||
this.defaultGetRecordsCacheDaemon = new DefaultGetRecordsCacheDaemon();
|
this.defaultGetRecordsCacheDaemon = new DefaultGetRecordsCacheDaemon();
|
||||||
Validate.notEmpty(operation, "Operation cannot be empty");
|
Validate.notEmpty(operation, "Operation cannot be empty");
|
||||||
|
this.throttlingReporter = throttlingReporter;
|
||||||
this.operation = operation;
|
this.operation = operation;
|
||||||
this.streamId = this.getRecordsRetrievalStrategy.dataFetcher().getStreamIdentifier();
|
this.streamId = this.getRecordsRetrievalStrategy.dataFetcher().getStreamIdentifier();
|
||||||
this.streamAndShardId = this.streamId.serialize() + ":" + shardId;
|
this.streamAndShardId = this.streamId.serialize() + ":" + shardId;
|
||||||
|
|
@ -279,7 +283,8 @@ public class PrefetchRecordsPublisher implements RecordsPublisher {
|
||||||
final long idleMillisBetweenCalls,
|
final long idleMillisBetweenCalls,
|
||||||
final MetricsFactory metricsFactory,
|
final MetricsFactory metricsFactory,
|
||||||
final String operation,
|
final String operation,
|
||||||
final String shardId) {
|
final String shardId,
|
||||||
|
final ThrottlingReporter throttlingReporter) {
|
||||||
this(
|
this(
|
||||||
maxPendingProcessRecordsInput,
|
maxPendingProcessRecordsInput,
|
||||||
maxByteSize,
|
maxByteSize,
|
||||||
|
|
@ -291,6 +296,7 @@ public class PrefetchRecordsPublisher implements RecordsPublisher {
|
||||||
metricsFactory,
|
metricsFactory,
|
||||||
operation,
|
operation,
|
||||||
shardId,
|
shardId,
|
||||||
|
throttlingReporter,
|
||||||
DEFAULT_AWAIT_TERMINATION_TIMEOUT_MILLIS);
|
DEFAULT_AWAIT_TERMINATION_TIMEOUT_MILLIS);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -555,6 +561,7 @@ public class PrefetchRecordsPublisher implements RecordsPublisher {
|
||||||
recordsRetrieved.lastBatchSequenceNumber);
|
recordsRetrieved.lastBatchSequenceNumber);
|
||||||
addArrivedRecordsInput(recordsRetrieved);
|
addArrivedRecordsInput(recordsRetrieved);
|
||||||
drainQueueForRequests();
|
drainQueueForRequests();
|
||||||
|
throttlingReporter.success();
|
||||||
} catch (PositionResetException pse) {
|
} catch (PositionResetException pse) {
|
||||||
throw pse;
|
throw pse;
|
||||||
} catch (RetryableRetrievalException rre) {
|
} catch (RetryableRetrievalException rre) {
|
||||||
|
|
@ -584,10 +591,11 @@ public class PrefetchRecordsPublisher implements RecordsPublisher {
|
||||||
|
|
||||||
publisherSession.dataFetcher().restartIterator();
|
publisherSession.dataFetcher().restartIterator();
|
||||||
} catch (ProvisionedThroughputExceededException e) {
|
} catch (ProvisionedThroughputExceededException e) {
|
||||||
// Update the lastSuccessfulCall if we get a throttling exception so that we back off idleMillis
|
log.error(
|
||||||
// for the next call
|
"{} : ProvisionedThroughputExceededException thrown while fetching records from Kinesis",
|
||||||
lastSuccessfulCall = Instant.now();
|
streamAndShardId,
|
||||||
log.error("{} : Exception thrown while fetching records from Kinesis", streamAndShardId, e);
|
e);
|
||||||
|
throttlingReporter.throttled();
|
||||||
} catch (SdkException e) {
|
} catch (SdkException e) {
|
||||||
log.error("{} : Exception thrown while fetching records from Kinesis", streamAndShardId, e);
|
log.error("{} : Exception thrown while fetching records from Kinesis", streamAndShardId, e);
|
||||||
} finally {
|
} finally {
|
||||||
|
|
|
||||||
|
|
@ -24,6 +24,7 @@ import software.amazon.kinesis.retrieval.DataFetchingStrategy;
|
||||||
import software.amazon.kinesis.retrieval.GetRecordsRetrievalStrategy;
|
import software.amazon.kinesis.retrieval.GetRecordsRetrievalStrategy;
|
||||||
import software.amazon.kinesis.retrieval.RecordsFetcherFactory;
|
import software.amazon.kinesis.retrieval.RecordsFetcherFactory;
|
||||||
import software.amazon.kinesis.retrieval.RecordsPublisher;
|
import software.amazon.kinesis.retrieval.RecordsPublisher;
|
||||||
|
import software.amazon.kinesis.retrieval.ThrottlingReporter;
|
||||||
|
|
||||||
@Slf4j
|
@Slf4j
|
||||||
@KinesisClientInternalApi
|
@KinesisClientInternalApi
|
||||||
|
|
@ -32,6 +33,7 @@ public class SimpleRecordsFetcherFactory implements RecordsFetcherFactory {
|
||||||
private int maxByteSize = 8 * 1024 * 1024;
|
private int maxByteSize = 8 * 1024 * 1024;
|
||||||
private int maxRecordsCount = 30000;
|
private int maxRecordsCount = 30000;
|
||||||
private long idleMillisBetweenCalls = 1500L;
|
private long idleMillisBetweenCalls = 1500L;
|
||||||
|
private int maxConsecutiveThrottles = 5;
|
||||||
private DataFetchingStrategy dataFetchingStrategy = DataFetchingStrategy.DEFAULT;
|
private DataFetchingStrategy dataFetchingStrategy = DataFetchingStrategy.DEFAULT;
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
|
@ -56,7 +58,8 @@ public class SimpleRecordsFetcherFactory implements RecordsFetcherFactory {
|
||||||
idleMillisBetweenCalls,
|
idleMillisBetweenCalls,
|
||||||
metricsFactory,
|
metricsFactory,
|
||||||
"ProcessTask",
|
"ProcessTask",
|
||||||
shardId);
|
shardId,
|
||||||
|
new ThrottlingReporter(maxConsecutiveThrottles, shardId));
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
|
|
||||||
|
|
@ -7,7 +7,6 @@ import java.util.List;
|
||||||
import com.amazonaws.services.schemaregistry.common.Schema;
|
import com.amazonaws.services.schemaregistry.common.Schema;
|
||||||
import com.amazonaws.services.schemaregistry.deserializers.GlueSchemaRegistryDeserializer;
|
import com.amazonaws.services.schemaregistry.deserializers.GlueSchemaRegistryDeserializer;
|
||||||
import lombok.extern.slf4j.Slf4j;
|
import lombok.extern.slf4j.Slf4j;
|
||||||
import software.amazon.kinesis.common.KinesisClientLibraryPackage;
|
|
||||||
import software.amazon.kinesis.retrieval.KinesisClientRecord;
|
import software.amazon.kinesis.retrieval.KinesisClientRecord;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -15,7 +14,7 @@ import software.amazon.kinesis.retrieval.KinesisClientRecord;
|
||||||
*/
|
*/
|
||||||
@Slf4j
|
@Slf4j
|
||||||
public class SchemaRegistryDecoder {
|
public class SchemaRegistryDecoder {
|
||||||
private static final String USER_AGENT_APP_NAME = "kcl" + "-" + KinesisClientLibraryPackage.VERSION;
|
private static final String USER_AGENT_APP_NAME = "kcl" + "-" + "3.0.0";
|
||||||
private final GlueSchemaRegistryDeserializer glueSchemaRegistryDeserializer;
|
private final GlueSchemaRegistryDeserializer glueSchemaRegistryDeserializer;
|
||||||
|
|
||||||
public SchemaRegistryDecoder(GlueSchemaRegistryDeserializer glueSchemaRegistryDeserializer) {
|
public SchemaRegistryDecoder(GlueSchemaRegistryDeserializer glueSchemaRegistryDeserializer) {
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,61 @@
|
||||||
|
package software.amazon.kinesis.utils;
|
||||||
|
|
||||||
|
import java.io.BufferedReader;
|
||||||
|
import java.io.File;
|
||||||
|
import java.io.FileReader;
|
||||||
|
|
||||||
|
import lombok.extern.slf4j.Slf4j;
|
||||||
|
|
||||||
|
@Slf4j
|
||||||
|
public class Cgroup {
|
||||||
|
|
||||||
|
public static String readSingleLineFile(String path) {
|
||||||
|
BufferedReader bufferedReader = null;
|
||||||
|
try {
|
||||||
|
final File file = new File(path);
|
||||||
|
if (file.exists()) {
|
||||||
|
bufferedReader = new BufferedReader(new FileReader(file));
|
||||||
|
return bufferedReader.readLine();
|
||||||
|
} else {
|
||||||
|
throw new IllegalArgumentException(String.format("Failed to read file. %s does not exist", path));
|
||||||
|
}
|
||||||
|
} catch (final Throwable t) {
|
||||||
|
if (t instanceof IllegalArgumentException) {
|
||||||
|
throw (IllegalArgumentException) t;
|
||||||
|
}
|
||||||
|
throw new IllegalArgumentException("Failed to read file.", t);
|
||||||
|
} finally {
|
||||||
|
try {
|
||||||
|
if (bufferedReader != null) {
|
||||||
|
bufferedReader.close();
|
||||||
|
}
|
||||||
|
} catch (Throwable x) {
|
||||||
|
log.warn("Failed to close bufferedReader ", x);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Calculates the number of available cpus from the cpuset
|
||||||
|
* See https://docs.kernel.org/admin-guide/cgroup-v2.html#cpuset for more information
|
||||||
|
* "0-7" represents 8 cores
|
||||||
|
* "0-4,6,8-10" represents 9 cores (cores 0,1,2,3,4 and core 6 and core 8,9,10)
|
||||||
|
* @param cpuSet a single line from the cgroup cpuset file
|
||||||
|
* @return the number of available cpus
|
||||||
|
*/
|
||||||
|
public static int getAvailableCpusFromEffectiveCpuSet(final String cpuSet) {
|
||||||
|
final String[] cpuSetArr = cpuSet.split(",");
|
||||||
|
|
||||||
|
int sumCpus = 0;
|
||||||
|
for (String cpuSetGroup : cpuSetArr) {
|
||||||
|
if (cpuSetGroup.contains("-")) {
|
||||||
|
final String[] cpuSetGroupSplit = cpuSetGroup.split("-");
|
||||||
|
// Values are inclusive
|
||||||
|
sumCpus += Integer.parseInt(cpuSetGroupSplit[1]) - Integer.parseInt(cpuSetGroupSplit[0]) + 1;
|
||||||
|
} else {
|
||||||
|
sumCpus += 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return sumCpus;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,67 @@
|
||||||
|
package software.amazon.kinesis.utils;
|
||||||
|
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.concurrent.CompletableFuture;
|
||||||
|
import java.util.function.Supplier;
|
||||||
|
|
||||||
|
import lombok.NonNull;
|
||||||
|
import lombok.extern.slf4j.Slf4j;
|
||||||
|
import software.amazon.awssdk.services.dynamodb.DynamoDbAsyncClient;
|
||||||
|
import software.amazon.awssdk.services.dynamodb.model.AttributeDefinition;
|
||||||
|
import software.amazon.awssdk.services.dynamodb.model.BillingMode;
|
||||||
|
import software.amazon.awssdk.services.dynamodb.model.CreateTableRequest;
|
||||||
|
import software.amazon.awssdk.services.dynamodb.model.CreateTableResponse;
|
||||||
|
import software.amazon.awssdk.services.dynamodb.model.KeySchemaElement;
|
||||||
|
import software.amazon.awssdk.services.dynamodb.model.ProvisionedThroughput;
|
||||||
|
import software.amazon.awssdk.services.dynamodb.model.UpdateContinuousBackupsRequest;
|
||||||
|
import software.amazon.awssdk.services.dynamodb.model.UpdateContinuousBackupsResponse;
|
||||||
|
import software.amazon.kinesis.common.DdbTableConfig;
|
||||||
|
|
||||||
|
import static java.util.Objects.nonNull;
|
||||||
|
|
||||||
|
@Slf4j
|
||||||
|
public final class DdbUtil {
|
||||||
|
|
||||||
|
@NonNull
|
||||||
|
public static Supplier<CompletableFuture<CreateTableResponse>> tableCreator(
|
||||||
|
final Supplier<List<KeySchemaElement>> keySchemaProvider,
|
||||||
|
final Supplier<List<AttributeDefinition>> attributeDefinitionProvider,
|
||||||
|
final DdbTableConfig tableConfig,
|
||||||
|
final DynamoDbAsyncClient dynamoDbAsyncClient) {
|
||||||
|
final CreateTableRequest.Builder createTableRequest = CreateTableRequest.builder()
|
||||||
|
.tableName(tableConfig.tableName())
|
||||||
|
.keySchema(keySchemaProvider.get())
|
||||||
|
.attributeDefinitions(attributeDefinitionProvider.get())
|
||||||
|
.deletionProtectionEnabled(tableConfig.deletionProtectionEnabled());
|
||||||
|
|
||||||
|
if (nonNull(tableConfig.tags()) && !tableConfig.tags().isEmpty()) {
|
||||||
|
createTableRequest.tags(tableConfig.tags());
|
||||||
|
}
|
||||||
|
|
||||||
|
if (tableConfig.billingMode() == BillingMode.PROVISIONED) {
|
||||||
|
log.info(
|
||||||
|
"Creating table {} in provisioned mode with {}wcu and {}rcu",
|
||||||
|
tableConfig.tableName(),
|
||||||
|
tableConfig.writeCapacity(),
|
||||||
|
tableConfig.readCapacity());
|
||||||
|
createTableRequest.provisionedThroughput(ProvisionedThroughput.builder()
|
||||||
|
.readCapacityUnits(tableConfig.readCapacity())
|
||||||
|
.writeCapacityUnits(tableConfig.writeCapacity())
|
||||||
|
.build());
|
||||||
|
}
|
||||||
|
createTableRequest.billingMode(tableConfig.billingMode());
|
||||||
|
return () -> dynamoDbAsyncClient.createTable(createTableRequest.build());
|
||||||
|
}
|
||||||
|
|
||||||
|
public static CompletableFuture<UpdateContinuousBackupsResponse> pitrEnabler(
|
||||||
|
final DdbTableConfig tableConfig, final DynamoDbAsyncClient dynamoDbAsyncClient) {
|
||||||
|
if (tableConfig.pointInTimeRecoveryEnabled()) {
|
||||||
|
final UpdateContinuousBackupsRequest request = UpdateContinuousBackupsRequest.builder()
|
||||||
|
.tableName(tableConfig.tableName())
|
||||||
|
.pointInTimeRecoverySpecification(builder -> builder.pointInTimeRecoveryEnabled(true))
|
||||||
|
.build();
|
||||||
|
return dynamoDbAsyncClient.updateContinuousBackups(request);
|
||||||
|
}
|
||||||
|
return CompletableFuture.completedFuture(null);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,31 @@
|
||||||
|
package software.amazon.kinesis.utils;
|
||||||
|
|
||||||
|
import lombok.Getter;
|
||||||
|
import lombok.RequiredArgsConstructor;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Uses the formula mentioned below for simple ExponentialMovingAverage
|
||||||
|
* <a href="https://en.wikipedia.org/wiki/Moving_average#Exponential_moving_average"/>
|
||||||
|
*
|
||||||
|
* Values of alpha close to 1 have less of a smoothing effect and give greater weight to recent changes in the data,
|
||||||
|
* while values of alpha closer to 0 have a greater smoothing effect and are less responsive to recent changes.
|
||||||
|
*/
|
||||||
|
@RequiredArgsConstructor
|
||||||
|
public class ExponentialMovingAverage {
|
||||||
|
|
||||||
|
private final double alpha;
|
||||||
|
|
||||||
|
@Getter
|
||||||
|
private double value;
|
||||||
|
|
||||||
|
private boolean initialized = false;
|
||||||
|
|
||||||
|
public void add(final double newValue) {
|
||||||
|
if (!initialized) {
|
||||||
|
this.value = newValue;
|
||||||
|
initialized = true;
|
||||||
|
} else {
|
||||||
|
this.value = alpha * newValue + (1 - alpha) * this.value;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,44 @@
|
||||||
|
package software.amazon.kinesis.utils;
|
||||||
|
|
||||||
|
import java.util.AbstractMap;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Map;
|
||||||
|
|
||||||
|
public class Statistics {
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Calculates the simple mean of the given values
|
||||||
|
* @param values list of values (double)
|
||||||
|
* @return mean of the given values, if the {@param values} is empty then returns 0;
|
||||||
|
*/
|
||||||
|
public static double calculateSimpleMean(final List<Double> values) {
|
||||||
|
if (values.isEmpty()) {
|
||||||
|
return 0D;
|
||||||
|
}
|
||||||
|
double sum = 0.0;
|
||||||
|
for (final double i : values) {
|
||||||
|
sum += i;
|
||||||
|
}
|
||||||
|
return sum / values.size();
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* For the given values find the standard deviation (SD).
|
||||||
|
* For details of SD calculation ref : <a href="https://en.wikipedia.org/wiki/Standard_deviation"/>
|
||||||
|
* @param values list of values (double)
|
||||||
|
* @return Map.Entry of mean to standard deviation for {@param values}, if {@param values} is empty then return
|
||||||
|
* Map.Entry with 0 as mean and 0 as SD.
|
||||||
|
*/
|
||||||
|
public static Map.Entry<Double, Double> calculateStandardDeviationAndMean(final List<Double> values) {
|
||||||
|
if (values.isEmpty()) {
|
||||||
|
return new AbstractMap.SimpleEntry<>(0D, 0D);
|
||||||
|
}
|
||||||
|
final double mean = calculateSimpleMean(values);
|
||||||
|
// calculate the standard deviation
|
||||||
|
double standardDeviation = 0.0;
|
||||||
|
for (final double num : values) {
|
||||||
|
standardDeviation += Math.pow(num - mean, 2);
|
||||||
|
}
|
||||||
|
return new AbstractMap.SimpleEntry<>(mean, Math.sqrt(standardDeviation / values.size()));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,92 @@
|
||||||
|
package software.amazon.kinesis.worker;
|
||||||
|
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Optional;
|
||||||
|
|
||||||
|
import lombok.RequiredArgsConstructor;
|
||||||
|
import lombok.extern.slf4j.Slf4j;
|
||||||
|
import software.amazon.kinesis.annotations.KinesisClientInternalApi;
|
||||||
|
import software.amazon.kinesis.worker.metric.OperatingRange;
|
||||||
|
import software.amazon.kinesis.worker.metric.WorkerMetric;
|
||||||
|
import software.amazon.kinesis.worker.metric.impl.container.Cgroupv1CpuWorkerMetric;
|
||||||
|
import software.amazon.kinesis.worker.metric.impl.container.Cgroupv2CpuWorkerMetric;
|
||||||
|
import software.amazon.kinesis.worker.metric.impl.container.EcsCpuWorkerMetric;
|
||||||
|
import software.amazon.kinesis.worker.metric.impl.linux.LinuxCpuWorkerMetric;
|
||||||
|
import software.amazon.kinesis.worker.platform.Ec2Resource;
|
||||||
|
import software.amazon.kinesis.worker.platform.EcsResource;
|
||||||
|
import software.amazon.kinesis.worker.platform.EksResource;
|
||||||
|
import software.amazon.kinesis.worker.platform.OperatingRangeDataProvider;
|
||||||
|
import software.amazon.kinesis.worker.platform.ResourceMetadataProvider;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Class to select appropriate WorkerMetricStats based on the operating range provider that is available on the instance.
|
||||||
|
*/
|
||||||
|
@Slf4j
|
||||||
|
@RequiredArgsConstructor
|
||||||
|
@KinesisClientInternalApi
|
||||||
|
public class WorkerMetricsSelector {
|
||||||
|
|
||||||
|
private static final OperatingRange DEFAULT_100_PERC_UTILIZED_OPERATING_RANGE =
|
||||||
|
OperatingRange.builder().maxUtilization(100).build();
|
||||||
|
|
||||||
|
private final List<ResourceMetadataProvider> workerComputePlatforms;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Factory method to create an instance of WorkerMetricsSelector.
|
||||||
|
*
|
||||||
|
* @return WorkerMetricsSelector instance
|
||||||
|
*/
|
||||||
|
public static WorkerMetricsSelector create() {
|
||||||
|
final List<ResourceMetadataProvider> resourceMetadataProviders = new ArrayList<>();
|
||||||
|
resourceMetadataProviders.add(EcsResource.create());
|
||||||
|
resourceMetadataProviders.add(EksResource.create());
|
||||||
|
// ec2 has to be the last one to check
|
||||||
|
resourceMetadataProviders.add(Ec2Resource.create());
|
||||||
|
return new WorkerMetricsSelector(resourceMetadataProviders);
|
||||||
|
}
|
||||||
|
|
||||||
|
private Optional<OperatingRangeDataProvider> getOperatingRangeDataProvider() {
|
||||||
|
for (ResourceMetadataProvider platform : workerComputePlatforms) {
|
||||||
|
if (platform.isOnPlatform()) {
|
||||||
|
final ResourceMetadataProvider.ComputePlatform computePlatform = platform.getPlatform();
|
||||||
|
log.info("Worker is running on {}", computePlatform);
|
||||||
|
return platform.getOperatingRangeDataProvider();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return Optional.empty();
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns a list of WorkerMetricStats based on the operating range provider the worker uses.
|
||||||
|
*
|
||||||
|
* @return List of WorkerMetricStats
|
||||||
|
*/
|
||||||
|
public List<WorkerMetric> getDefaultWorkerMetrics() {
|
||||||
|
final List<WorkerMetric> workerMetrics = new ArrayList<>();
|
||||||
|
final Optional<OperatingRangeDataProvider> optionalProvider = getOperatingRangeDataProvider();
|
||||||
|
if (!optionalProvider.isPresent()) {
|
||||||
|
log.warn("Did not find an operating range metadata provider.");
|
||||||
|
return workerMetrics;
|
||||||
|
}
|
||||||
|
final OperatingRangeDataProvider dataProvider = optionalProvider.get();
|
||||||
|
log.info("Worker has operating range metadata provider {} ", dataProvider);
|
||||||
|
switch (dataProvider) {
|
||||||
|
case LINUX_PROC:
|
||||||
|
workerMetrics.add(new LinuxCpuWorkerMetric(DEFAULT_100_PERC_UTILIZED_OPERATING_RANGE));
|
||||||
|
break;
|
||||||
|
case LINUX_ECS_METADATA_KEY_V4:
|
||||||
|
workerMetrics.add(new EcsCpuWorkerMetric(DEFAULT_100_PERC_UTILIZED_OPERATING_RANGE));
|
||||||
|
break;
|
||||||
|
case LINUX_EKS_CGROUP_V2:
|
||||||
|
workerMetrics.add(new Cgroupv2CpuWorkerMetric(DEFAULT_100_PERC_UTILIZED_OPERATING_RANGE));
|
||||||
|
break;
|
||||||
|
case LINUX_EKS_CGROUP_V1:
|
||||||
|
workerMetrics.add(new Cgroupv1CpuWorkerMetric(DEFAULT_100_PERC_UTILIZED_OPERATING_RANGE));
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
return workerMetrics;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,20 @@
|
||||||
|
package software.amazon.kinesis.worker.metric;
|
||||||
|
|
||||||
|
import com.google.common.base.Preconditions;
|
||||||
|
import lombok.Builder;
|
||||||
|
import lombok.Data;
|
||||||
|
|
||||||
|
@Data
|
||||||
|
@Builder
|
||||||
|
public class OperatingRange {
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Max utilization percentage allowed for the workerMetrics.
|
||||||
|
*/
|
||||||
|
private final int maxUtilization;
|
||||||
|
|
||||||
|
private OperatingRange(final int maxUtilization) {
|
||||||
|
Preconditions.checkArgument(!(maxUtilization < 0 || maxUtilization > 100), "Invalid maxUtilization value");
|
||||||
|
this.maxUtilization = maxUtilization;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,52 @@
|
||||||
|
package software.amazon.kinesis.worker.metric;
|
||||||
|
|
||||||
|
import com.google.common.base.Preconditions;
|
||||||
|
import lombok.Builder;
|
||||||
|
import lombok.Getter;
|
||||||
|
import lombok.NonNull;
|
||||||
|
|
||||||
|
public interface WorkerMetric {
|
||||||
|
/**
|
||||||
|
* WorkerMetricStats short name that is used as attribute name for it in storage.
|
||||||
|
* @return short name for the WorkerMetricStats
|
||||||
|
*/
|
||||||
|
String getShortName();
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Current WorkerMetricValue. WorkerMetricValue is a normalized percentage value to its max configured limits.
|
||||||
|
* E.g., if for a worker max network bandwidth is 10Gbps and current used bandwidth is 2Gbps, then WorkerMetricValue for
|
||||||
|
* NetworkWorkerMetrics will be 20 (%).
|
||||||
|
*
|
||||||
|
* @return WorkerMetricValue between 0 and 100 (both inclusive)
|
||||||
|
*/
|
||||||
|
WorkerMetricValue capture();
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Gets the operating range for this workerMetrics
|
||||||
|
* @return Operating range for this workerMetrics
|
||||||
|
*/
|
||||||
|
OperatingRange getOperatingRange();
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Type of the current WorkerMetricStats.
|
||||||
|
* @return WorkerMetricType
|
||||||
|
*/
|
||||||
|
WorkerMetricType getWorkerMetricType();
|
||||||
|
|
||||||
|
/**
|
||||||
|
* WorkerMetricValue model class is used as return type for the capture() method to have a strong checks at the build
|
||||||
|
* time of the object itself.
|
||||||
|
*/
|
||||||
|
@Builder
|
||||||
|
class WorkerMetricValue {
|
||||||
|
|
||||||
|
@Getter
|
||||||
|
private final Double value;
|
||||||
|
|
||||||
|
private WorkerMetricValue(@NonNull final Double value) {
|
||||||
|
Preconditions.checkArgument(
|
||||||
|
!(value < 0 || value > 100), value + " is either less than 0 or greater than 100");
|
||||||
|
this.value = value;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,16 @@
|
||||||
|
package software.amazon.kinesis.worker.metric;
|
||||||
|
|
||||||
|
import lombok.Getter;
|
||||||
|
import lombok.RequiredArgsConstructor;
|
||||||
|
|
||||||
|
@RequiredArgsConstructor
|
||||||
|
public enum WorkerMetricType {
|
||||||
|
CPU("C"),
|
||||||
|
MEMORY("M"),
|
||||||
|
NETWORK_IN("NI"),
|
||||||
|
NETWORK_OUT("NO"),
|
||||||
|
THROUGHPUT("T");
|
||||||
|
|
||||||
|
@Getter
|
||||||
|
private final String shortName;
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,128 @@
|
||||||
|
package software.amazon.kinesis.worker.metric.impl.container;
|
||||||
|
|
||||||
|
import java.time.Clock;
|
||||||
|
import java.util.concurrent.TimeUnit;
|
||||||
|
|
||||||
|
import lombok.AccessLevel;
|
||||||
|
import lombok.RequiredArgsConstructor;
|
||||||
|
import lombok.extern.slf4j.Slf4j;
|
||||||
|
import software.amazon.kinesis.worker.metric.OperatingRange;
|
||||||
|
import software.amazon.kinesis.worker.metric.WorkerMetric;
|
||||||
|
import software.amazon.kinesis.worker.metric.WorkerMetricType;
|
||||||
|
|
||||||
|
import static software.amazon.kinesis.utils.Cgroup.getAvailableCpusFromEffectiveCpuSet;
|
||||||
|
import static software.amazon.kinesis.utils.Cgroup.readSingleLineFile;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Utilizes Linux Control Groups by reading cpu time and available cpu from cgroup directory.This works for Elastic
|
||||||
|
* Kubernetes Service (EKS) containers running on Linux instances which use cgroupv1.
|
||||||
|
*
|
||||||
|
* EC2 instances must use a Linux instance that uses cgroupv1. Amazon Linux 2 uses cgroupv1.
|
||||||
|
* Fargate versions 1.4.0 and 1.3.0 use Amazon Linux 2 and can use this.
|
||||||
|
*
|
||||||
|
* CPU time is measured in CPU cores time. A container is limited by amount of CPU core time it is allocated. So if over
|
||||||
|
* a second the container uses 0.5 CPU core time and is allocated 2 CPU cores, the cpu utilization would be 25%.
|
||||||
|
*
|
||||||
|
* When this is invoked for the first time, the value returned is always 0 as the prev values are not available
|
||||||
|
* to calculate the diff.
|
||||||
|
* In case the file is not present or any other exception occurs, this throws IllegalArgumentException.
|
||||||
|
*/
|
||||||
|
@Slf4j
|
||||||
|
@RequiredArgsConstructor(access = AccessLevel.PACKAGE)
|
||||||
|
public class Cgroupv1CpuWorkerMetric implements WorkerMetric {
|
||||||
|
|
||||||
|
private static final Object LOCK_OBJECT = new Object();
|
||||||
|
private static final WorkerMetricType CPU_WORKER_METRICS_TYPE = WorkerMetricType.CPU;
|
||||||
|
private static final String CGROUP_ROOT = "/sys/fs/cgroup/";
|
||||||
|
private static final String CPU_TIME_FILE = CGROUP_ROOT + "cpu/cpuacct.usage";
|
||||||
|
private static final String CPU_CFS_QUOTA_FILE = CGROUP_ROOT + "cpu/cpu.cfs_quota_us";
|
||||||
|
private static final String CPU_CFS_PERIOD_FILE = CGROUP_ROOT + "cpu/cpu.cfs_period_us";
|
||||||
|
private static final String EFFECTIVE_CPU_SET_FILE = CGROUP_ROOT + "cpuset/cpuset.effective_cpus";
|
||||||
|
private final OperatingRange operatingRange;
|
||||||
|
private final String cpuTimeFile;
|
||||||
|
private final String cfsQuotaFile;
|
||||||
|
private final String cfsPeriodFile;
|
||||||
|
private final String effectiveCpuSetFile;
|
||||||
|
private final Clock clock;
|
||||||
|
private double cpuLimit = -1;
|
||||||
|
private long lastCpuUseTimeNanos = 0;
|
||||||
|
private long lastSystemTimeNanos = 0;
|
||||||
|
|
||||||
|
public Cgroupv1CpuWorkerMetric(final OperatingRange operatingRange) {
|
||||||
|
this(
|
||||||
|
operatingRange,
|
||||||
|
CPU_TIME_FILE,
|
||||||
|
CPU_CFS_QUOTA_FILE,
|
||||||
|
CPU_CFS_PERIOD_FILE,
|
||||||
|
EFFECTIVE_CPU_SET_FILE,
|
||||||
|
Clock.systemUTC());
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String getShortName() {
|
||||||
|
return CPU_WORKER_METRICS_TYPE.getShortName();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public WorkerMetricValue capture() {
|
||||||
|
return WorkerMetricValue.builder().value(calculateCpuUsage()).build();
|
||||||
|
}
|
||||||
|
|
||||||
|
private double calculateCpuUsage() {
|
||||||
|
if (cpuLimit == -1) {
|
||||||
|
cpuLimit = calculateCpuLimit();
|
||||||
|
}
|
||||||
|
|
||||||
|
final long cpuTimeNanos = Long.parseLong(readSingleLineFile(cpuTimeFile));
|
||||||
|
final long currentTimeNanos = TimeUnit.MILLISECONDS.toNanos(clock.millis());
|
||||||
|
|
||||||
|
boolean skip = false;
|
||||||
|
double cpuCoreTimeUsed;
|
||||||
|
synchronized (LOCK_OBJECT) {
|
||||||
|
if (lastCpuUseTimeNanos == 0 && lastSystemTimeNanos == 0) {
|
||||||
|
// Case where this is a first call so no diff available
|
||||||
|
skip = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
final long nanoTimeDiff = currentTimeNanos - lastSystemTimeNanos;
|
||||||
|
final long cpuUseDiff = cpuTimeNanos - lastCpuUseTimeNanos;
|
||||||
|
// This value is not a percent, but rather how much CPU core time was consumed. i.e. this number can be
|
||||||
|
// 2.2 which stands for 2.2 CPU cores were fully utilized. If this number is less than 1 than that means
|
||||||
|
// that less than 1 CPU core was used.
|
||||||
|
cpuCoreTimeUsed = ((double) cpuUseDiff / nanoTimeDiff);
|
||||||
|
|
||||||
|
lastCpuUseTimeNanos = cpuTimeNanos;
|
||||||
|
lastSystemTimeNanos = currentTimeNanos;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (skip) {
|
||||||
|
return 0D;
|
||||||
|
} else {
|
||||||
|
// In case of rounding error, treat everything above 100% as 100%
|
||||||
|
return Math.min(100.0, cpuCoreTimeUsed / cpuLimit * 100.0);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private double calculateCpuLimit() {
|
||||||
|
// Documentation on these values:
|
||||||
|
// https://docs.redhat.com/en/documentation/red_hat_enterprise_linux/6/html/resource_management_guide/sec-cpu#sect-cfs
|
||||||
|
final long cfsQuota = Long.parseLong(readSingleLineFile(cfsQuotaFile));
|
||||||
|
final long cfsPeriod = Long.parseLong(readSingleLineFile(cfsPeriodFile));
|
||||||
|
if (cfsQuota == -1) {
|
||||||
|
// If quota is -1, a limit is not set on the container. The container can use all available cores.
|
||||||
|
return getAvailableCpusFromEffectiveCpuSet(readSingleLineFile(effectiveCpuSetFile));
|
||||||
|
} else {
|
||||||
|
return ((double) cfsQuota) / cfsPeriod;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public OperatingRange getOperatingRange() {
|
||||||
|
return operatingRange;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public WorkerMetricType getWorkerMetricType() {
|
||||||
|
return CPU_WORKER_METRICS_TYPE;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,128 @@
|
||||||
|
package software.amazon.kinesis.worker.metric.impl.container;
|
||||||
|
|
||||||
|
import java.time.Clock;
|
||||||
|
import java.util.concurrent.TimeUnit;
|
||||||
|
|
||||||
|
import lombok.AccessLevel;
|
||||||
|
import lombok.RequiredArgsConstructor;
|
||||||
|
import lombok.extern.slf4j.Slf4j;
|
||||||
|
import software.amazon.kinesis.worker.metric.OperatingRange;
|
||||||
|
import software.amazon.kinesis.worker.metric.WorkerMetric;
|
||||||
|
import software.amazon.kinesis.worker.metric.WorkerMetricType;
|
||||||
|
|
||||||
|
import static software.amazon.kinesis.utils.Cgroup.getAvailableCpusFromEffectiveCpuSet;
|
||||||
|
import static software.amazon.kinesis.utils.Cgroup.readSingleLineFile;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Utilizes Linux Control Groups by reading cpu time and available cpu from cgroup directory. This works for Elastic
|
||||||
|
* Kubernetes Service (EKS) containers running on Linux instances which use cgroupv2.
|
||||||
|
*
|
||||||
|
* EC2 instances must use a Linux instance that uses cgroupv2. Amazon Linux 2023 uses cgroupv2.
|
||||||
|
*
|
||||||
|
* CPU time is measured in CPU cores time. A container is limited by amount of CPU core time it is allocated. So if over
|
||||||
|
* a second the container uses 0.5 CPU core time and is allocated 2 CPU cores, the cpu utilization would be 25%.
|
||||||
|
*
|
||||||
|
* When this is invoked for the first time, the value returned is always 0 as the prev values are not available
|
||||||
|
* to calculate the diff.
|
||||||
|
* In case the file is not present or any other exception occurs, this throws IllegalArgumentException.
|
||||||
|
*/
|
||||||
|
@Slf4j
|
||||||
|
@RequiredArgsConstructor(access = AccessLevel.PACKAGE)
|
||||||
|
public class Cgroupv2CpuWorkerMetric implements WorkerMetric {
|
||||||
|
|
||||||
|
private static final Object LOCK_OBJECT = new Object();
|
||||||
|
private static final WorkerMetricType CPU_WORKER_METRICS_TYPE = WorkerMetricType.CPU;
|
||||||
|
private static final String CGROUP_ROOT = "/sys/fs/cgroup/";
|
||||||
|
private static final String CPU_MAX_FILE = CGROUP_ROOT + "cpu.max";
|
||||||
|
private static final String EFFECTIVE_CPU_SET_FILE = CGROUP_ROOT + "cpuset.cpus.effective";
|
||||||
|
private static final String CPU_STAT_FILE = CGROUP_ROOT + "cpu.stat";
|
||||||
|
private final OperatingRange operatingRange;
|
||||||
|
private final String cpuMaxFile;
|
||||||
|
private final String effectiveCpuSetFile;
|
||||||
|
private final String cpuStatFile;
|
||||||
|
private final Clock clock;
|
||||||
|
private double cpuLimit = -1;
|
||||||
|
private long lastCpuUseTimeMicros = 0;
|
||||||
|
private long lastSystemTimeMicros = 0;
|
||||||
|
|
||||||
|
public Cgroupv2CpuWorkerMetric(final OperatingRange operatingRange) {
|
||||||
|
this(operatingRange, CPU_MAX_FILE, EFFECTIVE_CPU_SET_FILE, CPU_STAT_FILE, Clock.systemUTC());
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String getShortName() {
|
||||||
|
return CPU_WORKER_METRICS_TYPE.getShortName();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public WorkerMetricValue capture() {
|
||||||
|
return WorkerMetricValue.builder().value(calculateCpuUsage()).build();
|
||||||
|
}
|
||||||
|
|
||||||
|
private double calculateCpuUsage() {
|
||||||
|
if (cpuLimit == -1) {
|
||||||
|
cpuLimit = calculateCpuLimit();
|
||||||
|
}
|
||||||
|
|
||||||
|
// The first line of this file is of the format
|
||||||
|
// usage_usec $MICROSECONDS
|
||||||
|
// where $MICROSECONDS is always a number
|
||||||
|
final String cpuUsageStat = readSingleLineFile(cpuStatFile);
|
||||||
|
final long cpuTimeMicros = Long.parseLong(cpuUsageStat.split(" ")[1]);
|
||||||
|
final long currentTimeMicros = TimeUnit.MILLISECONDS.toMicros(clock.millis());
|
||||||
|
|
||||||
|
boolean skip = false;
|
||||||
|
double cpuCoreTimeUsed;
|
||||||
|
synchronized (LOCK_OBJECT) {
|
||||||
|
if (lastCpuUseTimeMicros == 0 && lastSystemTimeMicros == 0) {
|
||||||
|
// Case where this is a first call so no diff available
|
||||||
|
skip = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
final long microTimeDiff = currentTimeMicros - lastSystemTimeMicros;
|
||||||
|
final long cpuUseDiff = cpuTimeMicros - lastCpuUseTimeMicros;
|
||||||
|
// This value is not a percent, but rather how much CPU core time was consumed. i.e. this number can be
|
||||||
|
// 2.2 which stands for 2.2 CPU cores were fully utilized. If this number is less than 1 than that means
|
||||||
|
// that less than 1 CPU core was used.
|
||||||
|
cpuCoreTimeUsed = ((double) cpuUseDiff / microTimeDiff);
|
||||||
|
|
||||||
|
lastCpuUseTimeMicros = cpuTimeMicros;
|
||||||
|
lastSystemTimeMicros = currentTimeMicros;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (skip) {
|
||||||
|
return 0D;
|
||||||
|
} else {
|
||||||
|
// In case of rounding error, treat everything above 100% as 100%
|
||||||
|
return Math.min(100.0, cpuCoreTimeUsed / cpuLimit * 100.0);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private double calculateCpuLimit() {
|
||||||
|
// This file contains two values separated by space ($MAX $PERIOD).
|
||||||
|
// $MAX is either a number or "max"
|
||||||
|
// $PERIOD is always a number
|
||||||
|
final String cpuMax = readSingleLineFile(cpuMaxFile);
|
||||||
|
final String[] cpuMaxArr = cpuMax.split(" ");
|
||||||
|
final String max = cpuMaxArr[0];
|
||||||
|
final String period = cpuMaxArr[1];
|
||||||
|
|
||||||
|
if (max.equals("max")) {
|
||||||
|
// if first value in file is "max", a limit is not set on the container. The container can use all available
|
||||||
|
// cores
|
||||||
|
return getAvailableCpusFromEffectiveCpuSet(readSingleLineFile(effectiveCpuSetFile));
|
||||||
|
} else {
|
||||||
|
return Double.parseDouble(max) / Long.parseLong(period);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public OperatingRange getOperatingRange() {
|
||||||
|
return operatingRange;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public WorkerMetricType getWorkerMetricType() {
|
||||||
|
return CPU_WORKER_METRICS_TYPE;
|
||||||
|
}
|
||||||
|
}
|
||||||
Some files were not shown because too many files have changed in this diff Show more
Loading…
Reference in a new issue