Merge 88ff519d51 into a159fa31fb
This commit is contained in:
commit
b80e6d15b9
206 changed files with 21374 additions and 3045 deletions
|
|
@ -21,16 +21,12 @@
|
|||
<parent>
|
||||
<artifactId>amazon-kinesis-client-pom</artifactId>
|
||||
<groupId>software.amazon.kinesis</groupId>
|
||||
<version>2.6.1-SNAPSHOT</version>
|
||||
<version>3.0.0</version>
|
||||
</parent>
|
||||
<modelVersion>4.0.0</modelVersion>
|
||||
|
||||
<artifactId>amazon-kinesis-client-multilang</artifactId>
|
||||
|
||||
<properties>
|
||||
<aws-java-sdk.version>1.12.668</aws-java-sdk.version>
|
||||
</properties>
|
||||
|
||||
<dependencies>
|
||||
<dependency>
|
||||
<groupId>software.amazon.kinesis</groupId>
|
||||
|
|
@ -43,36 +39,10 @@
|
|||
<version>${awssdk.version}</version>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>com.amazonaws</groupId>
|
||||
<artifactId>aws-java-sdk-core</artifactId>
|
||||
<version>${aws-java-sdk.version}</version>
|
||||
<exclusions>
|
||||
<exclusion>
|
||||
<groupId>com.fasterxml.jackson.core</groupId>
|
||||
<artifactId>jackson-databind</artifactId>
|
||||
</exclusion>
|
||||
<exclusion>
|
||||
<groupId>com.fasterxml.jackson.dataformat</groupId>
|
||||
<artifactId>jackson-dataformat-cbor</artifactId>
|
||||
</exclusion>
|
||||
<exclusion>
|
||||
<groupId>org.apache.httpcomponents</groupId>
|
||||
<artifactId>httpclient</artifactId>
|
||||
</exclusion>
|
||||
</exclusions>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>com.amazonaws</groupId>
|
||||
<artifactId>aws-java-sdk-sts</artifactId>
|
||||
<version>${aws-java-sdk.version}</version>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.projectlombok</groupId>
|
||||
<artifactId>lombok</artifactId>
|
||||
<version>1.18.24</version>
|
||||
<version>1.18.28</version>
|
||||
<scope>provided</scope>
|
||||
</dependency>
|
||||
<dependency>
|
||||
|
|
@ -104,6 +74,12 @@
|
|||
</dependency>
|
||||
|
||||
<!-- Test -->
|
||||
<dependency>
|
||||
<groupId>org.junit.jupiter</groupId>
|
||||
<artifactId>junit-jupiter-api</artifactId>
|
||||
<version>5.11.3</version>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>junit</groupId>
|
||||
<artifactId>junit</artifactId>
|
||||
|
|
@ -122,6 +98,13 @@
|
|||
<version>1.3</version>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
<!-- Using older version to be compatible with Java 8 -->
|
||||
<dependency>
|
||||
<groupId>org.mockito</groupId>
|
||||
<artifactId>mockito-junit-jupiter</artifactId>
|
||||
<version>3.12.4</version>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
</dependencies>
|
||||
|
||||
<build>
|
||||
|
|
|
|||
|
|
@ -61,10 +61,10 @@ import software.amazon.kinesis.coordinator.Scheduler;
|
|||
* applicationName = PythonKCLSample
|
||||
*
|
||||
* # Users can change the credentials provider the KCL will use to retrieve credentials.
|
||||
* # The DefaultAWSCredentialsProviderChain checks several other providers, which is
|
||||
* # The DefaultCredentialsProvider checks several other providers, which is
|
||||
* # described here:
|
||||
* # http://docs.aws.amazon.com/AWSJavaSDK/latest/javadoc/com/amazonaws/auth/DefaultAWSCredentialsProviderChain.html
|
||||
* AWSCredentialsProvider = DefaultAWSCredentialsProviderChain
|
||||
* # https://sdk.amazonaws.com/java/api/2.0.0-preview-11/software/amazon/awssdk/auth/credentials/DefaultCredentialsProvider.html
|
||||
* AwsCredentialsProvider = DefaultCredentialsProvider
|
||||
* </pre>
|
||||
*/
|
||||
@Slf4j
|
||||
|
|
@ -141,7 +141,7 @@ public class MultiLangDaemon {
|
|||
}
|
||||
}
|
||||
|
||||
String propertiesFile(final MultiLangDaemonArguments arguments) {
|
||||
String validateAndGetPropertiesFileName(final MultiLangDaemonArguments arguments) {
|
||||
String propertiesFile = "";
|
||||
|
||||
if (CollectionUtils.isNotEmpty(arguments.parameters)) {
|
||||
|
|
@ -216,9 +216,9 @@ public class MultiLangDaemon {
|
|||
MultiLangDaemonArguments arguments = new MultiLangDaemonArguments();
|
||||
JCommander jCommander = daemon.buildJCommanderAndParseArgs(arguments, args);
|
||||
try {
|
||||
String propertiesFile = daemon.propertiesFile(arguments);
|
||||
String propertiesFileName = daemon.validateAndGetPropertiesFileName(arguments);
|
||||
daemon.configureLogging(arguments.logConfiguration);
|
||||
MultiLangDaemonConfig config = daemon.buildMultiLangDaemonConfig(propertiesFile);
|
||||
MultiLangDaemonConfig config = daemon.buildMultiLangDaemonConfig(propertiesFileName);
|
||||
|
||||
Scheduler scheduler = daemon.buildScheduler(config);
|
||||
MultiLangRunner runner = new MultiLangRunner(scheduler);
|
||||
|
|
|
|||
|
|
@ -15,13 +15,14 @@
|
|||
package software.amazon.kinesis.multilang;
|
||||
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
import com.amazonaws.regions.Regions;
|
||||
import com.google.common.base.CaseFormat;
|
||||
import lombok.AccessLevel;
|
||||
import lombok.Getter;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import software.amazon.awssdk.regions.Region;
|
||||
|
||||
/**
|
||||
* Key-Value pairs which may be nested in, and extracted from, a property value
|
||||
|
|
@ -73,8 +74,13 @@ public enum NestedPropertyKey {
|
|||
* @see <a href="https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/using-regions-availability-zones.html#concepts-regions">Available Regions</a>
|
||||
*/
|
||||
ENDPOINT_REGION {
|
||||
void visit(final NestedPropertyProcessor processor, final String region) {
|
||||
processor.acceptEndpointRegion(Regions.fromName(region));
|
||||
void visit(final NestedPropertyProcessor processor, final String regionName) {
|
||||
List<Region> validRegions = Region.regions();
|
||||
Region region = Region.of(regionName);
|
||||
if (!validRegions.contains(region)) {
|
||||
throw new IllegalArgumentException("Invalid region name: " + regionName);
|
||||
}
|
||||
processor.acceptEndpointRegion(region);
|
||||
}
|
||||
},
|
||||
|
||||
|
|
|
|||
|
|
@ -14,7 +14,7 @@
|
|||
*/
|
||||
package software.amazon.kinesis.multilang;
|
||||
|
||||
import com.amazonaws.regions.Regions;
|
||||
import software.amazon.awssdk.regions.Region;
|
||||
|
||||
/**
|
||||
* Defines methods to process {@link NestedPropertyKey}s.
|
||||
|
|
@ -28,7 +28,7 @@ public interface NestedPropertyProcessor {
|
|||
* (e.g., https://sns.us-west-1.amazonaws.com, sns.us-west-1.amazonaws.com)
|
||||
* @param signingRegion the region to use for SigV4 signing of requests (e.g. us-west-1)
|
||||
*
|
||||
* @see #acceptEndpointRegion(Regions)
|
||||
* @see #acceptEndpointRegion(Region)
|
||||
* @see <a href="https://docs.aws.amazon.com/AWSJavaSDK/latest/javadoc/com/amazonaws/client/builder/AwsClientBuilder.EndpointConfiguration.html">
|
||||
* AwsClientBuilder.EndpointConfiguration</a>
|
||||
*/
|
||||
|
|
@ -42,7 +42,7 @@ public interface NestedPropertyProcessor {
|
|||
*
|
||||
* @see #acceptEndpoint(String, String)
|
||||
*/
|
||||
void acceptEndpointRegion(Regions region);
|
||||
void acceptEndpointRegion(Region region);
|
||||
|
||||
/**
|
||||
* Set the external id, an optional field to designate who can assume an IAM role.
|
||||
|
|
|
|||
|
|
@ -1,86 +0,0 @@
|
|||
/*
|
||||
* Copyright 2023 Amazon.com, Inc. or its affiliates.
|
||||
* Licensed under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package software.amazon.kinesis.multilang.auth;
|
||||
|
||||
import java.util.Arrays;
|
||||
|
||||
import com.amazonaws.auth.AWSSessionCredentials;
|
||||
import com.amazonaws.auth.AWSSessionCredentialsProvider;
|
||||
import com.amazonaws.auth.STSAssumeRoleSessionCredentialsProvider;
|
||||
import com.amazonaws.auth.STSAssumeRoleSessionCredentialsProvider.Builder;
|
||||
import com.amazonaws.client.builder.AwsClientBuilder.EndpointConfiguration;
|
||||
import com.amazonaws.regions.Regions;
|
||||
import com.amazonaws.services.securitytoken.AWSSecurityTokenService;
|
||||
import com.amazonaws.services.securitytoken.AWSSecurityTokenServiceClient;
|
||||
import software.amazon.kinesis.multilang.NestedPropertyKey;
|
||||
import software.amazon.kinesis.multilang.NestedPropertyProcessor;
|
||||
|
||||
/**
|
||||
* An {@link AWSSessionCredentialsProvider} that is backed by STSAssumeRole.
|
||||
*/
|
||||
public class KclSTSAssumeRoleSessionCredentialsProvider
|
||||
implements AWSSessionCredentialsProvider, NestedPropertyProcessor {
|
||||
|
||||
private final Builder builder;
|
||||
|
||||
private final STSAssumeRoleSessionCredentialsProvider provider;
|
||||
|
||||
/**
|
||||
*
|
||||
* @param params vararg parameters which must include roleArn at index=0,
|
||||
* and roleSessionName at index=1
|
||||
*/
|
||||
public KclSTSAssumeRoleSessionCredentialsProvider(final String[] params) {
|
||||
this(params[0], params[1], Arrays.copyOfRange(params, 2, params.length));
|
||||
}
|
||||
|
||||
public KclSTSAssumeRoleSessionCredentialsProvider(
|
||||
final String roleArn, final String roleSessionName, final String... params) {
|
||||
builder = new Builder(roleArn, roleSessionName);
|
||||
NestedPropertyKey.parse(this, params);
|
||||
provider = builder.build();
|
||||
}
|
||||
|
||||
@Override
|
||||
public AWSSessionCredentials getCredentials() {
|
||||
return provider.getCredentials();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void refresh() {
|
||||
// do nothing
|
||||
}
|
||||
|
||||
@Override
|
||||
public void acceptEndpoint(final String serviceEndpoint, final String signingRegion) {
|
||||
final EndpointConfiguration endpoint = new EndpointConfiguration(serviceEndpoint, signingRegion);
|
||||
final AWSSecurityTokenService stsClient = AWSSecurityTokenServiceClient.builder()
|
||||
.withEndpointConfiguration(endpoint)
|
||||
.build();
|
||||
builder.withStsClient(stsClient);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void acceptEndpointRegion(final Regions region) {
|
||||
final AWSSecurityTokenService stsClient =
|
||||
AWSSecurityTokenServiceClient.builder().withRegion(region).build();
|
||||
builder.withStsClient(stsClient);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void acceptExternalId(final String externalId) {
|
||||
builder.withExternalId(externalId);
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,61 @@
|
|||
/*
 * Copyright 2024 Amazon.com, Inc. or its affiliates.
 * Licensed under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package software.amazon.kinesis.multilang.auth;

import java.net.URI;
import java.util.Arrays;

import software.amazon.awssdk.auth.credentials.AwsCredentials;
import software.amazon.awssdk.auth.credentials.AwsCredentialsProvider;
import software.amazon.awssdk.regions.Region;
import software.amazon.awssdk.services.sts.StsClient;
import software.amazon.awssdk.services.sts.StsClientBuilder;
import software.amazon.awssdk.services.sts.auth.StsAssumeRoleCredentialsProvider;
import software.amazon.awssdk.services.sts.model.AssumeRoleRequest;
import software.amazon.awssdk.services.sts.model.AssumeRoleRequest.Builder;
import software.amazon.kinesis.multilang.NestedPropertyKey;
import software.amazon.kinesis.multilang.NestedPropertyProcessor;

/**
 * An {@link AwsCredentialsProvider} backed by STS AssumeRole (AWS SDK for Java v2).
 * Nested property parameters (endpoint, endpoint region, external id) are applied to
 * the STS client and AssumeRole request builders before the delegate provider is built.
 */
public class KclStsAssumeRoleCredentialsProvider implements AwsCredentialsProvider, NestedPropertyProcessor {
    private final Builder assumeRoleRequestBuilder;
    private final StsClientBuilder stsClientBuilder;
    private final StsAssumeRoleCredentialsProvider stsAssumeRoleCredentialsProvider;

    /**
     * @param params vararg parameters which must include roleArn at index=0 and
     *         roleSessionName at index=1; any remaining entries are forwarded as
     *         nested property parameters (e.g., endpoint, externalId)
     * @throws IllegalArgumentException if fewer than two parameters are supplied
     */
    public KclStsAssumeRoleCredentialsProvider(String[] params) {
        this(requireRoleParams(params)[0], params[1], Arrays.copyOfRange(params, 2, params.length));
    }

    public KclStsAssumeRoleCredentialsProvider(String roleArn, String roleSessionName, String... params) {
        this.assumeRoleRequestBuilder =
                AssumeRoleRequest.builder().roleArn(roleArn).roleSessionName(roleSessionName);
        this.stsClientBuilder = StsClient.builder();
        // Nested properties may mutate the builders; parse them before the builders are frozen below.
        NestedPropertyKey.parse(this, params);
        this.stsAssumeRoleCredentialsProvider = StsAssumeRoleCredentialsProvider.builder()
                .refreshRequest(assumeRoleRequestBuilder.build())
                .asyncCredentialUpdateEnabled(true)
                .stsClient(stsClientBuilder.build())
                .build();
    }

    /**
     * Fails fast with a descriptive message (instead of an opaque
     * {@link ArrayIndexOutOfBoundsException}) when required parameters are absent.
     */
    private static String[] requireRoleParams(final String[] params) {
        if (params == null || params.length < 2) {
            throw new IllegalArgumentException(
                    "Expected at least roleArn and roleSessionName, got " + Arrays.toString(params));
        }
        return params;
    }

    @Override
    public AwsCredentials resolveCredentials() {
        return stsAssumeRoleCredentialsProvider.resolveCredentials();
    }

    @Override
    public void acceptEndpoint(String serviceEndpoint, String signingRegion) {
        // URI.create requires a scheme; default to HTTPS when given a bare hostname.
        if (!serviceEndpoint.startsWith("http://") && !serviceEndpoint.startsWith("https://")) {
            serviceEndpoint = "https://" + serviceEndpoint;
        }
        stsClientBuilder.endpointOverride(URI.create(serviceEndpoint));
        stsClientBuilder.region(Region.of(signingRegion));
    }

    @Override
    public void acceptEndpointRegion(Region region) {
        stsClientBuilder.region(region);
    }

    @Override
    public void acceptExternalId(String externalId) {
        assumeRoleRequestBuilder.externalId(externalId);
    }
}
|
||||
|
|
@ -1,182 +0,0 @@
|
|||
/*
|
||||
* Copyright 2019 Amazon.com, Inc. or its affiliates.
|
||||
* Licensed under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package software.amazon.kinesis.multilang.config;
|
||||
|
||||
import java.lang.reflect.InvocationTargetException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
import java.util.stream.Collectors;
|
||||
import java.util.stream.Stream;
|
||||
|
||||
import com.amazonaws.auth.AWSCredentialsProvider;
|
||||
import com.amazonaws.auth.AWSCredentialsProviderChain;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
|
||||
/**
|
||||
* Get AWSCredentialsProvider property.
|
||||
*/
|
||||
@Slf4j
|
||||
class AWSCredentialsProviderPropertyValueDecoder implements IPropertyValueDecoder<AWSCredentialsProvider> {
|
||||
private static final String LIST_DELIMITER = ",";
|
||||
private static final String ARG_DELIMITER = "|";
|
||||
|
||||
/**
|
||||
* Constructor.
|
||||
*/
|
||||
AWSCredentialsProviderPropertyValueDecoder() {}
|
||||
|
||||
/**
|
||||
* Get AWSCredentialsProvider property.
|
||||
*
|
||||
* @param value
|
||||
* property value as String
|
||||
* @return corresponding variable in correct type
|
||||
*/
|
||||
@Override
|
||||
public AWSCredentialsProvider decodeValue(String value) {
|
||||
if (value != null) {
|
||||
List<String> providerNames = getProviderNames(value);
|
||||
List<AWSCredentialsProvider> providers = getValidCredentialsProviders(providerNames);
|
||||
AWSCredentialsProvider[] ps = new AWSCredentialsProvider[providers.size()];
|
||||
providers.toArray(ps);
|
||||
return new AWSCredentialsProviderChain(providers);
|
||||
} else {
|
||||
throw new IllegalArgumentException("Property AWSCredentialsProvider is missing.");
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* @return list of supported types
|
||||
*/
|
||||
@Override
|
||||
public List<Class<AWSCredentialsProvider>> getSupportedTypes() {
|
||||
return Collections.singletonList(AWSCredentialsProvider.class);
|
||||
}
|
||||
|
||||
/**
|
||||
* Convert string list to a list of valid credentials providers.
|
||||
*/
|
||||
private static List<AWSCredentialsProvider> getValidCredentialsProviders(List<String> providerNames) {
|
||||
List<AWSCredentialsProvider> credentialsProviders = new ArrayList<>();
|
||||
|
||||
for (String providerName : providerNames) {
|
||||
final String[] nameAndArgs = providerName.split("\\" + ARG_DELIMITER);
|
||||
final Class<? extends AWSCredentialsProvider> clazz;
|
||||
try {
|
||||
final Class<?> c = Class.forName(nameAndArgs[0]);
|
||||
if (!AWSCredentialsProvider.class.isAssignableFrom(c)) {
|
||||
continue;
|
||||
}
|
||||
clazz = (Class<? extends AWSCredentialsProvider>) c;
|
||||
} catch (ClassNotFoundException cnfe) {
|
||||
// Providers are a product of prefixed Strings to cover multiple
|
||||
// namespaces (e.g., "Foo" -> { "some.auth.Foo", "kcl.auth.Foo" }).
|
||||
// It's expected that many class names will not resolve.
|
||||
continue;
|
||||
}
|
||||
log.info("Attempting to construct {}", clazz);
|
||||
|
||||
AWSCredentialsProvider provider = null;
|
||||
if (nameAndArgs.length > 1) {
|
||||
final String[] varargs = Arrays.copyOfRange(nameAndArgs, 1, nameAndArgs.length);
|
||||
|
||||
// attempt to invoke an explicit N-arg constructor of FooClass(String, String, ...)
|
||||
provider = constructProvider(providerName, () -> {
|
||||
Class<?>[] argTypes = new Class<?>[nameAndArgs.length - 1];
|
||||
Arrays.fill(argTypes, String.class);
|
||||
return clazz.getConstructor(argTypes).newInstance(varargs);
|
||||
});
|
||||
|
||||
if (provider == null) {
|
||||
// attempt to invoke a public varargs/array constructor of FooClass(String[])
|
||||
provider = constructProvider(providerName, () -> clazz.getConstructor(String[].class)
|
||||
.newInstance((Object) varargs));
|
||||
}
|
||||
}
|
||||
|
||||
if (provider == null) {
|
||||
// regardless of parameters, fallback to invoke a public no-arg constructor
|
||||
provider = constructProvider(providerName, clazz::newInstance);
|
||||
}
|
||||
|
||||
if (provider != null) {
|
||||
credentialsProviders.add(provider);
|
||||
}
|
||||
}
|
||||
return credentialsProviders;
|
||||
}
|
||||
|
||||
private static List<String> getProviderNames(String property) {
|
||||
// assume list delimiter is ","
|
||||
String[] elements = property.split(LIST_DELIMITER);
|
||||
List<String> result = new ArrayList<>();
|
||||
for (int i = 0; i < elements.length; i++) {
|
||||
String string = elements[i].trim();
|
||||
if (!string.isEmpty()) {
|
||||
// find all possible names and add them to name list
|
||||
result.addAll(getPossibleFullClassNames(string));
|
||||
}
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
private static List<String> getPossibleFullClassNames(final String provider) {
|
||||
return Stream.of(
|
||||
// Customer provides a short name of common providers in com.amazonaws.auth package
|
||||
// (e.g., any classes implementing the AWSCredentialsProvider interface)
|
||||
// @see
|
||||
// http://docs.aws.amazon.com/AWSJavaSDK/latest/javadoc/com/amazonaws/auth/AWSCredentialsProvider.html
|
||||
"com.amazonaws.auth.",
|
||||
|
||||
// Customer provides a short name of a provider offered by this multi-lang package
|
||||
"software.amazon.kinesis.multilang.auth.",
|
||||
|
||||
// Customer provides a fully-qualified provider name, or a custom credentials provider
|
||||
// (e.g., com.amazonaws.auth.ClasspathFileCredentialsProvider, org.mycompany.FooProvider)
|
||||
"")
|
||||
.map(prefix -> prefix + provider)
|
||||
.collect(Collectors.toList());
|
||||
}
|
||||
|
||||
@FunctionalInterface
|
||||
private interface CredentialsProviderConstructor<T extends AWSCredentialsProvider> {
|
||||
T construct()
|
||||
throws IllegalAccessException, InstantiationException, InvocationTargetException, NoSuchMethodException;
|
||||
}
|
||||
|
||||
/**
|
||||
* Attempts to construct an {@link AWSCredentialsProvider}.
|
||||
*
|
||||
* @param providerName Raw, unmodified provider name. Should there be an
|
||||
* Exeception during construction, this parameter will be logged.
|
||||
* @param constructor supplier-like function that will perform the construction
|
||||
* @return the constructed provider, if successful; otherwise, null
|
||||
*
|
||||
* @param <T> type of the CredentialsProvider to construct
|
||||
*/
|
||||
private static <T extends AWSCredentialsProvider> T constructProvider(
|
||||
final String providerName, final CredentialsProviderConstructor<T> constructor) {
|
||||
try {
|
||||
return constructor.construct();
|
||||
} catch (NoSuchMethodException ignored) {
|
||||
// ignore
|
||||
} catch (IllegalAccessException | InstantiationException | InvocationTargetException | RuntimeException e) {
|
||||
log.warn("Failed to construct {}", providerName, e);
|
||||
}
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,261 @@
|
|||
/*
 * Copyright 2019 Amazon.com, Inc. or its affiliates.
 * Licensed under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package software.amazon.kinesis.multilang.config;

import java.lang.reflect.InvocationTargetException;
import java.lang.reflect.Method;
import java.lang.reflect.Modifier;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import java.util.stream.Collectors;
import java.util.stream.Stream;

import lombok.extern.slf4j.Slf4j;
import software.amazon.awssdk.auth.credentials.AwsCredentialsProvider;
import software.amazon.awssdk.auth.credentials.AwsCredentialsProviderChain;
import software.amazon.awssdk.services.sts.auth.StsAssumeRoleCredentialsProvider;
import software.amazon.kinesis.multilang.auth.KclStsAssumeRoleCredentialsProvider;

/**
 * Get AwsCredentialsProvider property.
 *
 * <p>Decodes a comma-separated list of credentials provider names (optionally
 * with {@code |}-delimited constructor arguments) into an
 * {@link AwsCredentialsProviderChain}.
 */
@Slf4j
class AwsCredentialsProviderPropertyValueDecoder implements IPropertyValueDecoder<AwsCredentialsProvider> {
    private static final String LIST_DELIMITER = ",";
    private static final String ARG_DELIMITER = "|";

    /**
     * Constructor.
     */
    AwsCredentialsProviderPropertyValueDecoder() {}

    /**
     * Get AwsCredentialsProvider property.
     *
     * @param value
     *            property value as String
     * @return corresponding variable in correct type
     * @throws IllegalArgumentException if {@code value} is null
     */
    @Override
    public AwsCredentialsProvider decodeValue(String value) {
        if (value != null) {
            List<String> providerNames = getProviderNames(value);
            List<AwsCredentialsProvider> providers = getValidCredentialsProviders(providerNames);
            if (providers.isEmpty()) {
                log.warn("Unable to construct any provider with name {}", value);
                log.warn("Please verify that all AwsCredentialsProvider properties are passed correctly");
            }
            return AwsCredentialsProviderChain.builder()
                    .credentialsProviders(providers)
                    .build();
        } else {
            throw new IllegalArgumentException("Property AwsCredentialsProvider is missing.");
        }
    }

    /**
     * @return list of supported types
     */
    @Override
    public List<Class<AwsCredentialsProvider>> getSupportedTypes() {
        return Collections.singletonList(AwsCredentialsProvider.class);
    }

    /**
     * Convert string list to a list of valid credentials providers.
     */
    private static List<AwsCredentialsProvider> getValidCredentialsProviders(List<String> providerNames) {
        List<AwsCredentialsProvider> credentialsProviders = new ArrayList<>();

        for (String providerName : providerNames) {
            // "Name|arg1|arg2" -> class name plus optional String constructor/create args
            final String[] nameAndArgs = providerName.split("\\" + ARG_DELIMITER);
            final Class<? extends AwsCredentialsProvider> clazz = getClass(nameAndArgs[0]);
            if (clazz == null) {
                continue;
            }
            log.info("Attempting to construct {}", clazz);
            final String[] varargs =
                    nameAndArgs.length > 1 ? Arrays.copyOfRange(nameAndArgs, 1, nameAndArgs.length) : new String[0];
            // Prefer a public constructor; fall back to a static create() factory.
            AwsCredentialsProvider provider = tryConstructor(providerName, clazz, varargs);
            if (provider == null) {
                provider = tryCreate(providerName, clazz, varargs);
            }
            if (provider != null) {
                log.info("Provider constructed successfully: {}", provider);
                credentialsProviders.add(provider);
            }
        }
        return credentialsProviders;
    }

    /**
     * Attempts construction via, in order: a {@code (String[])} constructor,
     * an N-arg {@code (String, ...)} constructor, then a no-arg constructor.
     */
    private static AwsCredentialsProvider tryConstructor(
            String providerName, Class<? extends AwsCredentialsProvider> clazz, String[] varargs) {
        AwsCredentialsProvider provider =
                constructProvider(providerName, () -> getConstructorWithVarArgs(clazz, varargs));
        if (provider == null) {
            provider = constructProvider(providerName, () -> getConstructorWithArgs(clazz, varargs));
        }
        if (provider == null) {
            // Class.newInstance() is deprecated; go through the declared no-arg constructor.
            provider = constructProvider(
                    providerName, () -> clazz.getDeclaredConstructor().newInstance());
        }
        return provider;
    }

    /**
     * Attempts construction via a static {@code create()} factory method, trying
     * {@code create(String[])}, {@code create(String...)}, then {@code create()}.
     */
    private static AwsCredentialsProvider tryCreate(
            String providerName, Class<? extends AwsCredentialsProvider> clazz, String[] varargs) {
        AwsCredentialsProvider provider =
                constructProvider(providerName, () -> getCreateMethod(clazz, (Object) varargs));
        if (provider == null) {
            provider = constructProvider(providerName, () -> getCreateMethod(clazz, varargs));
        }
        if (provider == null) {
            provider = constructProvider(providerName, () -> getCreateMethod(clazz));
        }
        return provider;
    }

    private static AwsCredentialsProvider getConstructorWithVarArgs(
            Class<? extends AwsCredentialsProvider> clazz, String[] varargs) {
        try {
            return clazz.getConstructor(String[].class).newInstance((Object) varargs);
        } catch (Exception e) {
            return null;
        }
    }

    private static AwsCredentialsProvider getConstructorWithArgs(
            Class<? extends AwsCredentialsProvider> clazz, String[] varargs) {
        try {
            Class<?>[] argTypes = new Class<?>[varargs.length];
            Arrays.fill(argTypes, String.class);
            return clazz.getConstructor(argTypes).newInstance((Object[]) varargs);
        } catch (Exception e) {
            return null;
        }
    }

    private static AwsCredentialsProvider getCreateMethod(
            Class<? extends AwsCredentialsProvider> clazz, Object... args) {
        try {
            Class<?>[] argTypes = new Class<?>[args.length];
            for (int i = 0; i < args.length; i++) {
                argTypes[i] = args[i].getClass();
            }
            Method createMethod = clazz.getDeclaredMethod("create", argTypes);
            if (Modifier.isStatic(createMethod.getModifiers())) {
                return clazz.cast(createMethod.invoke(null, args));
            } else {
                log.warn("Found non-static create() method in {}", clazz.getName());
            }
        } catch (NoSuchMethodException e) {
            // No matching create method found for class
        } catch (Exception e) {
            log.warn("Failed to invoke create() method in {}", clazz.getName(), e);
        }
        return null;
    }

    /**
     * Resolves the class for the given provider name.
     *
     * @param providerName A string containing the provider name.
     *
     * @return The Class object representing the resolved AwsCredentialsProvider implementation,
     *         or null if the class cannot be resolved or does not extend AwsCredentialsProvider.
     */
    @SuppressWarnings("unchecked") // safe: isAssignableFrom is checked before the cast
    private static Class<? extends AwsCredentialsProvider> getClass(String providerName) {
        // Convert any form of StsAssumeRoleCredentialsProvider string to KclStsAssumeRoleCredentialsProvider
        if (providerName.equals(StsAssumeRoleCredentialsProvider.class.getSimpleName())
                || providerName.equals(StsAssumeRoleCredentialsProvider.class.getName())) {
            providerName = KclStsAssumeRoleCredentialsProvider.class.getName();
        }
        try {
            final Class<?> c = Class.forName(providerName);
            if (!AwsCredentialsProvider.class.isAssignableFrom(c)) {
                return null;
            }
            return (Class<? extends AwsCredentialsProvider>) c;
        } catch (ClassNotFoundException cnfe) {
            // Providers are a product of prefixed Strings to cover multiple
            // namespaces (e.g., "Foo" -> { "some.auth.Foo", "kcl.auth.Foo" }).
            // It's expected that many class names will not resolve.
            return null;
        }
    }

    private static List<String> getProviderNames(String property) {
        // assume list delimiter is ","
        String[] elements = property.split(LIST_DELIMITER);
        List<String> result = new ArrayList<>();
        for (int i = 0; i < elements.length; i++) {
            String string = elements[i].trim();
            if (!string.isEmpty()) {
                // find all possible names and add them to name list
                result.addAll(getPossibleFullClassNames(string));
            }
        }
        return result;
    }

    private static List<String> getPossibleFullClassNames(final String provider) {
        return Stream.of(
                        // Customer provides a short name of a provider offered by this multi-lang package
                        "software.amazon.kinesis.multilang.auth.",
                        // Customer provides a short name of common providers in software.amazon.awssdk.auth.credentials
                        // package (e.g., any classes implementing the AwsCredentialsProvider interface)
                        // @see
                        // https://sdk.amazonaws.com/java/api/latest/software/amazon/awssdk/auth/credentials/AwsCredentialsProvider.html
                        "software.amazon.awssdk.auth.credentials.",
                        // Customer provides a fully-qualified provider name, or a custom credentials provider
                        // (e.g., org.mycompany.FooProvider)
                        "")
                .map(prefix -> prefix + provider)
                .collect(Collectors.toList());
    }

    /** Supplier-like hook that narrows the reflective exceptions a construction attempt may throw. */
    @FunctionalInterface
    private interface CredentialsProviderConstructor<T extends AwsCredentialsProvider> {
        T construct()
                throws IllegalAccessException, InstantiationException, InvocationTargetException, NoSuchMethodException;
    }

    /**
     * Attempts to construct an {@link AwsCredentialsProvider}.
     *
     * @param providerName Raw, unmodified provider name. Should there be an
     *      Exception during construction, this parameter will be logged.
     * @param constructor supplier-like function that will perform the construction
     * @return the constructed provider, if successful; otherwise, null
     *
     * @param <T> type of the CredentialsProvider to construct
     */
    private static <T extends AwsCredentialsProvider> T constructProvider(
            final String providerName, final CredentialsProviderConstructor<T> constructor) {
        try {
            return constructor.construct();
        } catch (NoSuchMethodException
                | IllegalAccessException
                | InstantiationException
                | InvocationTargetException
                | RuntimeException ignored) {
            // ignore: fallback strategies in the callers will try other construction paths
        }
        return null;
    }
}
|
||||
|
|
@ -0,0 +1,56 @@
|
|||
/*
|
||||
* Copyright 2024 Amazon.com, Inc. or its affiliates.
|
||||
* Licensed under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package software.amazon.kinesis.multilang.config;
|
||||
|
||||
import lombok.Getter;
|
||||
import lombok.Setter;
|
||||
import software.amazon.awssdk.services.dynamodb.model.BillingMode;
|
||||
import software.amazon.kinesis.coordinator.CoordinatorConfig.CoordinatorStateTableConfig;
|
||||
|
||||
/**
 * Property-file-backed bean holding coordinator-state DynamoDB table settings. Each field annotated
 * with {@link ConfigurationSettable} is copied onto a {@link CoordinatorStateTableConfig} via
 * {@code ConfigurationSettableUtils.resolveFields} (see {@code MultiLangDaemonConfiguration}).
 */
@Getter
@Setter
public class CoordinatorStateTableConfigBean {

    /**
     * Accessor/mutator contract the owning configuration exposes for this bean; used as the
     * {@code types} argument of Lombok's {@code @Delegate} in {@code MultiLangDaemonConfiguration}.
     */
    interface CoordinatorStateConfigBeanDelegate {
        String getCoordinatorStateTableName();

        void setCoordinatorStateTableName(String value);

        BillingMode getCoordinatorStateBillingMode();

        void setCoordinatorStateBillingMode(BillingMode value);

        long getCoordinatorStateReadCapacity();

        void setCoordinatorStateReadCapacity(long value);

        long getCoordinatorStateWriteCapacity();

        void setCoordinatorStateWriteCapacity(long value);
    }

    // resolved onto CoordinatorStateTableConfig#tableName
    @ConfigurationSettable(configurationClass = CoordinatorStateTableConfig.class, methodName = "tableName")
    private String coordinatorStateTableName;

    // resolved onto CoordinatorStateTableConfig#billingMode
    @ConfigurationSettable(configurationClass = CoordinatorStateTableConfig.class, methodName = "billingMode")
    private BillingMode coordinatorStateBillingMode;

    // resolved onto CoordinatorStateTableConfig#readCapacity
    @ConfigurationSettable(configurationClass = CoordinatorStateTableConfig.class, methodName = "readCapacity")
    private long coordinatorStateReadCapacity;

    // resolved onto CoordinatorStateTableConfig#writeCapacity
    @ConfigurationSettable(configurationClass = CoordinatorStateTableConfig.class, methodName = "writeCapacity")
    private long coordinatorStateWriteCapacity;
}
|
||||
|
|
@ -0,0 +1,41 @@
|
|||
/*
|
||||
* Copyright 2024 Amazon.com, Inc. or its affiliates.
|
||||
* Licensed under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package software.amazon.kinesis.multilang.config;
|
||||
|
||||
import lombok.Getter;
|
||||
import lombok.Setter;
|
||||
import software.amazon.kinesis.leases.LeaseManagementConfig;
|
||||
|
||||
/**
 * Property-file-backed bean holding graceful lease handoff settings. Fields annotated with
 * {@link ConfigurationSettable} are copied onto a
 * {@link LeaseManagementConfig.GracefulLeaseHandoffConfig} via
 * {@code ConfigurationSettableUtils.resolveFields} (see {@code MultiLangDaemonConfiguration}).
 */
@Getter
@Setter
public class GracefulLeaseHandoffConfigBean {

    /**
     * Accessor/mutator contract the owning configuration exposes for this bean; used as the
     * {@code types} argument of Lombok's {@code @Delegate} in {@code MultiLangDaemonConfiguration}.
     */
    interface GracefulLeaseHandoffConfigBeanDelegate {
        Long getGracefulLeaseHandoffTimeoutMillis();

        void setGracefulLeaseHandoffTimeoutMillis(Long value);

        Boolean getIsGracefulLeaseHandoffEnabled();

        void setIsGracefulLeaseHandoffEnabled(Boolean value);
    }

    // boxed so an unset property can be distinguished from an explicit value
    @ConfigurationSettable(configurationClass = LeaseManagementConfig.GracefulLeaseHandoffConfig.class)
    private Long gracefulLeaseHandoffTimeoutMillis;

    @ConfigurationSettable(configurationClass = LeaseManagementConfig.GracefulLeaseHandoffConfig.class)
    private Boolean isGracefulLeaseHandoffEnabled;
}
|
||||
|
|
@ -28,7 +28,7 @@ import software.amazon.kinesis.common.StreamIdentifier;
|
|||
|
||||
/**
|
||||
* KinesisClientLibConfigurator constructs a KinesisClientLibConfiguration from java properties file. The following
|
||||
* three properties must be provided. 1) "applicationName" 2) "streamName" 3) "AWSCredentialsProvider"
|
||||
* three properties must be provided. 1) "applicationName" 2) "streamName" 3) "AwsCredentialsProvider"
|
||||
* KinesisClientLibConfigurator will help to automatically assign the value of "workerId" if this property is not
|
||||
* provided. In the specified properties file, any properties, which matches the variable name in
|
||||
* KinesisClientLibConfiguration and has a corresponding "with{variableName}" setter method, will be read in, and its
|
||||
|
|
@ -62,7 +62,7 @@ public class KinesisClientLibConfigurator {
|
|||
properties.entrySet().forEach(e -> {
|
||||
try {
|
||||
log.info("Processing (key={}, value={})", e.getKey(), e.getValue());
|
||||
utilsBean.setProperty(configuration, (String) e.getKey(), e.getValue());
|
||||
utilsBean.setProperty(configuration, processKey((String) e.getKey()), e.getValue());
|
||||
} catch (IllegalAccessException | InvocationTargetException ex) {
|
||||
throw new RuntimeException(ex);
|
||||
}
|
||||
|
|
@ -110,4 +110,17 @@ public class KinesisClientLibConfigurator {
|
|||
}
|
||||
return getConfiguration(properties);
|
||||
}
|
||||
|
||||
/**
|
||||
* Processes a configuration key to normalize AWS credentials provider naming. Necessary to conform to
|
||||
* autogenerated setters.
|
||||
* @param key the config param key
|
||||
* @return case-configured param key name
|
||||
*/
|
||||
String processKey(String key) {
|
||||
if (key.toLowerCase().startsWith("awscredentialsprovider")) {
|
||||
key = key.replaceAll("(?i)awscredentialsprovider", "awsCredentialsProvider");
|
||||
}
|
||||
return key;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -17,6 +17,7 @@ package software.amazon.kinesis.multilang.config;
|
|||
|
||||
import java.lang.reflect.InvocationTargetException;
|
||||
import java.net.URI;
|
||||
import java.time.Duration;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collections;
|
||||
import java.util.Date;
|
||||
|
|
@ -41,6 +42,7 @@ import software.amazon.awssdk.auth.credentials.AwsCredentialsProvider;
|
|||
import software.amazon.awssdk.regions.Region;
|
||||
import software.amazon.awssdk.services.cloudwatch.CloudWatchAsyncClient;
|
||||
import software.amazon.awssdk.services.dynamodb.DynamoDbAsyncClient;
|
||||
import software.amazon.awssdk.services.dynamodb.model.BillingMode;
|
||||
import software.amazon.awssdk.services.kinesis.KinesisAsyncClient;
|
||||
import software.amazon.awssdk.services.kinesis.KinesisAsyncClientBuilder;
|
||||
import software.amazon.kinesis.checkpoint.CheckpointConfig;
|
||||
|
|
@ -55,7 +57,7 @@ import software.amazon.kinesis.leases.ShardPrioritization;
|
|||
import software.amazon.kinesis.lifecycle.LifecycleConfig;
|
||||
import software.amazon.kinesis.metrics.MetricsConfig;
|
||||
import software.amazon.kinesis.metrics.MetricsLevel;
|
||||
import software.amazon.kinesis.multilang.config.credentials.V2CredentialWrapper;
|
||||
import software.amazon.kinesis.multilang.config.converter.DurationConverter;
|
||||
import software.amazon.kinesis.processor.ProcessorConfig;
|
||||
import software.amazon.kinesis.processor.ShardRecordProcessorFactory;
|
||||
import software.amazon.kinesis.retrieval.RetrievalConfig;
|
||||
|
|
@ -156,6 +158,9 @@ public class MultiLangDaemonConfiguration {
|
|||
@ConfigurationSettable(configurationClass = CoordinatorConfig.class)
|
||||
private long schedulerInitializationBackoffTimeMillis;
|
||||
|
||||
@ConfigurationSettable(configurationClass = CoordinatorConfig.class)
|
||||
private CoordinatorConfig.ClientVersionConfig clientVersionConfig;
|
||||
|
||||
@ConfigurationSettable(configurationClass = LifecycleConfig.class)
|
||||
private long taskBackoffTimeMillis;
|
||||
|
||||
|
|
@ -189,6 +194,22 @@ public class MultiLangDaemonConfiguration {
|
|||
@Delegate(types = PollingConfigBean.PollingConfigBeanDelegate.class)
|
||||
private final PollingConfigBean pollingConfig = new PollingConfigBean();
|
||||
|
||||
@Delegate(types = GracefulLeaseHandoffConfigBean.GracefulLeaseHandoffConfigBeanDelegate.class)
|
||||
private final GracefulLeaseHandoffConfigBean gracefulLeaseHandoffConfigBean = new GracefulLeaseHandoffConfigBean();
|
||||
|
||||
@Delegate(
|
||||
types = WorkerUtilizationAwareAssignmentConfigBean.WorkerUtilizationAwareAssignmentConfigBeanDelegate.class)
|
||||
private final WorkerUtilizationAwareAssignmentConfigBean workerUtilizationAwareAssignmentConfigBean =
|
||||
new WorkerUtilizationAwareAssignmentConfigBean();
|
||||
|
||||
@Delegate(types = WorkerMetricStatsTableConfigBean.WorkerMetricsTableConfigBeanDelegate.class)
|
||||
private final WorkerMetricStatsTableConfigBean workerMetricStatsTableConfigBean =
|
||||
new WorkerMetricStatsTableConfigBean();
|
||||
|
||||
@Delegate(types = CoordinatorStateTableConfigBean.CoordinatorStateConfigBeanDelegate.class)
|
||||
private final CoordinatorStateTableConfigBean coordinatorStateTableConfigBean =
|
||||
new CoordinatorStateTableConfigBean();
|
||||
|
||||
private boolean validateSequenceNumberBeforeCheckpointing;
|
||||
|
||||
private long shutdownGraceMillis;
|
||||
|
|
@ -196,19 +217,19 @@ public class MultiLangDaemonConfiguration {
|
|||
|
||||
private final BuilderDynaBean kinesisCredentialsProvider;
|
||||
|
||||
public void setAWSCredentialsProvider(String providerString) {
|
||||
public void setAwsCredentialsProvider(String providerString) {
|
||||
kinesisCredentialsProvider.set("", providerString);
|
||||
}
|
||||
|
||||
private final BuilderDynaBean dynamoDBCredentialsProvider;
|
||||
|
||||
public void setAWSCredentialsProviderDynamoDB(String providerString) {
|
||||
public void setAwsCredentialsProviderDynamoDB(String providerString) {
|
||||
dynamoDBCredentialsProvider.set("", providerString);
|
||||
}
|
||||
|
||||
private final BuilderDynaBean cloudWatchCredentialsProvider;
|
||||
|
||||
public void setAWSCredentialsProviderCloudWatch(String providerString) {
|
||||
public void setAwsCredentialsProviderCloudWatch(String providerString) {
|
||||
cloudWatchCredentialsProvider.set("", providerString);
|
||||
}
|
||||
|
||||
|
|
@ -252,6 +273,25 @@ public class MultiLangDaemonConfiguration {
|
|||
},
|
||||
InitialPositionInStream.class);
|
||||
|
||||
convertUtilsBean.register(
|
||||
new Converter() {
|
||||
@Override
|
||||
public <T> T convert(Class<T> type, Object value) {
|
||||
return type.cast(CoordinatorConfig.ClientVersionConfig.valueOf(
|
||||
value.toString().toUpperCase()));
|
||||
}
|
||||
},
|
||||
CoordinatorConfig.ClientVersionConfig.class);
|
||||
|
||||
convertUtilsBean.register(
|
||||
new Converter() {
|
||||
@Override
|
||||
public <T> T convert(Class<T> type, Object value) {
|
||||
return type.cast(BillingMode.valueOf(value.toString().toUpperCase()));
|
||||
}
|
||||
},
|
||||
BillingMode.class);
|
||||
|
||||
convertUtilsBean.register(
|
||||
new Converter() {
|
||||
@Override
|
||||
|
|
@ -279,12 +319,14 @@ public class MultiLangDaemonConfiguration {
|
|||
},
|
||||
Region.class);
|
||||
|
||||
convertUtilsBean.register(new DurationConverter(), Duration.class);
|
||||
|
||||
ArrayConverter arrayConverter = new ArrayConverter(String[].class, new StringConverter());
|
||||
arrayConverter.setDelimiter(',');
|
||||
convertUtilsBean.register(arrayConverter, String[].class);
|
||||
AWSCredentialsProviderPropertyValueDecoder oldCredentialsDecoder =
|
||||
new AWSCredentialsProviderPropertyValueDecoder();
|
||||
Function<String, ?> converter = s -> new V2CredentialWrapper(oldCredentialsDecoder.decodeValue(s));
|
||||
AwsCredentialsProviderPropertyValueDecoder credentialsDecoder =
|
||||
new AwsCredentialsProviderPropertyValueDecoder();
|
||||
Function<String, ?> converter = credentialsDecoder::decodeValue;
|
||||
|
||||
this.kinesisCredentialsProvider = new BuilderDynaBean(
|
||||
AwsCredentialsProvider.class, convertUtilsBean, converter, CREDENTIALS_DEFAULT_SEARCH_PATH);
|
||||
|
|
@ -370,6 +412,22 @@ public class MultiLangDaemonConfiguration {
|
|||
retrievalMode.builder(this).build(configsBuilder.kinesisClient(), this));
|
||||
}
|
||||
|
||||
    /**
     * Copies the {@code @ConfigurationSettable}-annotated coordinator-state-table properties from this
     * configuration's bean onto the resolved {@link CoordinatorConfig}'s table config.
     */
    private void handleCoordinatorConfig(CoordinatorConfig coordinatorConfig) {
        ConfigurationSettableUtils.resolveFields(
                this.coordinatorStateTableConfigBean, coordinatorConfig.coordinatorStateTableConfig());
    }
|
||||
|
||||
    /**
     * Copies the {@code @ConfigurationSettable}-annotated properties of the graceful-lease-handoff,
     * worker-utilization-aware-assignment, and worker-metrics-table beans onto the corresponding
     * sub-configs of the resolved {@link LeaseManagementConfig}.
     */
    private void handleLeaseManagementConfig(LeaseManagementConfig leaseManagementConfig) {
        ConfigurationSettableUtils.resolveFields(
                this.gracefulLeaseHandoffConfigBean, leaseManagementConfig.gracefulLeaseHandoffConfig());
        ConfigurationSettableUtils.resolveFields(
                this.workerUtilizationAwareAssignmentConfigBean,
                leaseManagementConfig.workerUtilizationAwareAssignmentConfig());
        // the worker-metrics table config is nested inside the utilization-aware assignment config
        ConfigurationSettableUtils.resolveFields(
                this.workerMetricStatsTableConfigBean,
                leaseManagementConfig.workerUtilizationAwareAssignmentConfig().workerMetricsTableConfig());
    }
|
||||
|
||||
private Object adjustKinesisHttpConfiguration(Object builderObj) {
|
||||
if (builderObj instanceof KinesisAsyncClientBuilder) {
|
||||
KinesisAsyncClientBuilder builder = (KinesisAsyncClientBuilder) builderObj;
|
||||
|
|
@ -448,6 +506,8 @@ public class MultiLangDaemonConfiguration {
|
|||
processorConfig,
|
||||
retrievalConfig);
|
||||
|
||||
handleCoordinatorConfig(coordinatorConfig);
|
||||
handleLeaseManagementConfig(leaseManagementConfig);
|
||||
handleRetrievalConfig(retrievalConfig, configsBuilder);
|
||||
|
||||
resolveFields(configObjects, null, new HashSet<>(Arrays.asList(ConfigsBuilder.class, PollingConfig.class)));
|
||||
|
|
|
|||
|
|
@ -0,0 +1,56 @@
|
|||
/*
|
||||
* Copyright 2024 Amazon.com, Inc. or its affiliates.
|
||||
* Licensed under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package software.amazon.kinesis.multilang.config;
|
||||
|
||||
import lombok.Getter;
|
||||
import lombok.Setter;
|
||||
import software.amazon.awssdk.services.dynamodb.model.BillingMode;
|
||||
import software.amazon.kinesis.leases.LeaseManagementConfig.WorkerMetricsTableConfig;
|
||||
|
||||
/**
 * Property-file-backed bean holding worker-metrics DynamoDB table settings. Each field annotated with
 * {@link ConfigurationSettable} is copied onto a {@link WorkerMetricsTableConfig} via
 * {@code ConfigurationSettableUtils.resolveFields} (see {@code MultiLangDaemonConfiguration}).
 */
@Getter
@Setter
public class WorkerMetricStatsTableConfigBean {

    /**
     * Accessor/mutator contract the owning configuration exposes for this bean; used as the
     * {@code types} argument of Lombok's {@code @Delegate} in {@code MultiLangDaemonConfiguration}.
     */
    interface WorkerMetricsTableConfigBeanDelegate {
        String getWorkerMetricsTableName();

        void setWorkerMetricsTableName(String value);

        BillingMode getWorkerMetricsBillingMode();

        void setWorkerMetricsBillingMode(BillingMode value);

        long getWorkerMetricsReadCapacity();

        void setWorkerMetricsReadCapacity(long value);

        long getWorkerMetricsWriteCapacity();

        void setWorkerMetricsWriteCapacity(long value);
    }

    // resolved onto WorkerMetricsTableConfig#tableName
    @ConfigurationSettable(configurationClass = WorkerMetricsTableConfig.class, methodName = "tableName")
    private String workerMetricsTableName;

    // resolved onto WorkerMetricsTableConfig#billingMode
    @ConfigurationSettable(configurationClass = WorkerMetricsTableConfig.class, methodName = "billingMode")
    private BillingMode workerMetricsBillingMode;

    // resolved onto WorkerMetricsTableConfig#readCapacity
    @ConfigurationSettable(configurationClass = WorkerMetricsTableConfig.class, methodName = "readCapacity")
    private long workerMetricsReadCapacity;

    // resolved onto WorkerMetricsTableConfig#writeCapacity
    @ConfigurationSettable(configurationClass = WorkerMetricsTableConfig.class, methodName = "writeCapacity")
    private long workerMetricsWriteCapacity;
}
|
||||
|
|
@ -0,0 +1,106 @@
|
|||
/*
|
||||
* Copyright 2024 Amazon.com, Inc. or its affiliates.
|
||||
* Licensed under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package software.amazon.kinesis.multilang.config;
|
||||
|
||||
import java.time.Duration;
|
||||
|
||||
import lombok.Getter;
|
||||
import lombok.Setter;
|
||||
import software.amazon.kinesis.leases.LeaseManagementConfig.WorkerUtilizationAwareAssignmentConfig;
|
||||
|
||||
/**
 * Property-file-backed bean holding worker-utilization-aware lease assignment settings. Each field
 * annotated with {@link ConfigurationSettable} is copied onto a
 * {@link WorkerUtilizationAwareAssignmentConfig} via {@code ConfigurationSettableUtils.resolveFields}
 * (see {@code MultiLangDaemonConfiguration}). Field semantics are defined by the same-named setters on
 * {@code WorkerUtilizationAwareAssignmentConfig}.
 */
@Getter
@Setter
public class WorkerUtilizationAwareAssignmentConfigBean {

    /**
     * Accessor/mutator contract the owning configuration exposes for this bean; used as the
     * {@code types} argument of Lombok's {@code @Delegate} in {@code MultiLangDaemonConfiguration}.
     */
    interface WorkerUtilizationAwareAssignmentConfigBeanDelegate {
        long getInMemoryWorkerMetricsCaptureFrequencyMillis();

        void setInMemoryWorkerMetricsCaptureFrequencyMillis(long value);

        long getWorkerMetricsReporterFreqInMillis();

        void setWorkerMetricsReporterFreqInMillis(long value);

        int getNoOfPersistedMetricsPerWorkerMetrics();

        void setNoOfPersistedMetricsPerWorkerMetrics(int value);

        Boolean getDisableWorkerMetrics();

        void setDisableWorkerMetrics(Boolean value);

        double getMaxThroughputPerHostKBps();

        void setMaxThroughputPerHostKBps(double value);

        int getDampeningPercentage();

        void setDampeningPercentage(int value);

        int getReBalanceThresholdPercentage();

        void setReBalanceThresholdPercentage(int value);

        Boolean getAllowThroughputOvershoot();

        void setAllowThroughputOvershoot(Boolean value);

        int getVarianceBalancingFrequency();

        void setVarianceBalancingFrequency(int value);

        double getWorkerMetricsEMAAlpha();

        void setWorkerMetricsEMAAlpha(double value);

        void setStaleWorkerMetricsEntryCleanupDuration(Duration value);

        Duration getStaleWorkerMetricsEntryCleanupDuration();
    }

    @ConfigurationSettable(configurationClass = WorkerUtilizationAwareAssignmentConfig.class)
    private long inMemoryWorkerMetricsCaptureFrequencyMillis;

    @ConfigurationSettable(configurationClass = WorkerUtilizationAwareAssignmentConfig.class)
    private long workerMetricsReporterFreqInMillis;

    @ConfigurationSettable(configurationClass = WorkerUtilizationAwareAssignmentConfig.class)
    private int noOfPersistedMetricsPerWorkerMetrics;

    // boxed so an unset property can be distinguished from an explicit value
    @ConfigurationSettable(configurationClass = WorkerUtilizationAwareAssignmentConfig.class)
    private Boolean disableWorkerMetrics;

    @ConfigurationSettable(configurationClass = WorkerUtilizationAwareAssignmentConfig.class)
    private double maxThroughputPerHostKBps;

    @ConfigurationSettable(configurationClass = WorkerUtilizationAwareAssignmentConfig.class)
    private int dampeningPercentage;

    @ConfigurationSettable(configurationClass = WorkerUtilizationAwareAssignmentConfig.class)
    private int reBalanceThresholdPercentage;

    // boxed so an unset property can be distinguished from an explicit value
    @ConfigurationSettable(configurationClass = WorkerUtilizationAwareAssignmentConfig.class)
    private Boolean allowThroughputOvershoot;

    @ConfigurationSettable(configurationClass = WorkerUtilizationAwareAssignmentConfig.class)
    private int varianceBalancingFrequency;

    @ConfigurationSettable(configurationClass = WorkerUtilizationAwareAssignmentConfig.class)
    private double workerMetricsEMAAlpha;

    // Duration properties are parsed from text by DurationConverter (registered in
    // MultiLangDaemonConfiguration)
    @ConfigurationSettable(configurationClass = WorkerUtilizationAwareAssignmentConfig.class)
    private Duration staleWorkerMetricsEntryCleanupDuration;
}
|
||||
|
|
@ -0,0 +1,37 @@
|
|||
package software.amazon.kinesis.multilang.config.converter;
|
||||
|
||||
import java.time.Duration;
|
||||
|
||||
import org.apache.commons.beanutils.Converter;
|
||||
|
||||
/**
|
||||
* Converter that converts Duration text representation to a Duration object.
|
||||
* Refer to {@code Duration.parse} javadocs for the exact text representation.
|
||||
*/
|
||||
public class DurationConverter implements Converter {
|
||||
|
||||
@Override
|
||||
public <T> T convert(Class<T> type, Object value) {
|
||||
if (value == null) {
|
||||
return null;
|
||||
}
|
||||
|
||||
if (type != Duration.class) {
|
||||
throw new ConversionException("Can only convert to Duration");
|
||||
}
|
||||
|
||||
String durationString = value.toString().trim();
|
||||
final Duration duration = Duration.parse(durationString);
|
||||
if (duration.isNegative()) {
|
||||
throw new ConversionException("Negative values are not permitted for duration: " + durationString);
|
||||
}
|
||||
|
||||
return type.cast(duration);
|
||||
}
|
||||
|
||||
public static class ConversionException extends RuntimeException {
|
||||
public ConversionException(String message) {
|
||||
super(message);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -1,52 +0,0 @@
|
|||
/*
|
||||
* Copyright 2019 Amazon.com, Inc. or its affiliates.
|
||||
* Licensed under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package software.amazon.kinesis.multilang.config.credentials;
|
||||
|
||||
import com.amazonaws.auth.AWSCredentials;
|
||||
import com.amazonaws.auth.AWSCredentialsProvider;
|
||||
import com.amazonaws.auth.AWSSessionCredentials;
|
||||
import lombok.RequiredArgsConstructor;
|
||||
import software.amazon.awssdk.auth.credentials.AwsCredentials;
|
||||
import software.amazon.awssdk.auth.credentials.AwsCredentialsProvider;
|
||||
import software.amazon.awssdk.auth.credentials.AwsSessionCredentials;
|
||||
|
||||
@RequiredArgsConstructor
|
||||
public class V2CredentialWrapper implements AwsCredentialsProvider {
|
||||
|
||||
private final AWSCredentialsProvider oldCredentialsProvider;
|
||||
|
||||
@Override
|
||||
public AwsCredentials resolveCredentials() {
|
||||
AWSCredentials current = oldCredentialsProvider.getCredentials();
|
||||
if (current instanceof AWSSessionCredentials) {
|
||||
return AwsSessionCredentials.create(
|
||||
current.getAWSAccessKeyId(),
|
||||
current.getAWSSecretKey(),
|
||||
((AWSSessionCredentials) current).getSessionToken());
|
||||
}
|
||||
return new AwsCredentials() {
|
||||
@Override
|
||||
public String accessKeyId() {
|
||||
return current.getAWSAccessKeyId();
|
||||
}
|
||||
|
||||
@Override
|
||||
public String secretAccessKey() {
|
||||
return current.getAWSSecretKey();
|
||||
}
|
||||
};
|
||||
}
|
||||
}
|
||||
|
|
@ -65,7 +65,7 @@ public class MultiLangDaemonConfigTest {
|
|||
String properties = String.format(
|
||||
"executableName = %s\n"
|
||||
+ "applicationName = %s\n"
|
||||
+ "AWSCredentialsProvider = DefaultAWSCredentialsProviderChain\n"
|
||||
+ "AwsCredentialsProvider = DefaultCredentialsProvider\n"
|
||||
+ "processingLanguage = malbolge\n"
|
||||
+ "regionName = %s\n",
|
||||
EXE, APPLICATION_NAME, "us-east-1");
|
||||
|
|
@ -182,7 +182,7 @@ public class MultiLangDaemonConfigTest {
|
|||
@Test
|
||||
public void testPropertyValidation() {
|
||||
String propertiesNoExecutableName = "applicationName = testApp \n" + "streamName = fakeStream \n"
|
||||
+ "AWSCredentialsProvider = DefaultAWSCredentialsProviderChain\n" + "processingLanguage = malbolge";
|
||||
+ "AwsCredentialsProvider = DefaultCredentialsProvider\n" + "processingLanguage = malbolge";
|
||||
ClassLoader classLoader = Mockito.mock(ClassLoader.class);
|
||||
|
||||
Mockito.doReturn(new ByteArrayInputStream(propertiesNoExecutableName.getBytes()))
|
||||
|
|
|
|||
|
|
@ -157,7 +157,7 @@ public class MultiLangDaemonTest {
|
|||
|
||||
MultiLangDaemon.MultiLangDaemonArguments arguments = new MultiLangDaemon.MultiLangDaemonArguments();
|
||||
|
||||
daemon.propertiesFile(arguments);
|
||||
daemon.validateAndGetPropertiesFileName(arguments);
|
||||
}
|
||||
|
||||
@Test
|
||||
|
|
@ -166,7 +166,7 @@ public class MultiLangDaemonTest {
|
|||
MultiLangDaemon.MultiLangDaemonArguments arguments = new MultiLangDaemon.MultiLangDaemonArguments();
|
||||
arguments.parameters = Collections.singletonList(expectedPropertiesFile);
|
||||
|
||||
String propertiesFile = daemon.propertiesFile(arguments);
|
||||
String propertiesFile = daemon.validateAndGetPropertiesFileName(arguments);
|
||||
|
||||
assertThat(propertiesFile, equalTo(expectedPropertiesFile));
|
||||
}
|
||||
|
|
@ -180,7 +180,7 @@ public class MultiLangDaemonTest {
|
|||
arguments.parameters = Collections.singletonList(propertiesArgument);
|
||||
arguments.propertiesFile = propertiesOptions;
|
||||
|
||||
String propertiesFile = daemon.propertiesFile(arguments);
|
||||
String propertiesFile = daemon.validateAndGetPropertiesFileName(arguments);
|
||||
|
||||
assertThat(propertiesFile, equalTo(propertiesOptions));
|
||||
}
|
||||
|
|
@ -193,7 +193,7 @@ public class MultiLangDaemonTest {
|
|||
MultiLangDaemon.MultiLangDaemonArguments arguments = new MultiLangDaemon.MultiLangDaemonArguments();
|
||||
arguments.parameters = Arrays.asList("parameter1", "parameter2");
|
||||
|
||||
daemon.propertiesFile(arguments);
|
||||
daemon.validateAndGetPropertiesFileName(arguments);
|
||||
}
|
||||
|
||||
@Test
|
||||
|
|
|
|||
|
|
@ -14,11 +14,11 @@
|
|||
*/
|
||||
package software.amazon.kinesis.multilang;
|
||||
|
||||
import com.amazonaws.regions.Regions;
|
||||
import org.junit.Test;
|
||||
import org.junit.runner.RunWith;
|
||||
import org.mockito.Mock;
|
||||
import org.mockito.runners.MockitoJUnitRunner;
|
||||
import software.amazon.awssdk.regions.Region;
|
||||
|
||||
import static org.junit.Assert.assertEquals;
|
||||
import static org.mockito.Mockito.verify;
|
||||
|
|
@ -64,9 +64,9 @@ public class NestedPropertyKeyTest {
|
|||
|
||||
@Test
|
||||
public void testEndpointRegion() {
|
||||
final Regions expectedRegion = Regions.GovCloud;
|
||||
final Region expectedRegion = Region.US_GOV_WEST_1;
|
||||
|
||||
parse(mockProcessor, createKey(ENDPOINT_REGION, expectedRegion.getName()));
|
||||
parse(mockProcessor, createKey(ENDPOINT_REGION, expectedRegion.id()));
|
||||
verify(mockProcessor).acceptEndpointRegion(expectedRegion);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -31,15 +31,14 @@ public class KclSTSAssumeRoleSessionCredentialsProviderTest {
|
|||
*/
|
||||
@Test
|
||||
public void testConstructorWithoutOptionalParams() {
|
||||
new KclSTSAssumeRoleSessionCredentialsProvider(new String[] {ARN, SESSION_NAME});
|
||||
new KclStsAssumeRoleCredentialsProvider(new String[] {ARN, SESSION_NAME});
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testAcceptEndpoint() {
|
||||
// discovered exception during e2e testing; therefore, this test is
|
||||
// to simply verify the constructed STS client doesn't go *boom*
|
||||
final KclSTSAssumeRoleSessionCredentialsProvider provider =
|
||||
new KclSTSAssumeRoleSessionCredentialsProvider(ARN, SESSION_NAME);
|
||||
final KclStsAssumeRoleCredentialsProvider provider = new KclStsAssumeRoleCredentialsProvider(ARN, SESSION_NAME);
|
||||
provider.acceptEndpoint("endpoint", "us-east-1");
|
||||
}
|
||||
|
||||
|
|
@ -53,7 +52,7 @@ public class KclSTSAssumeRoleSessionCredentialsProviderTest {
|
|||
}
|
||||
}
|
||||
|
||||
private static class VarArgsSpy extends KclSTSAssumeRoleSessionCredentialsProvider {
|
||||
private static class VarArgsSpy extends KclStsAssumeRoleCredentialsProvider {
|
||||
|
||||
private String externalId;
|
||||
|
||||
|
|
|
|||
|
|
@ -16,16 +16,17 @@ package software.amazon.kinesis.multilang.config;
|
|||
|
||||
import java.util.Arrays;
|
||||
|
||||
import com.amazonaws.auth.AWSCredentials;
|
||||
import com.amazonaws.auth.AWSCredentialsProvider;
|
||||
import com.amazonaws.auth.AWSCredentialsProviderChain;
|
||||
import com.amazonaws.auth.BasicAWSCredentials;
|
||||
import lombok.ToString;
|
||||
import org.hamcrest.Description;
|
||||
import org.hamcrest.Matcher;
|
||||
import org.hamcrest.TypeSafeDiagnosingMatcher;
|
||||
import org.junit.Test;
|
||||
import software.amazon.kinesis.multilang.auth.KclSTSAssumeRoleSessionCredentialsProvider;
|
||||
import software.amazon.awssdk.auth.credentials.AwsBasicCredentials;
|
||||
import software.amazon.awssdk.auth.credentials.AwsCredentials;
|
||||
import software.amazon.awssdk.auth.credentials.AwsCredentialsProvider;
|
||||
import software.amazon.awssdk.auth.credentials.AwsCredentialsProviderChain;
|
||||
import software.amazon.awssdk.services.sts.auth.StsAssumeRoleCredentialsProvider;
|
||||
import software.amazon.kinesis.multilang.auth.KclStsAssumeRoleCredentialsProvider;
|
||||
|
||||
import static org.hamcrest.CoreMatchers.equalTo;
|
||||
import static org.hamcrest.CoreMatchers.instanceOf;
|
||||
|
|
@ -33,31 +34,32 @@ import static org.junit.Assert.assertEquals;
|
|||
import static org.junit.Assert.assertNotNull;
|
||||
import static org.junit.Assert.assertThat;
|
||||
|
||||
public class AWSCredentialsProviderPropertyValueDecoderTest {
|
||||
public class AwsCredentialsProviderPropertyValueDecoderTest {
|
||||
|
||||
private static final String TEST_ACCESS_KEY_ID = "123";
|
||||
private static final String TEST_SECRET_KEY = "456";
|
||||
|
||||
private final String credentialName1 = AlwaysSucceedCredentialsProvider.class.getName();
|
||||
private final String credentialName2 = ConstructorCredentialsProvider.class.getName();
|
||||
private final AWSCredentialsProviderPropertyValueDecoder decoder = new AWSCredentialsProviderPropertyValueDecoder();
|
||||
private final String createCredentialClass = CreateProvider.class.getName();
|
||||
private final AwsCredentialsProviderPropertyValueDecoder decoder = new AwsCredentialsProviderPropertyValueDecoder();
|
||||
|
||||
@ToString
|
||||
private static class AWSCredentialsMatcher extends TypeSafeDiagnosingMatcher<AWSCredentialsProvider> {
|
||||
private static class AwsCredentialsMatcher extends TypeSafeDiagnosingMatcher<AwsCredentialsProvider> {
|
||||
|
||||
private final Matcher<String> akidMatcher;
|
||||
private final Matcher<String> secretMatcher;
|
||||
private final Matcher<Class<?>> classMatcher;
|
||||
|
||||
public AWSCredentialsMatcher(String akid, String secret) {
|
||||
public AwsCredentialsMatcher(String akid, String secret) {
|
||||
this.akidMatcher = equalTo(akid);
|
||||
this.secretMatcher = equalTo(secret);
|
||||
this.classMatcher = instanceOf(AWSCredentialsProviderChain.class);
|
||||
this.classMatcher = instanceOf(AwsCredentialsProviderChain.class);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected boolean matchesSafely(AWSCredentialsProvider item, Description mismatchDescription) {
|
||||
AWSCredentials actual = item.getCredentials();
|
||||
protected boolean matchesSafely(AwsCredentialsProvider item, Description mismatchDescription) {
|
||||
AwsCredentials actual = item.resolveCredentials();
|
||||
boolean matched = true;
|
||||
|
||||
if (!classMatcher.matches(item)) {
|
||||
|
|
@ -65,12 +67,12 @@ public class AWSCredentialsProviderPropertyValueDecoderTest {
|
|||
matched = false;
|
||||
}
|
||||
|
||||
if (!akidMatcher.matches(actual.getAWSAccessKeyId())) {
|
||||
akidMatcher.describeMismatch(actual.getAWSAccessKeyId(), mismatchDescription);
|
||||
if (!akidMatcher.matches(actual.accessKeyId())) {
|
||||
akidMatcher.describeMismatch(actual.accessKeyId(), mismatchDescription);
|
||||
matched = false;
|
||||
}
|
||||
if (!secretMatcher.matches(actual.getAWSSecretKey())) {
|
||||
secretMatcher.describeMismatch(actual.getAWSSecretKey(), mismatchDescription);
|
||||
if (!secretMatcher.matches(actual.secretAccessKey())) {
|
||||
secretMatcher.describeMismatch(actual.secretAccessKey(), mismatchDescription);
|
||||
matched = false;
|
||||
}
|
||||
return matched;
|
||||
|
|
@ -79,36 +81,36 @@ public class AWSCredentialsProviderPropertyValueDecoderTest {
|
|||
@Override
|
||||
public void describeTo(Description description) {
|
||||
description
|
||||
.appendText("An AWSCredentialsProvider that provides an AWSCredential matching: ")
|
||||
.appendText("An AwsCredentialsProvider that provides an AwsCredential matching: ")
|
||||
.appendList("(", ", ", ")", Arrays.asList(classMatcher, akidMatcher, secretMatcher));
|
||||
}
|
||||
}
|
||||
|
||||
private static AWSCredentialsMatcher hasCredentials(String akid, String secret) {
|
||||
return new AWSCredentialsMatcher(akid, secret);
|
||||
private static AwsCredentialsMatcher hasCredentials(String akid, String secret) {
|
||||
return new AwsCredentialsMatcher(akid, secret);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testSingleProvider() {
|
||||
AWSCredentialsProvider provider = decoder.decodeValue(credentialName1);
|
||||
AwsCredentialsProvider provider = decoder.decodeValue(credentialName1);
|
||||
assertThat(provider, hasCredentials(TEST_ACCESS_KEY_ID, TEST_SECRET_KEY));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testTwoProviders() {
|
||||
AWSCredentialsProvider provider = decoder.decodeValue(credentialName1 + "," + credentialName1);
|
||||
AwsCredentialsProvider provider = decoder.decodeValue(credentialName1 + "," + credentialName1);
|
||||
assertThat(provider, hasCredentials(TEST_ACCESS_KEY_ID, TEST_SECRET_KEY));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testProfileProviderWithOneArg() {
|
||||
AWSCredentialsProvider provider = decoder.decodeValue(credentialName2 + "|arg");
|
||||
AwsCredentialsProvider provider = decoder.decodeValue(credentialName2 + "|arg");
|
||||
assertThat(provider, hasCredentials("arg", "blank"));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testProfileProviderWithTwoArgs() {
|
||||
AWSCredentialsProvider provider = decoder.decodeValue(credentialName2 + "|arg1|arg2");
|
||||
AwsCredentialsProvider provider = decoder.decodeValue(credentialName2 + "|arg1|arg2");
|
||||
assertThat(provider, hasCredentials("arg1", "arg2"));
|
||||
}
|
||||
|
||||
|
|
@ -118,14 +120,33 @@ public class AWSCredentialsProviderPropertyValueDecoderTest {
|
|||
@Test
|
||||
public void testKclAuthProvider() {
|
||||
for (final String className : Arrays.asList(
|
||||
KclSTSAssumeRoleSessionCredentialsProvider.class.getName(), // fully-qualified name
|
||||
KclSTSAssumeRoleSessionCredentialsProvider.class.getSimpleName() // name-only; needs prefix
|
||||
)) {
|
||||
final AWSCredentialsProvider provider = decoder.decodeValue(className + "|arn|sessionName");
|
||||
KclStsAssumeRoleCredentialsProvider.class.getName(), // fully-qualified name
|
||||
KclStsAssumeRoleCredentialsProvider.class.getSimpleName(), // name-only; needs prefix
|
||||
StsAssumeRoleCredentialsProvider.class.getName(), // user passes full sts package path
|
||||
StsAssumeRoleCredentialsProvider.class.getSimpleName())) {
|
||||
final AwsCredentialsProvider provider = decoder.decodeValue(className + "|arn|sessionName");
|
||||
assertNotNull(className, provider);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Test that OneArgCreateProvider in the SDK v2 can process a create() method
|
||||
*/
|
||||
@Test
|
||||
public void testEmptyCreateProvider() {
|
||||
AwsCredentialsProvider provider = decoder.decodeValue(createCredentialClass);
|
||||
assertThat(provider, hasCredentials(TEST_ACCESS_KEY_ID, TEST_SECRET_KEY));
|
||||
}
|
||||
|
||||
/**
|
||||
* Test that OneArgCreateProvider in the SDK v2 can process a create(arg1) method
|
||||
*/
|
||||
@Test
|
||||
public void testOneArgCreateProvider() {
|
||||
AwsCredentialsProvider provider = decoder.decodeValue(createCredentialClass + "|testCreateProperty");
|
||||
assertThat(provider, hasCredentials("testCreateProperty", TEST_SECRET_KEY));
|
||||
}
|
||||
|
||||
/**
|
||||
* Test that a provider can be instantiated by its varargs constructor.
|
||||
*/
|
||||
|
|
@ -135,28 +156,24 @@ public class AWSCredentialsProviderPropertyValueDecoderTest {
|
|||
final String className = VarArgCredentialsProvider.class.getName();
|
||||
final String encodedValue = className + "|" + String.join("|", args);
|
||||
|
||||
final AWSCredentialsProvider provider = decoder.decodeValue(encodedValue);
|
||||
assertEquals(Arrays.toString(args), provider.getCredentials().getAWSAccessKeyId());
|
||||
final AwsCredentialsProvider provider = decoder.decodeValue(encodedValue);
|
||||
assertEquals(Arrays.toString(args), provider.resolveCredentials().accessKeyId());
|
||||
}
|
||||
|
||||
/**
|
||||
* This credentials provider will always succeed
|
||||
*/
|
||||
public static class AlwaysSucceedCredentialsProvider implements AWSCredentialsProvider {
|
||||
|
||||
public static class AlwaysSucceedCredentialsProvider implements AwsCredentialsProvider {
|
||||
@Override
|
||||
public AWSCredentials getCredentials() {
|
||||
return new BasicAWSCredentials(TEST_ACCESS_KEY_ID, TEST_SECRET_KEY);
|
||||
public AwsCredentials resolveCredentials() {
|
||||
return AwsBasicCredentials.create(TEST_ACCESS_KEY_ID, TEST_SECRET_KEY);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void refresh() {}
|
||||
}
|
||||
|
||||
/**
|
||||
* This credentials provider needs a constructor call to instantiate it
|
||||
*/
|
||||
public static class ConstructorCredentialsProvider implements AWSCredentialsProvider {
|
||||
public static class ConstructorCredentialsProvider implements AwsCredentialsProvider {
|
||||
|
||||
private String arg1;
|
||||
private String arg2;
|
||||
|
|
@ -172,15 +189,12 @@ public class AWSCredentialsProviderPropertyValueDecoderTest {
|
|||
}
|
||||
|
||||
@Override
|
||||
public AWSCredentials getCredentials() {
|
||||
return new BasicAWSCredentials(arg1, arg2);
|
||||
public AwsCredentials resolveCredentials() {
|
||||
return AwsBasicCredentials.create(arg1, arg2);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void refresh() {}
|
||||
}
|
||||
|
||||
private static class VarArgCredentialsProvider implements AWSCredentialsProvider {
|
||||
private static class VarArgCredentialsProvider implements AwsCredentialsProvider {
|
||||
|
||||
private final String[] args;
|
||||
|
||||
|
|
@ -189,13 +203,34 @@ public class AWSCredentialsProviderPropertyValueDecoderTest {
|
|||
}
|
||||
|
||||
@Override
|
||||
public AWSCredentials getCredentials() {
|
||||
public AwsCredentials resolveCredentials() {
|
||||
// KISS solution to surface the constructor args
|
||||
final String flattenedArgs = Arrays.toString(args);
|
||||
return new BasicAWSCredentials(flattenedArgs, flattenedArgs);
|
||||
return AwsBasicCredentials.create(flattenedArgs, flattenedArgs);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Credentials provider to test AWS SDK v2 create() methods for providers like ProfileCredentialsProvider
|
||||
*/
|
||||
public static class CreateProvider implements AwsCredentialsProvider {
|
||||
private String accessKeyId;
|
||||
|
||||
private CreateProvider(String accessKeyId) {
|
||||
this.accessKeyId = accessKeyId;
|
||||
}
|
||||
|
||||
public static CreateProvider create() {
|
||||
return new CreateProvider(TEST_ACCESS_KEY_ID);
|
||||
}
|
||||
|
||||
public static CreateProvider create(String accessKeyId) {
|
||||
return new CreateProvider(accessKeyId);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void refresh() {}
|
||||
public AwsCredentials resolveCredentials() {
|
||||
return AwsBasicCredentials.create(accessKeyId, TEST_SECRET_KEY);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -52,6 +52,16 @@ public class ConfigurationSettableUtilsTest {
|
|||
assertThat(actual, equalTo(expected));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testBoolean() {
|
||||
ConfigResult expected = ConfigResult.builder().bool(false).build();
|
||||
|
||||
ConfigObject configObject = ConfigObject.builder().bool(expected.bool).build();
|
||||
ConfigResult actual = resolve(configObject);
|
||||
|
||||
assertThat(actual, equalTo(expected));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testHeapValuesSet() {
|
||||
ConfigResult expected =
|
||||
|
|
@ -147,6 +157,9 @@ public class ConfigurationSettableUtilsTest {
|
|||
private Long boxedLong;
|
||||
private ComplexValue complexValue;
|
||||
|
||||
@Builder.Default
|
||||
private Boolean bool = true;
|
||||
|
||||
private Optional<String> optionalString;
|
||||
private Optional<Integer> optionalInteger;
|
||||
private Optional<Long> optionalLong;
|
||||
|
|
@ -175,6 +188,10 @@ public class ConfigurationSettableUtilsTest {
|
|||
@ConfigurationSettable(configurationClass = ConfigResult.class)
|
||||
private int rawInt;
|
||||
|
||||
@ConfigurationSettable(configurationClass = ConfigResult.class)
|
||||
@Builder.Default
|
||||
private Boolean bool = true;
|
||||
|
||||
@ConfigurationSettable(configurationClass = ConfigResult.class)
|
||||
private Integer boxedInt;
|
||||
|
||||
|
|
|
|||
|
|
@ -20,19 +20,21 @@ import java.net.URI;
|
|||
import java.util.Arrays;
|
||||
import java.util.Date;
|
||||
import java.util.HashSet;
|
||||
import java.util.NoSuchElementException;
|
||||
import java.util.Set;
|
||||
|
||||
import com.amazonaws.auth.AWSCredentials;
|
||||
import com.amazonaws.auth.AWSCredentialsProvider;
|
||||
import com.amazonaws.auth.BasicAWSCredentials;
|
||||
import com.google.common.collect.ImmutableSet;
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
import org.apache.commons.lang3.exception.ExceptionUtils;
|
||||
import org.junit.Test;
|
||||
import org.junit.runner.RunWith;
|
||||
import org.mockito.runners.MockitoJUnitRunner;
|
||||
import software.amazon.awssdk.auth.credentials.AwsBasicCredentials;
|
||||
import software.amazon.awssdk.auth.credentials.AwsCredentials;
|
||||
import software.amazon.awssdk.auth.credentials.AwsCredentialsProvider;
|
||||
import software.amazon.awssdk.services.dynamodb.model.BillingMode;
|
||||
import software.amazon.kinesis.common.InitialPositionInStream;
|
||||
import software.amazon.kinesis.coordinator.CoordinatorConfig;
|
||||
import software.amazon.kinesis.metrics.MetricsLevel;
|
||||
|
||||
import static org.hamcrest.CoreMatchers.equalTo;
|
||||
|
|
@ -40,6 +42,7 @@ import static org.hamcrest.CoreMatchers.nullValue;
|
|||
import static org.junit.Assert.assertEquals;
|
||||
import static org.junit.Assert.assertFalse;
|
||||
import static org.junit.Assert.assertNotNull;
|
||||
import static org.junit.Assert.assertNull;
|
||||
import static org.junit.Assert.assertThat;
|
||||
import static org.junit.Assert.assertTrue;
|
||||
import static org.junit.Assert.fail;
|
||||
|
|
@ -60,7 +63,7 @@ public class KinesisClientLibConfiguratorTest {
|
|||
new String[] {
|
||||
"streamName = a",
|
||||
"applicationName = b",
|
||||
"AWSCredentialsProvider = " + credentialName1,
|
||||
"AwsCredentialsProvider = " + credentialName1,
|
||||
"workerId = 123"
|
||||
},
|
||||
'\n'));
|
||||
|
|
@ -69,6 +72,8 @@ public class KinesisClientLibConfiguratorTest {
|
|||
assertEquals(config.getWorkerIdentifier(), "123");
|
||||
assertThat(config.getMaxGetRecordsThreadPool(), nullValue());
|
||||
assertThat(config.getRetryGetRecordsInSeconds(), nullValue());
|
||||
assertNull(config.getGracefulLeaseHandoffTimeoutMillis());
|
||||
assertNull(config.getIsGracefulLeaseHandoffEnabled());
|
||||
}
|
||||
|
||||
@Test
|
||||
|
|
@ -77,7 +82,7 @@ public class KinesisClientLibConfiguratorTest {
|
|||
new String[] {
|
||||
"applicationName = app",
|
||||
"streamName = 123",
|
||||
"AWSCredentialsProvider = " + credentialName1 + ", " + credentialName2,
|
||||
"AwsCredentialsProvider = " + credentialName1 + ", " + credentialName2,
|
||||
"workerId = 123",
|
||||
"failoverTimeMillis = 100",
|
||||
"shardSyncIntervalMillis = 500"
|
||||
|
|
@ -98,7 +103,7 @@ public class KinesisClientLibConfiguratorTest {
|
|||
new String[] {
|
||||
"applicationName = app",
|
||||
"streamName = 123",
|
||||
"AWSCredentialsProvider = " + credentialName1 + ", " + credentialName2,
|
||||
"AwsCredentialsProvider = " + credentialName1 + ", " + credentialName2,
|
||||
"initialPositionInStreamExtended = " + epochTimeInSeconds
|
||||
},
|
||||
'\n'));
|
||||
|
|
@ -116,7 +121,7 @@ public class KinesisClientLibConfiguratorTest {
|
|||
new String[] {
|
||||
"applicationName = app",
|
||||
"streamName = 123",
|
||||
"AWSCredentialsProvider = " + credentialName1 + ", " + credentialName2,
|
||||
"AwsCredentialsProvider = " + credentialName1 + ", " + credentialName2,
|
||||
"initialPositionInStream = AT_TIMESTAMP"
|
||||
},
|
||||
'\n'));
|
||||
|
|
@ -136,7 +141,7 @@ public class KinesisClientLibConfiguratorTest {
|
|||
new String[] {
|
||||
"applicationName = app",
|
||||
"streamName = 123",
|
||||
"AWSCredentialsProvider = " + credentialName1 + ", " + credentialName2,
|
||||
"AwsCredentialsProvider = " + credentialName1 + ", " + credentialName2,
|
||||
"initialPositionInStreamExtended = null"
|
||||
},
|
||||
'\n'));
|
||||
|
|
@ -147,11 +152,156 @@ public class KinesisClientLibConfiguratorTest {
|
|||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testGracefulLeaseHandoffConfig() {
|
||||
final Long testGracefulLeaseHandoffTimeoutMillis = 12345L;
|
||||
final boolean testGracefulLeaseHandoffEnabled = true;
|
||||
|
||||
final MultiLangDaemonConfiguration config = getConfiguration(StringUtils.join(
|
||||
new String[] {
|
||||
"applicationName = dummyApplicationName",
|
||||
"streamName = dummyStreamName",
|
||||
"AWSCredentialsProvider = " + credentialName1 + ", " + credentialName2,
|
||||
"gracefulLeaseHandoffTimeoutMillis = " + testGracefulLeaseHandoffTimeoutMillis,
|
||||
"isGracefulLeaseHandoffEnabled = " + testGracefulLeaseHandoffEnabled
|
||||
},
|
||||
'\n'));
|
||||
|
||||
assertEquals(testGracefulLeaseHandoffTimeoutMillis, config.getGracefulLeaseHandoffTimeoutMillis());
|
||||
assertEquals(testGracefulLeaseHandoffEnabled, config.getIsGracefulLeaseHandoffEnabled());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testClientVersionConfig() {
|
||||
final CoordinatorConfig.ClientVersionConfig testClientVersionConfig = Arrays.stream(
|
||||
CoordinatorConfig.ClientVersionConfig.values())
|
||||
.findAny()
|
||||
.orElseThrow(NoSuchElementException::new);
|
||||
|
||||
final MultiLangDaemonConfiguration config = getConfiguration(StringUtils.join(
|
||||
new String[] {
|
||||
"applicationName = dummyApplicationName",
|
||||
"streamName = dummyStreamName",
|
||||
"AWSCredentialsProvider = " + credentialName1 + ", " + credentialName2,
|
||||
"clientVersionConfig = " + testClientVersionConfig.name()
|
||||
},
|
||||
'\n'));
|
||||
|
||||
assertEquals(testClientVersionConfig, config.getClientVersionConfig());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testCoordinatorStateConfig() {
|
||||
final String testCoordinatorStateTableName = "CoordState";
|
||||
final BillingMode testCoordinatorStateBillingMode = BillingMode.PAY_PER_REQUEST;
|
||||
final long testCoordinatorStateReadCapacity = 123;
|
||||
final long testCoordinatorStateWriteCapacity = 123;
|
||||
|
||||
final MultiLangDaemonConfiguration config = getConfiguration(StringUtils.join(
|
||||
new String[] {
|
||||
"applicationName = dummyApplicationName",
|
||||
"streamName = dummyStreamName",
|
||||
"AWSCredentialsProvider = " + credentialName1 + ", " + credentialName2,
|
||||
"coordinatorStateTableName = " + testCoordinatorStateTableName,
|
||||
"coordinatorStateBillingMode = " + testCoordinatorStateBillingMode.name(),
|
||||
"coordinatorStateReadCapacity = " + testCoordinatorStateReadCapacity,
|
||||
"coordinatorStateWriteCapacity = " + testCoordinatorStateWriteCapacity
|
||||
},
|
||||
'\n'));
|
||||
|
||||
assertEquals(testCoordinatorStateTableName, config.getCoordinatorStateTableName());
|
||||
assertEquals(testCoordinatorStateBillingMode, config.getCoordinatorStateBillingMode());
|
||||
assertEquals(testCoordinatorStateReadCapacity, config.getCoordinatorStateReadCapacity());
|
||||
assertEquals(testCoordinatorStateWriteCapacity, config.getCoordinatorStateWriteCapacity());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testWorkerUtilizationAwareAssignmentConfig() {
|
||||
final long testInMemoryWorkerMetricsCaptureFrequencyMillis = 123;
|
||||
final long testWorkerMetricsReporterFreqInMillis = 123;
|
||||
final long testNoOfPersistedMetricsPerWorkerMetrics = 123;
|
||||
final Boolean testDisableWorkerMetrics = true;
|
||||
final double testMaxThroughputPerHostKBps = 123;
|
||||
final long testDampeningPercentage = 12;
|
||||
final long testReBalanceThresholdPercentage = 12;
|
||||
final Boolean testAllowThroughputOvershoot = false;
|
||||
final long testVarianceBalancingFrequency = 12;
|
||||
final double testWorkerMetricsEMAAlpha = .123;
|
||||
|
||||
final MultiLangDaemonConfiguration config = getConfiguration(StringUtils.join(
|
||||
new String[] {
|
||||
"applicationName = dummyApplicationName",
|
||||
"streamName = dummyStreamName",
|
||||
"AWSCredentialsProvider = " + credentialName1 + ", " + credentialName2,
|
||||
"inMemoryWorkerMetricsCaptureFrequencyMillis = " + testInMemoryWorkerMetricsCaptureFrequencyMillis,
|
||||
"workerMetricsReporterFreqInMillis = " + testWorkerMetricsReporterFreqInMillis,
|
||||
"noOfPersistedMetricsPerWorkerMetrics = " + testNoOfPersistedMetricsPerWorkerMetrics,
|
||||
"disableWorkerMetrics = " + testDisableWorkerMetrics,
|
||||
"maxThroughputPerHostKBps = " + testMaxThroughputPerHostKBps,
|
||||
"dampeningPercentage = " + testDampeningPercentage,
|
||||
"reBalanceThresholdPercentage = " + testReBalanceThresholdPercentage,
|
||||
"allowThroughputOvershoot = " + testAllowThroughputOvershoot,
|
||||
"varianceBalancingFrequency = " + testVarianceBalancingFrequency,
|
||||
"workerMetricsEMAAlpha = " + testWorkerMetricsEMAAlpha
|
||||
},
|
||||
'\n'));
|
||||
|
||||
assertEquals(
|
||||
testInMemoryWorkerMetricsCaptureFrequencyMillis,
|
||||
config.getInMemoryWorkerMetricsCaptureFrequencyMillis());
|
||||
assertEquals(testWorkerMetricsReporterFreqInMillis, config.getWorkerMetricsReporterFreqInMillis());
|
||||
assertEquals(testNoOfPersistedMetricsPerWorkerMetrics, config.getNoOfPersistedMetricsPerWorkerMetrics());
|
||||
assertEquals(testDisableWorkerMetrics, config.getDisableWorkerMetrics());
|
||||
assertEquals(testMaxThroughputPerHostKBps, config.getMaxThroughputPerHostKBps(), 0.0001);
|
||||
assertEquals(testDampeningPercentage, config.getDampeningPercentage());
|
||||
assertEquals(testReBalanceThresholdPercentage, config.getReBalanceThresholdPercentage());
|
||||
assertEquals(testAllowThroughputOvershoot, config.getAllowThroughputOvershoot());
|
||||
assertEquals(testVarianceBalancingFrequency, config.getVarianceBalancingFrequency());
|
||||
assertEquals(testWorkerMetricsEMAAlpha, config.getWorkerMetricsEMAAlpha(), 0.0001);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testWorkerMetricsConfig() {
|
||||
final String testWorkerMetricsTableName = "CoordState";
|
||||
final BillingMode testWorkerMetricsBillingMode = BillingMode.PROVISIONED;
|
||||
final long testWorkerMetricsReadCapacity = 123;
|
||||
final long testWorkerMetricsWriteCapacity = 123;
|
||||
|
||||
final MultiLangDaemonConfiguration config = getConfiguration(StringUtils.join(
|
||||
new String[] {
|
||||
"applicationName = dummyApplicationName",
|
||||
"streamName = dummyStreamName",
|
||||
"AWSCredentialsProvider = " + credentialName1 + ", " + credentialName2,
|
||||
"workerMetricsTableName = " + testWorkerMetricsTableName,
|
||||
"workerMetricsBillingMode = " + testWorkerMetricsBillingMode.name(),
|
||||
"workerMetricsReadCapacity = " + testWorkerMetricsReadCapacity,
|
||||
"workerMetricsWriteCapacity = " + testWorkerMetricsWriteCapacity
|
||||
},
|
||||
'\n'));
|
||||
|
||||
assertEquals(testWorkerMetricsTableName, config.getWorkerMetricsTableName());
|
||||
assertEquals(testWorkerMetricsBillingMode, config.getWorkerMetricsBillingMode());
|
||||
assertEquals(testWorkerMetricsReadCapacity, config.getWorkerMetricsReadCapacity());
|
||||
assertEquals(testWorkerMetricsWriteCapacity, config.getWorkerMetricsWriteCapacity());
|
||||
}
|
||||
|
||||
@Test(expected = IllegalArgumentException.class)
|
||||
public void testInvalidClientVersionConfig() {
|
||||
getConfiguration(StringUtils.join(
|
||||
new String[] {
|
||||
"applicationName = dummyApplicationName",
|
||||
"streamName = dummyStreamName",
|
||||
"AWSCredentialsProvider = " + credentialName1 + ", " + credentialName2,
|
||||
"clientVersionConfig = " + "invalid_client_version_config"
|
||||
},
|
||||
'\n'));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testWithUnsupportedClientConfigurationVariables() {
|
||||
MultiLangDaemonConfiguration config = getConfiguration(StringUtils.join(
|
||||
new String[] {
|
||||
"AWSCredentialsProvider = " + credentialName1 + ", " + credentialName2,
|
||||
"AwsCredentialsProvider = " + credentialName1 + ", " + credentialName2,
|
||||
"workerId = id",
|
||||
"kinesisClientConfig = {}",
|
||||
"streamName = stream",
|
||||
|
|
@ -170,7 +320,7 @@ public class KinesisClientLibConfiguratorTest {
|
|||
MultiLangDaemonConfiguration config = getConfiguration(StringUtils.join(
|
||||
new String[] {
|
||||
"streamName = kinesis",
|
||||
"AWSCredentialsProvider = " + credentialName2 + ", " + credentialName1,
|
||||
"AwsCredentialsProvider = " + credentialName2 + ", " + credentialName1,
|
||||
"workerId = w123",
|
||||
"maxRecords = 10",
|
||||
"metricsMaxQueueSize = 20",
|
||||
|
|
@ -195,7 +345,7 @@ public class KinesisClientLibConfiguratorTest {
|
|||
new String[] {
|
||||
"streamName = a",
|
||||
"applicationName = b",
|
||||
"AWSCredentialsProvider = ABCD, " + credentialName1,
|
||||
"AwsCredentialsProvider = ABCD, " + credentialName1,
|
||||
"workerId = 0",
|
||||
"cleanupLeasesUponShardCompletion = false",
|
||||
"validateSequenceNumberBeforeCheckpointing = true"
|
||||
|
|
@ -215,7 +365,7 @@ public class KinesisClientLibConfiguratorTest {
|
|||
new String[] {
|
||||
"streamName = a",
|
||||
"applicationName = b",
|
||||
"AWSCredentialsProvider = ABCD," + credentialName1,
|
||||
"AwsCredentialsProvider = ABCD," + credentialName1,
|
||||
"workerId = 1",
|
||||
"kinesisEndpoint = https://kinesis",
|
||||
"metricsLevel = SUMMARY"
|
||||
|
|
@ -233,7 +383,7 @@ public class KinesisClientLibConfiguratorTest {
|
|||
new String[] {
|
||||
"streamName = a",
|
||||
"applicationName = b",
|
||||
"AWSCredentialsProvider = ABCD," + credentialName1,
|
||||
"AwsCredentialsProvider = ABCD," + credentialName1,
|
||||
"workerId = 1",
|
||||
"metricsEnabledDimensions = ShardId, WorkerIdentifier"
|
||||
},
|
||||
|
|
@ -253,7 +403,7 @@ public class KinesisClientLibConfiguratorTest {
|
|||
new String[] {
|
||||
"streamName = a",
|
||||
"applicationName = b",
|
||||
"AWSCredentialsProvider = ABCD," + credentialName1,
|
||||
"AwsCredentialsProvider = ABCD," + credentialName1,
|
||||
"workerId = 123",
|
||||
"initialPositionInStream = TriM_Horizon"
|
||||
},
|
||||
|
|
@ -268,7 +418,7 @@ public class KinesisClientLibConfiguratorTest {
|
|||
new String[] {
|
||||
"streamName = a",
|
||||
"applicationName = b",
|
||||
"AWSCredentialsProvider = ABCD," + credentialName1,
|
||||
"AwsCredentialsProvider = ABCD," + credentialName1,
|
||||
"workerId = 123",
|
||||
"initialPositionInStream = LateSt"
|
||||
},
|
||||
|
|
@ -283,7 +433,7 @@ public class KinesisClientLibConfiguratorTest {
|
|||
new String[] {
|
||||
"streamName = a",
|
||||
"applicationName = b",
|
||||
"AWSCredentialsProvider = ABCD," + credentialName1,
|
||||
"AwsCredentialsProvider = ABCD," + credentialName1,
|
||||
"workerId = 123",
|
||||
"initialPositionInStream = TriM_Horizon",
|
||||
"abc = 1"
|
||||
|
|
@ -302,7 +452,7 @@ public class KinesisClientLibConfiguratorTest {
|
|||
new String[] {
|
||||
"streamName = a",
|
||||
"applicationName = b",
|
||||
"AWSCredentialsProvider = ABCD," + credentialName1,
|
||||
"AwsCredentialsProvider = ABCD," + credentialName1,
|
||||
"workerId = 123",
|
||||
"initialPositionInStream = TriM_Horizon",
|
||||
"maxGetRecordsThreadPool = 1"
|
||||
|
|
@ -318,7 +468,7 @@ public class KinesisClientLibConfiguratorTest {
|
|||
new String[] {
|
||||
"streamName = a",
|
||||
"applicationName = b",
|
||||
"AWSCredentialsProvider = ABCD," + credentialName1,
|
||||
"AwsCredentialsProvider = ABCD," + credentialName1,
|
||||
"workerId = 123",
|
||||
"initialPositionInStream = TriM_Horizon",
|
||||
"maxGetRecordsThreadPool = 0",
|
||||
|
|
@ -334,7 +484,7 @@ public class KinesisClientLibConfiguratorTest {
|
|||
new String[] {
|
||||
"streamName = a",
|
||||
"applicationName = b",
|
||||
"AWSCredentialsProvider = " + credentialName1,
|
||||
"AwsCredentialsProvider = " + credentialName1,
|
||||
"workerId = 123",
|
||||
"failoverTimeMillis = 100nf"
|
||||
},
|
||||
|
|
@ -348,7 +498,7 @@ public class KinesisClientLibConfiguratorTest {
|
|||
new String[] {
|
||||
"streamName = a",
|
||||
"applicationName = b",
|
||||
"AWSCredentialsProvider = " + credentialName1,
|
||||
"AwsCredentialsProvider = " + credentialName1,
|
||||
"workerId = 123",
|
||||
"failoverTimeMillis = -12"
|
||||
},
|
||||
|
|
@ -380,7 +530,7 @@ public class KinesisClientLibConfiguratorTest {
|
|||
new String[] {
|
||||
"streamName = a",
|
||||
"applicationName = b",
|
||||
"AWSCredentialsProvider = " + credentialName1,
|
||||
"AwsCredentialsProvider = " + credentialName1,
|
||||
"failoverTimeMillis = 100",
|
||||
"shardSyncIntervalMillis = 500"
|
||||
},
|
||||
|
|
@ -397,7 +547,7 @@ public class KinesisClientLibConfiguratorTest {
|
|||
String test = StringUtils.join(
|
||||
new String[] {
|
||||
"applicationName = b",
|
||||
"AWSCredentialsProvider = " + credentialName1,
|
||||
"AwsCredentialsProvider = " + credentialName1,
|
||||
"workerId = 123",
|
||||
"failoverTimeMillis = 100"
|
||||
},
|
||||
|
|
@ -410,7 +560,7 @@ public class KinesisClientLibConfiguratorTest {
|
|||
String test = StringUtils.join(
|
||||
new String[] {
|
||||
"applicationName = b",
|
||||
"AWSCredentialsProvider = " + credentialName1,
|
||||
"AwsCredentialsProvider = " + credentialName1,
|
||||
"workerId = 123",
|
||||
"failoverTimeMillis = 100",
|
||||
"streamName = ",
|
||||
|
|
@ -425,7 +575,7 @@ public class KinesisClientLibConfiguratorTest {
|
|||
String test = StringUtils.join(
|
||||
new String[] {
|
||||
"streamName = a",
|
||||
"AWSCredentialsProvider = " + credentialName1,
|
||||
"AwsCredentialsProvider = " + credentialName1,
|
||||
"workerId = 123",
|
||||
"failoverTimeMillis = 100"
|
||||
},
|
||||
|
|
@ -434,12 +584,12 @@ public class KinesisClientLibConfiguratorTest {
|
|||
}
|
||||
|
||||
@Test
|
||||
public void testWithAWSCredentialsFailed() {
|
||||
public void testWithAwsCredentialsFailed() {
|
||||
String test = StringUtils.join(
|
||||
new String[] {
|
||||
"streamName = a",
|
||||
"applicationName = b",
|
||||
"AWSCredentialsProvider = " + credentialName2,
|
||||
"AwsCredentialsProvider = " + credentialName2,
|
||||
"failoverTimeMillis = 100",
|
||||
"shardSyncIntervalMillis = 500"
|
||||
},
|
||||
|
|
@ -457,16 +607,44 @@ public class KinesisClientLibConfiguratorTest {
|
|||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testProcessKeyWithExpectedCasing() {
|
||||
String key = "AwsCredentialsProvider";
|
||||
String result = configurator.processKey(key);
|
||||
assertEquals("awsCredentialsProvider", result);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testProcessKeyWithOldCasing() {
|
||||
String key = "AWSCredentialsProvider";
|
||||
String result = configurator.processKey(key);
|
||||
assertEquals("awsCredentialsProvider", result);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testProcessKeyWithMixedCasing() {
|
||||
String key = "AwScReDeNtIaLsPrOvIdEr";
|
||||
String result = configurator.processKey(key);
|
||||
assertEquals("awsCredentialsProvider", result);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testProcessKeyWithSuffix() {
|
||||
String key = "awscredentialsproviderDynamoDB";
|
||||
String result = configurator.processKey(key);
|
||||
assertEquals("awsCredentialsProviderDynamoDB", result);
|
||||
}
|
||||
|
||||
// TODO: fix this test
|
||||
@Test
|
||||
public void testWithDifferentAWSCredentialsForDynamoDBAndCloudWatch() {
|
||||
public void testWithDifferentAwsCredentialsForDynamoDBAndCloudWatch() {
|
||||
String test = StringUtils.join(
|
||||
new String[] {
|
||||
"streamName = a",
|
||||
"applicationName = b",
|
||||
"AWSCredentialsProvider = " + credentialNameKinesis,
|
||||
"AWSCredentialsProviderDynamoDB = " + credentialNameDynamoDB,
|
||||
"AWSCredentialsProviderCloudWatch = " + credentialNameCloudWatch,
|
||||
"AwsCredentialsProvider = " + credentialNameKinesis,
|
||||
"AwsCredentialsProviderDynamoDB = " + credentialNameDynamoDB,
|
||||
"AwsCredentialsProviderCloudWatch = " + credentialNameCloudWatch,
|
||||
"failoverTimeMillis = 100",
|
||||
"shardSyncIntervalMillis = 500"
|
||||
},
|
||||
|
|
@ -487,14 +665,14 @@ public class KinesisClientLibConfiguratorTest {
|
|||
|
||||
// TODO: fix this test
|
||||
@Test
|
||||
public void testWithDifferentAWSCredentialsForDynamoDBAndCloudWatchFailed() {
|
||||
public void testWithDifferentAwsCredentialsForDynamoDBAndCloudWatchFailed() {
|
||||
String test = StringUtils.join(
|
||||
new String[] {
|
||||
"streamName = a",
|
||||
"applicationName = b",
|
||||
"AWSCredentialsProvider = " + credentialNameKinesis,
|
||||
"AWSCredentialsProviderDynamoDB = " + credentialName2,
|
||||
"AWSCredentialsProviderCloudWatch = " + credentialName2,
|
||||
"AwsCredentialsProvider = " + credentialNameKinesis,
|
||||
"AwsCredentialsProviderDynamoDB = " + credentialName2,
|
||||
"AwsCredentialsProviderCloudWatch = " + credentialName2,
|
||||
"failoverTimeMillis = 100",
|
||||
"shardSyncIntervalMillis = 500"
|
||||
},
|
||||
|
|
@ -526,71 +704,52 @@ public class KinesisClientLibConfiguratorTest {
|
|||
/**
|
||||
* This credentials provider will always succeed
|
||||
*/
|
||||
public static class AlwaysSucceedCredentialsProvider implements AWSCredentialsProvider {
|
||||
|
||||
public static class AlwaysSucceedCredentialsProvider implements AwsCredentialsProvider {
|
||||
@Override
|
||||
public AWSCredentials getCredentials() {
|
||||
return new BasicAWSCredentials("a", "b");
|
||||
public AwsCredentials resolveCredentials() {
|
||||
return AwsBasicCredentials.create("a", "b");
|
||||
}
|
||||
|
||||
@Override
|
||||
public void refresh() {}
|
||||
}
|
||||
|
||||
/**
|
||||
* This credentials provider will always succeed
|
||||
*/
|
||||
public static class AlwaysSucceedCredentialsProviderKinesis implements AWSCredentialsProvider {
|
||||
|
||||
public static class AlwaysSucceedCredentialsProviderKinesis implements AwsCredentialsProvider {
|
||||
@Override
|
||||
public AWSCredentials getCredentials() {
|
||||
return new BasicAWSCredentials("", "");
|
||||
public AwsCredentials resolveCredentials() {
|
||||
return AwsBasicCredentials.create("DUMMY_ACCESS_KEY_ID", "DUMMY_SECRET_ACCESS_KEY");
|
||||
}
|
||||
|
||||
@Override
|
||||
public void refresh() {}
|
||||
}
|
||||
|
||||
/**
|
||||
* This credentials provider will always succeed
|
||||
*/
|
||||
public static class AlwaysSucceedCredentialsProviderDynamoDB implements AWSCredentialsProvider {
|
||||
|
||||
public static class AlwaysSucceedCredentialsProviderDynamoDB implements AwsCredentialsProvider {
|
||||
@Override
|
||||
public AWSCredentials getCredentials() {
|
||||
return new BasicAWSCredentials("", "");
|
||||
public AwsCredentials resolveCredentials() {
|
||||
return AwsBasicCredentials.create("DUMMY_ACCESS_KEY_ID", "DUMMY_SECRET_ACCESS_KEY");
|
||||
}
|
||||
|
||||
@Override
|
||||
public void refresh() {}
|
||||
}
|
||||
|
||||
/**
|
||||
* This credentials provider will always succeed
|
||||
*/
|
||||
public static class AlwaysSucceedCredentialsProviderCloudWatch implements AWSCredentialsProvider {
|
||||
|
||||
public static class AlwaysSucceedCredentialsProviderCloudWatch implements AwsCredentialsProvider {
|
||||
@Override
|
||||
public AWSCredentials getCredentials() {
|
||||
return new BasicAWSCredentials("", "");
|
||||
public AwsCredentials resolveCredentials() {
|
||||
return AwsBasicCredentials.create("DUMMY_ACCESS_KEY_ID", "DUMMY_SECRET_ACCESS_KEY");
|
||||
}
|
||||
|
||||
@Override
|
||||
public void refresh() {}
|
||||
}
|
||||
|
||||
/**
|
||||
* This credentials provider will always fail
|
||||
*/
|
||||
public static class AlwaysFailCredentialsProvider implements AWSCredentialsProvider {
|
||||
public static class AlwaysFailCredentialsProvider implements AwsCredentialsProvider {
|
||||
|
||||
@Override
|
||||
public AWSCredentials getCredentials() {
|
||||
public AwsCredentials resolveCredentials() {
|
||||
throw new IllegalArgumentException();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void refresh() {}
|
||||
}
|
||||
|
||||
private MultiLangDaemonConfiguration getConfiguration(String configString) {
|
||||
|
|
|
|||
|
|
@ -15,6 +15,9 @@
|
|||
|
||||
package software.amazon.kinesis.multilang.config;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.NoSuchElementException;
|
||||
|
||||
import org.apache.commons.beanutils.BeanUtilsBean;
|
||||
import org.apache.commons.beanutils.ConvertUtilsBean;
|
||||
import org.junit.After;
|
||||
|
|
@ -24,8 +27,16 @@ import org.junit.Test;
|
|||
import org.junit.rules.ExpectedException;
|
||||
import org.junit.runner.RunWith;
|
||||
import org.mockito.Mock;
|
||||
import org.mockito.Mockito;
|
||||
import org.mockito.runners.MockitoJUnitRunner;
|
||||
import software.amazon.awssdk.auth.credentials.DefaultCredentialsProvider;
|
||||
import software.amazon.awssdk.services.cloudwatch.CloudWatchAsyncClient;
|
||||
import software.amazon.awssdk.services.dynamodb.DynamoDbAsyncClient;
|
||||
import software.amazon.awssdk.services.dynamodb.model.BillingMode;
|
||||
import software.amazon.awssdk.services.kinesis.KinesisAsyncClient;
|
||||
import software.amazon.kinesis.common.ConfigsBuilder;
|
||||
import software.amazon.kinesis.coordinator.CoordinatorConfig;
|
||||
import software.amazon.kinesis.leases.LeaseManagementConfig;
|
||||
import software.amazon.kinesis.processor.ShardRecordProcessorFactory;
|
||||
import software.amazon.kinesis.retrieval.fanout.FanOutConfig;
|
||||
import software.amazon.kinesis.retrieval.polling.PollingConfig;
|
||||
|
|
@ -34,6 +45,7 @@ import static org.hamcrest.CoreMatchers.equalTo;
|
|||
import static org.hamcrest.CoreMatchers.instanceOf;
|
||||
import static org.junit.Assert.assertEquals;
|
||||
import static org.junit.Assert.assertFalse;
|
||||
import static org.junit.Assert.assertNotEquals;
|
||||
import static org.junit.Assert.assertThat;
|
||||
import static org.junit.Assert.assertTrue;
|
||||
|
||||
|
|
@ -41,6 +53,8 @@ import static org.junit.Assert.assertTrue;
|
|||
public class MultiLangDaemonConfigurationTest {
|
||||
|
||||
private static final String AWS_REGION_PROPERTY_NAME = "aws.region";
|
||||
private static final String DUMMY_APPLICATION_NAME = "dummyApplicationName";
|
||||
private static final String DUMMY_STREAM_NAME = "dummyStreamName";
|
||||
|
||||
private BeanUtilsBean utilsBean;
|
||||
private ConvertUtilsBean convertUtilsBean;
|
||||
|
|
@ -71,8 +85,8 @@ public class MultiLangDaemonConfigurationTest {
|
|||
|
||||
public MultiLangDaemonConfiguration baseConfiguration() {
|
||||
MultiLangDaemonConfiguration configuration = new MultiLangDaemonConfiguration(utilsBean, convertUtilsBean);
|
||||
configuration.setApplicationName("Test");
|
||||
configuration.setStreamName("Test");
|
||||
configuration.setApplicationName(DUMMY_APPLICATION_NAME);
|
||||
configuration.setStreamName(DUMMY_STREAM_NAME);
|
||||
configuration.getKinesisCredentialsProvider().set("class", DefaultCredentialsProvider.class.getName());
|
||||
|
||||
return configuration;
|
||||
|
|
@ -111,6 +125,197 @@ public class MultiLangDaemonConfigurationTest {
|
|||
assertTrue(resolvedConfiguration.leaseManagementConfig.leaseTableDeletionProtectionEnabled());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testGracefulLeaseHandoffConfig() {
|
||||
final LeaseManagementConfig.GracefulLeaseHandoffConfig defaultGracefulLeaseHandoffConfig =
|
||||
getTestConfigsBuilder().leaseManagementConfig().gracefulLeaseHandoffConfig();
|
||||
|
||||
final long testGracefulLeaseHandoffTimeoutMillis =
|
||||
defaultGracefulLeaseHandoffConfig.gracefulLeaseHandoffTimeoutMillis() + 12345;
|
||||
final boolean testGracefulLeaseHandoffEnabled =
|
||||
!defaultGracefulLeaseHandoffConfig.isGracefulLeaseHandoffEnabled();
|
||||
|
||||
final MultiLangDaemonConfiguration configuration = baseConfiguration();
|
||||
configuration.setGracefulLeaseHandoffTimeoutMillis(testGracefulLeaseHandoffTimeoutMillis);
|
||||
configuration.setIsGracefulLeaseHandoffEnabled(testGracefulLeaseHandoffEnabled);
|
||||
|
||||
final MultiLangDaemonConfiguration.ResolvedConfiguration resolvedConfiguration =
|
||||
configuration.resolvedConfiguration(shardRecordProcessorFactory);
|
||||
|
||||
final LeaseManagementConfig.GracefulLeaseHandoffConfig gracefulLeaseHandoffConfig =
|
||||
resolvedConfiguration.leaseManagementConfig.gracefulLeaseHandoffConfig();
|
||||
|
||||
assertEquals(
|
||||
testGracefulLeaseHandoffTimeoutMillis, gracefulLeaseHandoffConfig.gracefulLeaseHandoffTimeoutMillis());
|
||||
assertEquals(testGracefulLeaseHandoffEnabled, gracefulLeaseHandoffConfig.isGracefulLeaseHandoffEnabled());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testGracefulLeaseHandoffUsesDefaults() {
|
||||
final MultiLangDaemonConfiguration.ResolvedConfiguration resolvedConfiguration =
|
||||
baseConfiguration().resolvedConfiguration(shardRecordProcessorFactory);
|
||||
|
||||
final LeaseManagementConfig.GracefulLeaseHandoffConfig gracefulLeaseHandoffConfig =
|
||||
resolvedConfiguration.leaseManagementConfig.gracefulLeaseHandoffConfig();
|
||||
|
||||
final LeaseManagementConfig.GracefulLeaseHandoffConfig defaultGracefulLeaseHandoffConfig =
|
||||
getTestConfigsBuilder().leaseManagementConfig().gracefulLeaseHandoffConfig();
|
||||
|
||||
assertEquals(defaultGracefulLeaseHandoffConfig, gracefulLeaseHandoffConfig);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testWorkerUtilizationAwareAssignmentConfig() {
|
||||
MultiLangDaemonConfiguration configuration = baseConfiguration();
|
||||
|
||||
configuration.setInMemoryWorkerMetricsCaptureFrequencyMillis(123);
|
||||
configuration.setWorkerMetricsReporterFreqInMillis(123);
|
||||
configuration.setNoOfPersistedMetricsPerWorkerMetrics(123);
|
||||
configuration.setDisableWorkerMetrics(true);
|
||||
configuration.setMaxThroughputPerHostKBps(.123);
|
||||
configuration.setDampeningPercentage(12);
|
||||
configuration.setReBalanceThresholdPercentage(12);
|
||||
configuration.setAllowThroughputOvershoot(false);
|
||||
configuration.setVarianceBalancingFrequency(12);
|
||||
configuration.setWorkerMetricsEMAAlpha(.123);
|
||||
|
||||
MultiLangDaemonConfiguration.ResolvedConfiguration resolvedConfiguration =
|
||||
configuration.resolvedConfiguration(shardRecordProcessorFactory);
|
||||
LeaseManagementConfig leaseManagementConfig = resolvedConfiguration.leaseManagementConfig;
|
||||
LeaseManagementConfig.WorkerUtilizationAwareAssignmentConfig config =
|
||||
leaseManagementConfig.workerUtilizationAwareAssignmentConfig();
|
||||
|
||||
assertEquals(config.inMemoryWorkerMetricsCaptureFrequencyMillis(), 123);
|
||||
assertEquals(config.workerMetricsReporterFreqInMillis(), 123);
|
||||
assertEquals(config.noOfPersistedMetricsPerWorkerMetrics(), 123);
|
||||
assertTrue(config.disableWorkerMetrics());
|
||||
assertEquals(config.maxThroughputPerHostKBps(), .123, .25);
|
||||
assertEquals(config.dampeningPercentage(), 12);
|
||||
assertEquals(config.reBalanceThresholdPercentage(), 12);
|
||||
assertFalse(config.allowThroughputOvershoot());
|
||||
assertEquals(config.varianceBalancingFrequency(), 12);
|
||||
assertEquals(config.workerMetricsEMAAlpha(), .123, .25);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testWorkerUtilizationAwareAssignmentConfigUsesDefaults() {
|
||||
final LeaseManagementConfig.WorkerUtilizationAwareAssignmentConfig defaultWorkerUtilAwareAssignmentConfig =
|
||||
getTestConfigsBuilder().leaseManagementConfig().workerUtilizationAwareAssignmentConfig();
|
||||
|
||||
final MultiLangDaemonConfiguration configuration = baseConfiguration();
|
||||
configuration.setVarianceBalancingFrequency(
|
||||
defaultWorkerUtilAwareAssignmentConfig.varianceBalancingFrequency() + 12345);
|
||||
|
||||
final MultiLangDaemonConfiguration.ResolvedConfiguration resolvedConfiguration =
|
||||
configuration.resolvedConfiguration(shardRecordProcessorFactory);
|
||||
|
||||
final LeaseManagementConfig.WorkerUtilizationAwareAssignmentConfig resolvedWorkerUtilAwareAssignmentConfig =
|
||||
resolvedConfiguration.leaseManagementConfig.workerUtilizationAwareAssignmentConfig();
|
||||
|
||||
assertNotEquals(defaultWorkerUtilAwareAssignmentConfig, resolvedWorkerUtilAwareAssignmentConfig);
|
||||
|
||||
// apart from the single updated configuration, all other config values should be equal to the default
|
||||
resolvedWorkerUtilAwareAssignmentConfig.varianceBalancingFrequency(
|
||||
defaultWorkerUtilAwareAssignmentConfig.varianceBalancingFrequency());
|
||||
assertEquals(defaultWorkerUtilAwareAssignmentConfig, resolvedWorkerUtilAwareAssignmentConfig);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testWorkerMetricsTableConfigBean() {
|
||||
final BillingMode testWorkerMetricsTableBillingMode = BillingMode.PROVISIONED;
|
||||
|
||||
MultiLangDaemonConfiguration configuration = baseConfiguration();
|
||||
|
||||
configuration.setWorkerMetricsTableName("testTable");
|
||||
configuration.setWorkerMetricsBillingMode(testWorkerMetricsTableBillingMode);
|
||||
configuration.setWorkerMetricsReadCapacity(123);
|
||||
configuration.setWorkerMetricsWriteCapacity(123);
|
||||
|
||||
MultiLangDaemonConfiguration.ResolvedConfiguration resolvedConfiguration =
|
||||
configuration.resolvedConfiguration(shardRecordProcessorFactory);
|
||||
LeaseManagementConfig leaseManagementConfig = resolvedConfiguration.leaseManagementConfig;
|
||||
LeaseManagementConfig.WorkerUtilizationAwareAssignmentConfig workerUtilizationConfig =
|
||||
leaseManagementConfig.workerUtilizationAwareAssignmentConfig();
|
||||
LeaseManagementConfig.WorkerMetricsTableConfig workerMetricsConfig =
|
||||
workerUtilizationConfig.workerMetricsTableConfig();
|
||||
|
||||
assertEquals(workerMetricsConfig.tableName(), "testTable");
|
||||
assertEquals(workerMetricsConfig.billingMode(), testWorkerMetricsTableBillingMode);
|
||||
assertEquals(workerMetricsConfig.readCapacity(), 123);
|
||||
assertEquals(workerMetricsConfig.writeCapacity(), 123);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testWorkerMetricsTableConfigUsesDefaults() {
|
||||
final LeaseManagementConfig.WorkerMetricsTableConfig defaultWorkerMetricsTableConfig = getTestConfigsBuilder()
|
||||
.leaseManagementConfig()
|
||||
.workerUtilizationAwareAssignmentConfig()
|
||||
.workerMetricsTableConfig();
|
||||
|
||||
final MultiLangDaemonConfiguration configuration = baseConfiguration();
|
||||
configuration.setWorkerMetricsBillingMode(Arrays.stream(BillingMode.values())
|
||||
.filter(billingMode -> billingMode != defaultWorkerMetricsTableConfig.billingMode())
|
||||
.findFirst()
|
||||
.orElseThrow(NoSuchElementException::new));
|
||||
|
||||
final MultiLangDaemonConfiguration.ResolvedConfiguration resolvedConfiguration =
|
||||
configuration.resolvedConfiguration(shardRecordProcessorFactory);
|
||||
|
||||
final LeaseManagementConfig.WorkerMetricsTableConfig resolvedWorkerMetricsTableConfig = resolvedConfiguration
|
||||
.leaseManagementConfig
|
||||
.workerUtilizationAwareAssignmentConfig()
|
||||
.workerMetricsTableConfig();
|
||||
|
||||
assertNotEquals(defaultWorkerMetricsTableConfig, resolvedWorkerMetricsTableConfig);
|
||||
|
||||
// apart from the single updated configuration, all other config values should be equal to the default
|
||||
resolvedWorkerMetricsTableConfig.billingMode(defaultWorkerMetricsTableConfig.billingMode());
|
||||
assertEquals(defaultWorkerMetricsTableConfig, resolvedWorkerMetricsTableConfig);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testCoordinatorStateTableConfigBean() {
|
||||
final BillingMode testWorkerMetricsTableBillingMode = BillingMode.PAY_PER_REQUEST;
|
||||
|
||||
MultiLangDaemonConfiguration configuration = baseConfiguration();
|
||||
|
||||
configuration.setCoordinatorStateTableName("testTable");
|
||||
configuration.setCoordinatorStateBillingMode(testWorkerMetricsTableBillingMode);
|
||||
configuration.setCoordinatorStateReadCapacity(123);
|
||||
configuration.setCoordinatorStateWriteCapacity(123);
|
||||
|
||||
MultiLangDaemonConfiguration.ResolvedConfiguration resolvedConfiguration =
|
||||
configuration.resolvedConfiguration(shardRecordProcessorFactory);
|
||||
CoordinatorConfig coordinatorConfig = resolvedConfiguration.getCoordinatorConfig();
|
||||
CoordinatorConfig.CoordinatorStateTableConfig coordinatorStateConfig =
|
||||
coordinatorConfig.coordinatorStateTableConfig();
|
||||
assertEquals(coordinatorStateConfig.tableName(), "testTable");
|
||||
assertEquals(coordinatorStateConfig.billingMode(), testWorkerMetricsTableBillingMode);
|
||||
assertEquals(coordinatorStateConfig.readCapacity(), 123);
|
||||
assertEquals(coordinatorStateConfig.writeCapacity(), 123);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testCoordinatorStateTableConfigUsesDefaults() {
|
||||
final CoordinatorConfig.CoordinatorStateTableConfig defaultCoordinatorStateTableConfig =
|
||||
getTestConfigsBuilder().coordinatorConfig().coordinatorStateTableConfig();
|
||||
|
||||
final MultiLangDaemonConfiguration configuration = baseConfiguration();
|
||||
configuration.setCoordinatorStateWriteCapacity(defaultCoordinatorStateTableConfig.writeCapacity() + 12345);
|
||||
|
||||
final MultiLangDaemonConfiguration.ResolvedConfiguration resolvedConfiguration =
|
||||
configuration.resolvedConfiguration(shardRecordProcessorFactory);
|
||||
|
||||
final CoordinatorConfig.CoordinatorStateTableConfig resolvedCoordinatorStateTableConfig =
|
||||
resolvedConfiguration.coordinatorConfig.coordinatorStateTableConfig();
|
||||
|
||||
assertNotEquals(defaultCoordinatorStateTableConfig, resolvedCoordinatorStateTableConfig);
|
||||
|
||||
// apart from the single updated configuration, all other config values should be equal to the default
|
||||
resolvedCoordinatorStateTableConfig.writeCapacity(defaultCoordinatorStateTableConfig.writeCapacity());
|
||||
assertEquals(defaultCoordinatorStateTableConfig, resolvedCoordinatorStateTableConfig);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testSetLeaseTablePitrEnabledToTrue() {
|
||||
MultiLangDaemonConfiguration configuration = baseConfiguration();
|
||||
|
|
@ -266,4 +471,43 @@ public class MultiLangDaemonConfigurationTest {
|
|||
|
||||
assertThat(fanOutConfig.consumerArn(), equalTo(consumerArn));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testClientVersionConfig() {
|
||||
final CoordinatorConfig.ClientVersionConfig testClientVersionConfig =
|
||||
CoordinatorConfig.ClientVersionConfig.CLIENT_VERSION_CONFIG_COMPATIBLE_WITH_2X;
|
||||
|
||||
final MultiLangDaemonConfiguration configuration = baseConfiguration();
|
||||
configuration.setClientVersionConfig(testClientVersionConfig);
|
||||
|
||||
final MultiLangDaemonConfiguration.ResolvedConfiguration resolvedConfiguration =
|
||||
configuration.resolvedConfiguration(shardRecordProcessorFactory);
|
||||
|
||||
final CoordinatorConfig coordinatorConfig = resolvedConfiguration.coordinatorConfig;
|
||||
|
||||
assertEquals(testClientVersionConfig, coordinatorConfig.clientVersionConfig());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testClientVersionConfigUsesDefault() {
|
||||
final MultiLangDaemonConfiguration.ResolvedConfiguration resolvedConfiguration =
|
||||
baseConfiguration().resolvedConfiguration(shardRecordProcessorFactory);
|
||||
|
||||
final CoordinatorConfig coordinatorConfig = resolvedConfiguration.coordinatorConfig;
|
||||
|
||||
assertEquals(
|
||||
getTestConfigsBuilder().coordinatorConfig().clientVersionConfig(),
|
||||
coordinatorConfig.clientVersionConfig());
|
||||
}
|
||||
|
||||
private ConfigsBuilder getTestConfigsBuilder() {
|
||||
return new ConfigsBuilder(
|
||||
DUMMY_STREAM_NAME,
|
||||
DUMMY_APPLICATION_NAME,
|
||||
Mockito.mock(KinesisAsyncClient.class),
|
||||
Mockito.mock(DynamoDbAsyncClient.class),
|
||||
Mockito.mock(CloudWatchAsyncClient.class),
|
||||
"dummyWorkerIdentifier",
|
||||
shardRecordProcessorFactory);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -0,0 +1,251 @@
|
|||
package software.amazon.kinesis.multilang.config;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.time.Duration;
|
||||
|
||||
import org.junit.jupiter.api.Test;
|
||||
import software.amazon.awssdk.services.dynamodb.model.BillingMode;
|
||||
import software.amazon.kinesis.coordinator.CoordinatorConfig.ClientVersionConfig;
|
||||
import software.amazon.kinesis.multilang.MultiLangDaemonConfig;
|
||||
import software.amazon.kinesis.multilang.config.MultiLangDaemonConfiguration.ResolvedConfiguration;
|
||||
import software.amazon.kinesis.processor.ShardRecordProcessor;
|
||||
import software.amazon.kinesis.processor.ShardRecordProcessorFactory;
|
||||
|
||||
import static org.junit.jupiter.api.Assertions.assertEquals;
|
||||
import static org.junit.jupiter.api.Assertions.assertFalse;
|
||||
import static org.junit.jupiter.api.Assertions.assertTrue;
|
||||
|
||||
public class PropertiesMappingE2ETest {
|
||||
private static final String PROPERTIES_FILE = "multilang.properties";
|
||||
private static final String PROPERTIES_FILE_V3 = "multilangv3.properties";
|
||||
|
||||
@Test
|
||||
public void testKclV3PropertiesMapping() throws IOException {
|
||||
final MultiLangDaemonConfig config = new MultiLangDaemonConfig(PROPERTIES_FILE);
|
||||
|
||||
final ResolvedConfiguration kclV3Config =
|
||||
config.getMultiLangDaemonConfiguration().resolvedConfiguration(new TestRecordProcessorFactory());
|
||||
|
||||
assertEquals(
|
||||
ClientVersionConfig.CLIENT_VERSION_CONFIG_COMPATIBLE_WITH_2X,
|
||||
kclV3Config.coordinatorConfig.clientVersionConfig());
|
||||
|
||||
assertEquals(
|
||||
"MultiLangTest-CoordinatorState-CustomName",
|
||||
kclV3Config.coordinatorConfig.coordinatorStateTableConfig().tableName());
|
||||
assertEquals(
|
||||
BillingMode.PROVISIONED,
|
||||
kclV3Config.coordinatorConfig.coordinatorStateTableConfig().billingMode());
|
||||
assertEquals(
|
||||
1000,
|
||||
kclV3Config.coordinatorConfig.coordinatorStateTableConfig().readCapacity());
|
||||
assertEquals(
|
||||
500, kclV3Config.coordinatorConfig.coordinatorStateTableConfig().writeCapacity());
|
||||
|
||||
assertEquals(
|
||||
10000L,
|
||||
kclV3Config.leaseManagementConfig.gracefulLeaseHandoffConfig().gracefulLeaseHandoffTimeoutMillis());
|
||||
assertFalse(
|
||||
kclV3Config.leaseManagementConfig.gracefulLeaseHandoffConfig().isGracefulLeaseHandoffEnabled());
|
||||
|
||||
assertEquals(
|
||||
5000L,
|
||||
kclV3Config
|
||||
.leaseManagementConfig
|
||||
.workerUtilizationAwareAssignmentConfig()
|
||||
.inMemoryWorkerMetricsCaptureFrequencyMillis());
|
||||
assertEquals(
|
||||
60000L,
|
||||
kclV3Config
|
||||
.leaseManagementConfig
|
||||
.workerUtilizationAwareAssignmentConfig()
|
||||
.workerMetricsReporterFreqInMillis());
|
||||
assertEquals(
|
||||
50,
|
||||
kclV3Config
|
||||
.leaseManagementConfig
|
||||
.workerUtilizationAwareAssignmentConfig()
|
||||
.noOfPersistedMetricsPerWorkerMetrics());
|
||||
assertTrue(kclV3Config
|
||||
.leaseManagementConfig
|
||||
.workerUtilizationAwareAssignmentConfig()
|
||||
.disableWorkerMetrics());
|
||||
assertEquals(
|
||||
10000,
|
||||
kclV3Config
|
||||
.leaseManagementConfig
|
||||
.workerUtilizationAwareAssignmentConfig()
|
||||
.maxThroughputPerHostKBps());
|
||||
assertEquals(
|
||||
90,
|
||||
kclV3Config
|
||||
.leaseManagementConfig
|
||||
.workerUtilizationAwareAssignmentConfig()
|
||||
.dampeningPercentage());
|
||||
assertEquals(
|
||||
5,
|
||||
kclV3Config
|
||||
.leaseManagementConfig
|
||||
.workerUtilizationAwareAssignmentConfig()
|
||||
.reBalanceThresholdPercentage());
|
||||
assertFalse(kclV3Config
|
||||
.leaseManagementConfig
|
||||
.workerUtilizationAwareAssignmentConfig()
|
||||
.allowThroughputOvershoot());
|
||||
assertEquals(
|
||||
Duration.ofHours(12),
|
||||
kclV3Config
|
||||
.leaseManagementConfig
|
||||
.workerUtilizationAwareAssignmentConfig()
|
||||
.staleWorkerMetricsEntryCleanupDuration());
|
||||
assertEquals(
|
||||
5,
|
||||
kclV3Config
|
||||
.leaseManagementConfig
|
||||
.workerUtilizationAwareAssignmentConfig()
|
||||
.varianceBalancingFrequency());
|
||||
assertEquals(
|
||||
0.18D,
|
||||
kclV3Config
|
||||
.leaseManagementConfig
|
||||
.workerUtilizationAwareAssignmentConfig()
|
||||
.workerMetricsEMAAlpha());
|
||||
|
||||
assertEquals(
|
||||
"MultiLangTest-WorkerMetrics-CustomName",
|
||||
kclV3Config
|
||||
.leaseManagementConfig
|
||||
.workerUtilizationAwareAssignmentConfig()
|
||||
.workerMetricsTableConfig()
|
||||
.tableName());
|
||||
assertEquals(
|
||||
BillingMode.PROVISIONED,
|
||||
kclV3Config
|
||||
.leaseManagementConfig
|
||||
.workerUtilizationAwareAssignmentConfig()
|
||||
.workerMetricsTableConfig()
|
||||
.billingMode());
|
||||
assertEquals(
|
||||
250,
|
||||
kclV3Config
|
||||
.leaseManagementConfig
|
||||
.workerUtilizationAwareAssignmentConfig()
|
||||
.workerMetricsTableConfig()
|
||||
.readCapacity());
|
||||
assertEquals(
|
||||
90,
|
||||
kclV3Config
|
||||
.leaseManagementConfig
|
||||
.workerUtilizationAwareAssignmentConfig()
|
||||
.workerMetricsTableConfig()
|
||||
.writeCapacity());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testKclV3PropertiesMappingForDefaultValues() throws IOException {
|
||||
final MultiLangDaemonConfig config = new MultiLangDaemonConfig(PROPERTIES_FILE_V3);
|
||||
|
||||
final ResolvedConfiguration kclV3Config =
|
||||
config.getMultiLangDaemonConfiguration().resolvedConfiguration(new TestRecordProcessorFactory());
|
||||
|
||||
assertEquals(ClientVersionConfig.CLIENT_VERSION_CONFIG_3X, kclV3Config.coordinatorConfig.clientVersionConfig());
|
||||
|
||||
assertEquals(
|
||||
"MultiLangTest-CoordinatorState",
|
||||
kclV3Config.coordinatorConfig.coordinatorStateTableConfig().tableName());
|
||||
assertEquals(
|
||||
BillingMode.PAY_PER_REQUEST,
|
||||
kclV3Config.coordinatorConfig.coordinatorStateTableConfig().billingMode());
|
||||
|
||||
assertEquals(
|
||||
30_000L,
|
||||
kclV3Config.leaseManagementConfig.gracefulLeaseHandoffConfig().gracefulLeaseHandoffTimeoutMillis());
|
||||
assertTrue(
|
||||
kclV3Config.leaseManagementConfig.gracefulLeaseHandoffConfig().isGracefulLeaseHandoffEnabled());
|
||||
|
||||
assertEquals(
|
||||
1000L,
|
||||
kclV3Config
|
||||
.leaseManagementConfig
|
||||
.workerUtilizationAwareAssignmentConfig()
|
||||
.inMemoryWorkerMetricsCaptureFrequencyMillis());
|
||||
assertEquals(
|
||||
30000L,
|
||||
kclV3Config
|
||||
.leaseManagementConfig
|
||||
.workerUtilizationAwareAssignmentConfig()
|
||||
.workerMetricsReporterFreqInMillis());
|
||||
assertEquals(
|
||||
10,
|
||||
kclV3Config
|
||||
.leaseManagementConfig
|
||||
.workerUtilizationAwareAssignmentConfig()
|
||||
.noOfPersistedMetricsPerWorkerMetrics());
|
||||
assertFalse(kclV3Config
|
||||
.leaseManagementConfig
|
||||
.workerUtilizationAwareAssignmentConfig()
|
||||
.disableWorkerMetrics());
|
||||
assertEquals(
|
||||
Double.MAX_VALUE,
|
||||
kclV3Config
|
||||
.leaseManagementConfig
|
||||
.workerUtilizationAwareAssignmentConfig()
|
||||
.maxThroughputPerHostKBps());
|
||||
assertEquals(
|
||||
60,
|
||||
kclV3Config
|
||||
.leaseManagementConfig
|
||||
.workerUtilizationAwareAssignmentConfig()
|
||||
.dampeningPercentage());
|
||||
assertEquals(
|
||||
10,
|
||||
kclV3Config
|
||||
.leaseManagementConfig
|
||||
.workerUtilizationAwareAssignmentConfig()
|
||||
.reBalanceThresholdPercentage());
|
||||
assertTrue(kclV3Config
|
||||
.leaseManagementConfig
|
||||
.workerUtilizationAwareAssignmentConfig()
|
||||
.allowThroughputOvershoot());
|
||||
assertEquals(
|
||||
Duration.ofDays(1),
|
||||
kclV3Config
|
||||
.leaseManagementConfig
|
||||
.workerUtilizationAwareAssignmentConfig()
|
||||
.staleWorkerMetricsEntryCleanupDuration());
|
||||
assertEquals(
|
||||
3,
|
||||
kclV3Config
|
||||
.leaseManagementConfig
|
||||
.workerUtilizationAwareAssignmentConfig()
|
||||
.varianceBalancingFrequency());
|
||||
assertEquals(
|
||||
0.5D,
|
||||
kclV3Config
|
||||
.leaseManagementConfig
|
||||
.workerUtilizationAwareAssignmentConfig()
|
||||
.workerMetricsEMAAlpha());
|
||||
|
||||
assertEquals(
|
||||
"MultiLangTest-WorkerMetricStats",
|
||||
kclV3Config
|
||||
.leaseManagementConfig
|
||||
.workerUtilizationAwareAssignmentConfig()
|
||||
.workerMetricsTableConfig()
|
||||
.tableName());
|
||||
assertEquals(
|
||||
BillingMode.PAY_PER_REQUEST,
|
||||
kclV3Config
|
||||
.leaseManagementConfig
|
||||
.workerUtilizationAwareAssignmentConfig()
|
||||
.workerMetricsTableConfig()
|
||||
.billingMode());
|
||||
}
|
||||
|
||||
private static class TestRecordProcessorFactory implements ShardRecordProcessorFactory {
|
||||
@Override
|
||||
public ShardRecordProcessor shardRecordProcessor() {
|
||||
return null;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,68 @@
|
|||
/*
|
||||
* Copyright 2024 Amazon.com, Inc. or its affiliates.
|
||||
* Licensed under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package software.amazon.kinesis.multilang.config;
|
||||
|
||||
import java.util.Optional;
|
||||
|
||||
import org.apache.commons.beanutils.BeanUtilsBean;
|
||||
import org.apache.commons.beanutils.ConvertUtilsBean;
|
||||
import org.junit.Test;
|
||||
import org.junit.runner.RunWith;
|
||||
import org.mockito.Mock;
|
||||
import org.mockito.runners.MockitoJUnitRunner;
|
||||
import software.amazon.awssdk.services.kinesis.KinesisAsyncClient;
|
||||
import software.amazon.kinesis.retrieval.polling.PollingConfig;
|
||||
|
||||
import static org.hamcrest.CoreMatchers.equalTo;
|
||||
import static org.junit.Assert.assertThat;
|
||||
|
||||
@RunWith(MockitoJUnitRunner.class)
|
||||
public class WorkerUtilizationAwareAssignmentConfigBeanTest {
|
||||
|
||||
@Mock
|
||||
private KinesisAsyncClient kinesisAsyncClient;
|
||||
|
||||
@Test
|
||||
public void testAllPropertiesTransit() {
|
||||
PollingConfigBean pollingConfigBean = new PollingConfigBean();
|
||||
pollingConfigBean.setIdleTimeBetweenReadsInMillis(1000);
|
||||
pollingConfigBean.setMaxGetRecordsThreadPool(20);
|
||||
pollingConfigBean.setMaxRecords(5000);
|
||||
pollingConfigBean.setRetryGetRecordsInSeconds(30);
|
||||
|
||||
ConvertUtilsBean convertUtilsBean = new ConvertUtilsBean();
|
||||
BeanUtilsBean utilsBean = new BeanUtilsBean(convertUtilsBean);
|
||||
|
||||
MultiLangDaemonConfiguration multiLangDaemonConfiguration =
|
||||
new MultiLangDaemonConfiguration(utilsBean, convertUtilsBean);
|
||||
multiLangDaemonConfiguration.setStreamName("test-stream");
|
||||
|
||||
PollingConfig pollingConfig = pollingConfigBean.build(kinesisAsyncClient, multiLangDaemonConfiguration);
|
||||
|
||||
assertThat(pollingConfig.kinesisClient(), equalTo(kinesisAsyncClient));
|
||||
assertThat(pollingConfig.streamName(), equalTo(multiLangDaemonConfiguration.getStreamName()));
|
||||
assertThat(
|
||||
pollingConfig.idleTimeBetweenReadsInMillis(),
|
||||
equalTo(pollingConfigBean.getIdleTimeBetweenReadsInMillis()));
|
||||
assertThat(
|
||||
pollingConfig.maxGetRecordsThreadPool(),
|
||||
equalTo(Optional.of(pollingConfigBean.getMaxGetRecordsThreadPool())));
|
||||
assertThat(pollingConfig.maxRecords(), equalTo(pollingConfigBean.getMaxRecords()));
|
||||
assertThat(
|
||||
pollingConfig.retryGetRecordsInSeconds(),
|
||||
equalTo(Optional.of(pollingConfigBean.getRetryGetRecordsInSeconds())));
|
||||
}
|
||||
}
|
||||
|
|
@ -17,10 +17,12 @@ streamName = kclpysample
|
|||
applicationName = MultiLangTest
|
||||
|
||||
# Users can change the credentials provider the KCL will use to retrieve credentials.
|
||||
# The DefaultAWSCredentialsProviderChain checks several other providers, which is
|
||||
# Expected key name (case-sensitive):
|
||||
# AwsCredentialsProvider / AwsCredentialsProviderDynamoDB / AwsCredentialsProviderCloudWatch
|
||||
# The DefaultCredentialsProvider checks several other providers, which is
|
||||
# described here:
|
||||
# http://docs.aws.amazon.com/AWSJavaSDK/latest/javadoc/com/amazonaws/auth/DefaultAWSCredentialsProviderChain.html
|
||||
AWSCredentialsProvider = DefaultAWSCredentialsProviderChain
|
||||
# https://sdk.amazonaws.com/java/api/latest/software/amazon/awssdk/auth/credentials/DefaultCredentialsProvider.html
|
||||
AwsCredentialsProvider = DefaultCredentialsProvider
|
||||
|
||||
# Appended to the user agent of the KCL. Does not impact the functionality of the
|
||||
# KCL in any other way.
|
||||
|
|
@ -91,3 +93,73 @@ validateSequenceNumberBeforeCheckpointing = true
|
|||
# active threads set to the provided value. If a non-positive integer or no
|
||||
# value is provided a CachedThreadPool is used.
|
||||
maxActiveThreads = -1
|
||||
|
||||
################### KclV3 configurations ###################
|
||||
# Coordinator config
|
||||
# Version the KCL needs to operate in. For more details check the KCLv3 migration
|
||||
# documentation. Default is CLIENT_VERSION_CONFIG_3X
|
||||
clientVersionConfig = CLIENT_VERSION_CONFIG_COMPATIBLE_WITH_2x
|
||||
# TODO: include table deletion protection and pitr config once its added
|
||||
# Configurations to control how the CoordinatorState DDB table is created
|
||||
# Default name is applicationName-CoordinatorState in PAY_PER_REQUEST
|
||||
coordinatorStateTableName = MultiLangTest-CoordinatorState-CustomName
|
||||
coordinatorStateBillingMode = PROVISIONED
|
||||
coordinatorStateReadCapacity = 1000
|
||||
coordinatorStateWriteCapacity = 500
|
||||
|
||||
# Graceful handoff config - tuning of the shutdown behavior during lease transfers
|
||||
# default values are 30000 and true respectively
|
||||
gracefulLeaseHandoffTimeoutMillis = 10000
|
||||
isGracefulLeaseHandoffEnabled = false
|
||||
|
||||
# WorkerMetricStats table config - control how the DDB table is created
|
||||
## Default name is applicationName-WorkerMetricStats in PAY_PER_REQUEST
|
||||
# TODO: include table deletion protection and pitr config once its added
|
||||
workerMetricsTableName = MultiLangTest-WorkerMetrics-CustomName
|
||||
workerMetricsBillingMode = PROVISIONED
|
||||
workerMetricsReadCapacity = 250
|
||||
workerMetricsWriteCapacity = 90
|
||||
|
||||
# WorkerUtilizationAwareAssignment config - tune the new KCLv3 Lease balancing algorithm
|
||||
#
|
||||
# frequency of capturing worker metrics in memory. Default is 1s
|
||||
inMemoryWorkerMetricsCaptureFrequencyMillis = 5000
|
||||
# frequency of reporting worker metric stats to storage. Default is 30s
|
||||
workerMetricsReporterFreqInMillis = 60000
|
||||
# No. of metricStats that are persisted in WorkerMetricStats ddb table, default is 10
|
||||
noOfPersistedMetricsPerWorkerMetrics = 50
|
||||
# Disable use of worker metrics to balance lease, default is false.
|
||||
# If it is true, the algorithm balances lease based on worker's processing throughput.
|
||||
disableWorkerMetrics = true
|
||||
# Max throughput per host 10 MBps, to limit processing to the given value
|
||||
# Default is unlimited.
|
||||
maxThroughputPerHostKBps = 10000
|
||||
# Dampen the load that is rebalanced during lease re-balancing, default is 60%
|
||||
dampeningPercentage = 90
|
||||
# Configures the allowed variance range for worker utilization. The upper
|
||||
# limit is calculated as average * (1 + reBalanceThresholdPercentage/100).
|
||||
# The lower limit is average * (1 - reBalanceThresholdPercentage/100). If
|
||||
# any worker's utilization falls outside this range, lease re-balancing is
|
||||
# triggered. The re-balancing algorithm aims to bring variance within the
|
||||
# specified range. It also avoids thrashing by ensuring the utilization of
|
||||
# the worker receiving the load after re-balancing doesn't exceed the fleet
|
||||
# average. This might cause no re-balancing action even the utilization is
|
||||
# out of the variance range. The default value is 10, representing +/-10%
|
||||
# variance from the average value.
|
||||
reBalanceThresholdPercentage = 5
|
||||
# Whether at-least one lease must be taken from a high utilization worker
|
||||
# during re-balancing when there is no lease assigned to that worker which has
|
||||
# throughput is less than or equal to the minimum throughput that needs to be
|
||||
# moved away from that worker to bring the worker back into the allowed variance.
|
||||
# Default is true.
|
||||
allowThroughputOvershoot = false
|
||||
# Lease assignment is performed every failoverTimeMillis but re-balance will
|
||||
# be attempted only once in 5 times based on the below config. Default is 3.
|
||||
varianceBalancingFrequency = 5
|
||||
# Alpha value used for calculating exponential moving average of worker's metricStats.
|
||||
workerMetricsEMAAlpha = 0.18
|
||||
# Duration after which workerMetricStats entry from WorkerMetricStats table will
|
||||
# be cleaned up.
|
||||
# Duration format examples: PT15M (15 mins) PT10H (10 hours) P2D (2 days)
|
||||
# Refer to Duration.parse javadocs for more details
|
||||
staleWorkerMetricsEntryCleanupDuration = PT12H
|
||||
|
|
|
|||
|
|
@ -0,0 +1,169 @@
|
|||
# The script that abides by the multi-language protocol. This script will
|
||||
# be executed by the MultiLangDaemon, which will communicate with this script
|
||||
# over STDIN and STDOUT according to the multi-language protocol.
|
||||
executableName = sample_kclpy_app.py
|
||||
|
||||
# The Stream arn: arn:aws:kinesis:<region>:<account id>:stream/<stream name>
|
||||
# Important: streamArn takes precedence over streamName if both are set
|
||||
streamArn = arn:aws:kinesis:us-east-5:000000000000:stream/kclpysample
|
||||
|
||||
# The name of an Amazon Kinesis stream to process.
|
||||
# Important: streamArn takes precedence over streamName if both are set
|
||||
streamName = kclpysample
|
||||
|
||||
# Used by the KCL as the name of this application. Will be used as the name
|
||||
# of an Amazon DynamoDB table which will store the lease and checkpoint
|
||||
# information for workers with this application name
|
||||
applicationName = MultiLangTest
|
||||
|
||||
# Users can change the credentials provider the KCL will use to retrieve credentials.
|
||||
# Expected key name (case-sensitive):
|
||||
# AwsCredentialsProvider / AwsCredentialsProviderDynamoDB / AwsCredentialsProviderCloudWatch
|
||||
# The DefaultCredentialsProvider checks several other providers, which is
|
||||
# described here:
|
||||
# https://sdk.amazonaws.com/java/api/latest/software/amazon/awssdk/auth/credentials/DefaultCredentialsProvider.html
|
||||
AwsCredentialsProvider = DefaultCredentialsProvider
|
||||
|
||||
# Appended to the user agent of the KCL. Does not impact the functionality of the
|
||||
# KCL in any other way.
|
||||
processingLanguage = python/3.8
|
||||
|
||||
# Valid options at TRIM_HORIZON or LATEST.
|
||||
# See http://docs.aws.amazon.com/kinesis/latest/APIReference/API_GetShardIterator.html#API_GetShardIterator_RequestSyntax
|
||||
initialPositionInStream = TRIM_HORIZON
|
||||
|
||||
# To specify an initial timestamp from which to start processing records, please specify timestamp value for 'initialPositionInStreamExtended',
|
||||
# and uncomment below line with right timestamp value.
|
||||
# See more from 'Timestamp' under http://docs.aws.amazon.com/kinesis/latest/APIReference/API_GetShardIterator.html#API_GetShardIterator_RequestSyntax
|
||||
#initialPositionInStreamExtended = 1636609142
|
||||
|
||||
# The following properties are also available for configuring the KCL Worker that is created
|
||||
# by the MultiLangDaemon.
|
||||
|
||||
# The KCL defaults to us-east-1
|
||||
regionName = us-east-1
|
||||
|
||||
# Fail over time in milliseconds. A worker which does not renew its lease within this time interval
|
||||
# will be regarded as having problems and its shards will be assigned to other workers.
|
||||
# For applications that have a large number of shards, this may be set to a higher number to reduce
|
||||
# the number of DynamoDB IOPS required for tracking leases
|
||||
failoverTimeMillis = 10000
|
||||
|
||||
# A worker id that uniquely identifies this worker among all workers using the same applicationName
|
||||
# If this isn't provided a MultiLangDaemon instance will assign a unique workerId to itself.
|
||||
workerId = "workerId"
|
||||
|
||||
# Shard sync interval in milliseconds - e.g. wait for this long between shard sync tasks.
|
||||
shardSyncIntervalMillis = 60000
|
||||
|
||||
# Max records to fetch from Kinesis in a single GetRecords call.
|
||||
maxRecords = 10000
|
||||
|
||||
# Idle time between record reads in milliseconds.
|
||||
idleTimeBetweenReadsInMillis = 1000
|
||||
|
||||
# Enables applications flush/checkpoint (if they have some data "in progress", but don't get new data for while)
|
||||
callProcessRecordsEvenForEmptyRecordList = false
|
||||
|
||||
# Interval in milliseconds between polling to check for parent shard completion.
|
||||
# Polling frequently will take up more DynamoDB IOPS (when there are leases for shards waiting on
|
||||
# completion of parent shards).
|
||||
parentShardPollIntervalMillis = 10000
|
||||
|
||||
# Cleanup leases upon shards completion (don't wait until they expire in Kinesis).
|
||||
# Keeping leases takes some tracking/resources (e.g. they need to be renewed, assigned), so by default we try
|
||||
# to delete the ones we don't need any longer.
|
||||
cleanupLeasesUponShardCompletion = true
|
||||
|
||||
# Backoff time in milliseconds for Amazon Kinesis Client Library tasks (in the event of failures).
|
||||
taskBackoffTimeMillis = 500
|
||||
|
||||
# Buffer metrics for at most this long before publishing to CloudWatch.
|
||||
metricsBufferTimeMillis = 10000
|
||||
|
||||
# Buffer at most this many metrics before publishing to CloudWatch.
|
||||
metricsMaxQueueSize = 10000
|
||||
|
||||
# KCL will validate client provided sequence numbers with a call to Amazon Kinesis before checkpointing for calls
|
||||
# to RecordProcessorCheckpointer#checkpoint(String) by default.
|
||||
validateSequenceNumberBeforeCheckpointing = true
|
||||
|
||||
# The maximum number of active threads for the MultiLangDaemon to permit.
|
||||
# If a value is provided then a FixedThreadPool is used with the maximum
|
||||
# active threads set to the provided value. If a non-positive integer or no
|
||||
# value is provided a CachedThreadPool is used.
|
||||
maxActiveThreads = -1
|
||||
|
||||
################### KclV3 configurations ###################
|
||||
# Coordinator config
|
||||
clientVersionConfig = CLIENT_VERSION_CONFIG_3x
|
||||
|
||||
## Let all other config be defaults
|
||||
## TODO: include table deletion protection and pitr config once its added
|
||||
## Configurations to control how the CoordinatorState DDB table is created
|
||||
## Default name is applicationName-CoordinatorState in PAY_PER_REQUEST
|
||||
#coordinatorStateTableName = MultiLangTest-CoordinatorState-CustomName
|
||||
#coordinatorStateBillingMode = PROVISIONED
|
||||
#coordinatorStateReadCapacity = 1000
|
||||
#coordinatorStateWriteCapacity = 500
|
||||
#
|
||||
## Graceful handoff config - tuning of the shutdown behavior during lease transfers
|
||||
## default values are 30000 and true respectively
|
||||
#gracefulLeaseHandoffTimeoutMillis = 10000
|
||||
#isGracefulLeaseHandoffEnabled = false
|
||||
#
|
||||
## WorkerMetricStats table config - control how the DDB table is created
|
||||
### Default name is applicationName-WorkerMetricStats in PAY_PER_REQUEST
|
||||
## TODO: include table deletion protection and pitr config once its added
|
||||
#workerMetricsTableName = MultiLangTest-WorkerMetrics-CustomName
|
||||
#workerMetricsBillingMode = PROVISIONED
|
||||
#workerMetricsReadCapacity = 250
|
||||
#workerMetricsWriteCapacity = 90
|
||||
#
|
||||
## WorkerUtilizationAwareAssignment config - tune the new KCLv3 Lease balancing algorithm
|
||||
##
|
||||
## frequency of capturing worker metrics in memory. Default is 1s
|
||||
#inMemoryWorkerMetricsCaptureFrequencyMillis = 5000
|
||||
## frequency of reporting worker metric stats to storage. Default is 30s
|
||||
#workerMetricsReporterFreqInMillis = 60000
|
||||
## No. of metricStats that are persisted in WorkerMetricStats ddb table, default is 10.
|
||||
## This provides historic values that are used to compute the workers current
|
||||
## utilization using an exponential-moving-average.
|
||||
#noOfPersistedMetricsPerWorkerMetrics = 50
|
||||
## Disable use of worker metrics to balance lease, default is false.
|
||||
## If it is true, the algorithm balances lease based on worker's processing throughput.
|
||||
#disableWorkerMetrics = true
|
||||
## Max throughput per host 10 MBps, to limit processing to the given value
|
||||
## Default is unlimited.
|
||||
#maxThroughputPerHostKBps = 10000
|
||||
## Dampen the load that is rebalanced during lease re-balancing, default is 60%
|
||||
#dampeningPercentage = 90
|
||||
## Configures the allowed variance range for worker utilization. The upper
|
||||
## limit is calculated as average * (1 + reBalanceThresholdPercentage/100).
|
||||
## The lower limit is average * (1 - reBalanceThresholdPercentage/100). If
|
||||
## any worker's utilization falls outside this range, lease re-balancing is
|
||||
## triggered. The re-balancing algorithm aims to bring variance within the
|
||||
## specified range. It also avoids thrashing by ensuring the utilization of
|
||||
## the worker receiving the load after re-balancing doesn't exceed the fleet
|
||||
## average. This might cause no re-balancing action even the utilization is
|
||||
## out of the variance range. The default value is 10, representing +/-10%
|
||||
## variance from the average value.
|
||||
#reBalanceThresholdPercentage = 5
|
||||
## Whether at-least one lease must be taken from a high utilization worker
|
||||
## during re-balancing when there is no lease assigned to that worker which has
|
||||
## throughput is less than or equal to the minimum throughput that needs to be
|
||||
## moved away from that worker to bring the worker back into the allowed variance.
|
||||
## Default is true.
|
||||
#allowThroughputOvershoot = false
|
||||
## Lease assignment is performed every failoverTimeMillis but re-balance will
|
||||
## be attempted only once in 5 times based on the below config. Default is 3.
|
||||
#varianceBalancingFrequency = 5
|
||||
## Alpha value used for calculating exponential moving average of worker's metricStats.
|
||||
## Default is 0.5, a higher alpha value will make re-balancing more sensitive
|
||||
## to recent metricStats.
|
||||
#workerMetricsEMAAlpha = 0.18
|
||||
## Duration after which workerMetricStats entry from WorkerMetricStats table will
|
||||
## be cleaned up. Default is 1 day.
|
||||
## Duration format examples: PT15M (15 mins) PT10H (10 hours) P2D (2 days)
|
||||
## Refer to Duration.parse javadocs for more details
|
||||
#staleWorkerMetricsEntryCleanupDuration = PT12H
|
||||
|
|
@ -23,7 +23,7 @@
|
|||
<parent>
|
||||
<groupId>software.amazon.kinesis</groupId>
|
||||
<artifactId>amazon-kinesis-client-pom</artifactId>
|
||||
<version>2.6.1-SNAPSHOT</version>
|
||||
<version>3.0.0</version>
|
||||
</parent>
|
||||
|
||||
<artifactId>amazon-kinesis-client</artifactId>
|
||||
|
|
@ -68,6 +68,18 @@
|
|||
<artifactId>dynamodb</artifactId>
|
||||
<version>${awssdk.version}</version>
|
||||
</dependency>
|
||||
<!-- https://mvnrepository.com/artifact/software.amazon.awssdk/dynamodb-enhanced -->
|
||||
<dependency>
|
||||
<groupId>software.amazon.awssdk</groupId>
|
||||
<artifactId>dynamodb-enhanced</artifactId>
|
||||
<version>${awssdk.version}</version>
|
||||
</dependency>
|
||||
<!-- https://mvnrepository.com/artifact/com.amazonaws/dynamodb-lock-client -->
|
||||
<dependency>
|
||||
<groupId>com.amazonaws</groupId>
|
||||
<artifactId>dynamodb-lock-client</artifactId>
|
||||
<version>1.3.0</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>software.amazon.awssdk</groupId>
|
||||
<artifactId>cloudwatch</artifactId>
|
||||
|
|
@ -82,6 +94,12 @@
|
|||
<groupId>software.amazon.glue</groupId>
|
||||
<artifactId>schema-registry-serde</artifactId>
|
||||
<version>${gsr.version}</version>
|
||||
<exclusions>
|
||||
<exclusion>
|
||||
<groupId>com.amazonaws</groupId>
|
||||
<artifactId>aws-java-sdk-sts</artifactId>
|
||||
</exclusion>
|
||||
</exclusions>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>software.amazon.glue</groupId>
|
||||
|
|
@ -103,11 +121,23 @@
|
|||
<artifactId>commons-lang3</artifactId>
|
||||
<version>3.14.0</version>
|
||||
</dependency>
|
||||
<!-- https://mvnrepository.com/artifact/commons-collections/commons-collections -->
|
||||
<dependency>
|
||||
<groupId>commons-collections</groupId>
|
||||
<artifactId>commons-collections</artifactId>
|
||||
<version>3.2.2</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.slf4j</groupId>
|
||||
<artifactId>slf4j-api</artifactId>
|
||||
<version>${slf4j.version}</version>
|
||||
</dependency>
|
||||
<!-- https://mvnrepository.com/artifact/org.jetbrains/annotations -->
|
||||
<dependency>
|
||||
<groupId>org.jetbrains</groupId>
|
||||
<artifactId>annotations</artifactId>
|
||||
<version>26.0.1</version>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>io.reactivex.rxjava3</groupId>
|
||||
|
|
@ -123,35 +153,47 @@
|
|||
</dependency>
|
||||
|
||||
<!-- Test -->
|
||||
<!-- TODO: Migrate all tests to Junit5 -->
|
||||
<dependency>
|
||||
<groupId>org.junit.jupiter</groupId>
|
||||
<artifactId>junit-jupiter-api</artifactId>
|
||||
<version>5.11.3</version>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>junit</groupId>
|
||||
<artifactId>junit</artifactId>
|
||||
<version>4.13.2</version>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
|
||||
<!-- https://mvnrepository.com/artifact/org.junit.jupiter/junit-jupiter-params -->
|
||||
<dependency>
|
||||
<groupId>org.mockito</groupId>
|
||||
<artifactId>mockito-all</artifactId>
|
||||
<version>1.10.19</version>
|
||||
<groupId>org.junit.jupiter</groupId>
|
||||
<artifactId>junit-jupiter-params</artifactId>
|
||||
<version>5.11.3</version>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
<!-- Using older version to be compatible with Java 8 -->
|
||||
<dependency>
|
||||
<groupId>org.mockito</groupId>
|
||||
<artifactId>mockito-junit-jupiter</artifactId>
|
||||
<version>3.12.4</version>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.hamcrest</groupId>
|
||||
<artifactId>hamcrest-all</artifactId>
|
||||
<version>1.3</version>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
|
||||
|
||||
<!--<dependency>-->
|
||||
<!--<groupId>com.amazonaws</groupId>-->
|
||||
<!--<artifactId>DynamoDBLocal</artifactId>-->
|
||||
<!--<version>1.11.86</version>-->
|
||||
<!--<scope>test</scope>-->
|
||||
<!--</dependency>-->
|
||||
|
||||
<!-- Using older version to be compatible with Java 8 -->
|
||||
<!-- https://mvnrepository.com/artifact/com.amazonaws/DynamoDBLocal -->
|
||||
<dependency>
|
||||
<groupId>com.amazonaws</groupId>
|
||||
<artifactId>DynamoDBLocal</artifactId>
|
||||
<version>1.25.0</version>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>ch.qos.logback</groupId>
|
||||
<artifactId>logback-classic</artifactId>
|
||||
|
|
@ -162,11 +204,11 @@
|
|||
</dependencies>
|
||||
|
||||
<!--<repositories>-->
|
||||
<!--<repository>-->
|
||||
<!--<id>dynamodblocal</id>-->
|
||||
<!--<name>AWS DynamoDB Local Release Repository</name>-->
|
||||
<!--<url>https://s3-us-west-2.amazonaws.com/dynamodb-local/release</url>-->
|
||||
<!--</repository>-->
|
||||
<!--<repository>-->
|
||||
<!--<id>dynamodblocal</id>-->
|
||||
<!--<name>AWS DynamoDB Local Release Repository</name>-->
|
||||
<!--<url>https://s3-us-west-2.amazonaws.com/dynamodb-local/release</url>-->
|
||||
<!--</repository>-->
|
||||
<!--</repositories>-->
|
||||
|
||||
<developers>
|
||||
|
|
@ -203,20 +245,20 @@
|
|||
</pluginManagement>
|
||||
|
||||
<plugins>
|
||||
<plugin>
|
||||
<groupId>org.xolstice.maven.plugins</groupId>
|
||||
<artifactId>protobuf-maven-plugin</artifactId>
|
||||
<version>0.6.1</version>
|
||||
<executions>
|
||||
<execution>
|
||||
<goals>
|
||||
<goal>compile</goal>
|
||||
</goals>
|
||||
</execution>
|
||||
</executions>
|
||||
<configuration>
|
||||
<protocArtifact>com.google.protobuf:protoc:${protobuf.version}:exe:${os.detected.classifier}</protocArtifact>
|
||||
</configuration>
|
||||
<plugin>
|
||||
<groupId>org.xolstice.maven.plugins</groupId>
|
||||
<artifactId>protobuf-maven-plugin</artifactId>
|
||||
<version>0.6.1</version>
|
||||
<executions>
|
||||
<execution>
|
||||
<goals>
|
||||
<goal>compile</goal>
|
||||
</goals>
|
||||
</execution>
|
||||
</executions>
|
||||
<configuration>
|
||||
<protocArtifact>com.google.protobuf:protoc:${protobuf.version}:exe:${os.detected.classifier}</protocArtifact>
|
||||
</configuration>
|
||||
</plugin>
|
||||
<plugin>
|
||||
<groupId>org.apache.maven.plugins</groupId>
|
||||
|
|
|
|||
610
amazon-kinesis-client/scripts/KclMigrationTool.py
Normal file
610
amazon-kinesis-client/scripts/KclMigrationTool.py
Normal file
|
|
@ -0,0 +1,610 @@
|
|||
"""
|
||||
Copyright 2024 Amazon.com, Inc. or its affiliates.
|
||||
Licensed under the Apache License, Version 2.0 (the
|
||||
"License"); you may not use this file except in compliance
|
||||
with the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import time
|
||||
|
||||
from enum import Enum
|
||||
import boto3
|
||||
from botocore.config import Config
|
||||
from botocore.exceptions import ClientError
|
||||
|
||||
# DynamoDB table suffixes
DEFAULT_COORDINATOR_STATE_TABLE_SUFFIX = "-CoordinatorState"
DEFAULT_WORKER_METRICS_TABLE_SUFFIX = "-WorkerMetricStats"

# DynamoDB attribute names and values (short names mirror what the KCL
# writes into the coordinator state table)
CLIENT_VERSION_ATTR = 'cv'
TIMESTAMP_ATTR = 'mts'
MODIFIED_BY_ATTR = 'mb'
HISTORY_ATTR = 'h'
MIGRATION_KEY = "Migration3.0"

# GSI constants
GSI_NAME = 'LeaseOwnerToLeaseKeyIndex'
GSI_DELETION_WAIT_TIME_SECONDS = 120

# Shared boto3 client configuration using standard-mode retries.
config = Config(
    # TODO: parameterize
    region_name='us-east-1',
    retries={
        'max_attempts': 10,
        'mode': 'standard'
    }
)
|
||||
|
||||
# TODO: validate where these values came from. None of the originals seem to work.
|
||||
class KclClientVersion(Enum):
    """Client-version markers recorded in the coordinator state table while a
    KCL 2.x -> 3.x migration is in progress.
    """

    VERSION_2X = "CLIENT_VERSION_2x"
    UPGRADE_FROM_2X = "CLIENT_VERSION_UPGRADE_FROM_2x"
    VERSION_3X_WITH_ROLLBACK = "CLIENT_VERSION_3x_WITH_ROLLBACK"
    VERSION_3X = "CLIENT_VERSION_3x"

    def __str__(self):
        # Render the member as its raw wire value rather than
        # the default "KclClientVersion.X" form.
        return self.value
|
||||
|
||||
|
||||
def get_time_in_millis():
    """Return the current wall-clock time as epoch milliseconds, stringified
    for use as a DynamoDB number ('N') attribute value."""
    millis = round(time.time() * 1000)
    return str(millis)
|
||||
|
||||
|
||||
def is_valid_version(version, mode):
    """
    Validate if the given version is valid for the specified mode

    :param version: The KCL client version to validate
    :param mode: Either 'rollback' or 'rollforward'
    :return: True if the version is valid for the given mode, False otherwise
    """
    # Versions that indicate a migration is currently in flight.
    in_progress_versions = (
        KclClientVersion.UPGRADE_FROM_2X.value,
        KclClientVersion.VERSION_3X_WITH_ROLLBACK.value,
    )

    if mode == 'rollback':
        if version == KclClientVersion.VERSION_2X.value:
            # Already rolled back; nothing to do but it is not an error.
            print("Your KCL application already runs in a mode compatible with KCL 2.x. You can deploy the code with the previous KCL version if you still experience an issue.")
            return True
        if version in in_progress_versions:
            return True
        if version == KclClientVersion.VERSION_3X.value:
            print("Cannot roll back the KCL application."
                  " It is not in a state that supports rollback.")
            return False
        print("Migration to KCL 3.0 not in progress or application_name / coordinator_state_table_name is incorrect."
              " Please double check and run again with correct arguments.")
        return False

    if mode == 'rollforward':
        if version == KclClientVersion.VERSION_2X.value:
            return True
        if version in in_progress_versions:
            print("Cannot roll-forward application. It is not in a rolled back state.")
            return False
        if version == KclClientVersion.VERSION_3X.value:
            print("Cannot roll-forward the KCL application."
                  " Application has already migrated.")
            return False
        print("Cannot roll-forward because migration to KCL 3.0 is not in progress or application_name"
              " / coordinator_state_table_name is incorrect. Please double check and run again with correct arguments.")
        return False

    print(f"Invalid mode: {mode}. Mode must be either 'rollback' or 'rollforward'.")
    return False
|
||||
|
||||
|
||||
def handle_get_item_client_error(e, operation, table_name):
    """
    Handle ClientError exceptions raised by get_item on given DynamoDB table

    :param e: The ClientError exception object
    :param operation: Rollback or Roll-forward for logging the errors
    :param table_name: The name of the DynamoDB table where the error occurred
                       (currently unused; kept for call-site compatibility)
    """
    error = e.response['Error']
    code, message = error['Code'], error['Message']
    print(f"{operation} could not be performed.")
    if code == 'ProvisionedThroughputExceededException':
        # boto3's standard retry mode already retried before surfacing this.
        print(f"Throughput exceeded even after retries: {message}")
    else:
        print(f"Unexpected client error occurred: {code} - {message}")
    print("Please resolve the issue and run the KclMigrationTool again.")
|
||||
|
||||
|
||||
def table_exists(dynamodb_client, table_name):
    """
    Check if a DynamoDB table exists.

    :param dynamodb_client: Boto3 DynamoDB client
    :param table_name: Name of the DynamoDB table to check
    :return: True if the table exists, False otherwise
    """
    try:
        dynamodb_client.describe_table(TableName=table_name)
    except ClientError as e:
        # Absence is signalled via ResourceNotFoundException; any other
        # failure is reported and conservatively treated as "not found".
        if e.response['Error']['Code'] == 'ResourceNotFoundException':
            print(f"Table '{table_name}' does not exist.")
        else:
            print(f"An error occurred while checking table '{table_name}': {e}.")
        return False
    return True
|
||||
|
||||
|
||||
def validate_tables(dynamodb_client, operation, coordinator_state_table_name, lease_table_name=None):
    """
    Validate the existence of DynamoDB tables required for KCL operations

    :param dynamodb_client: A boto3 DynamoDB client object
    :param operation: Rollback or Roll-forward for logging
    :param coordinator_state_table_name: Name of the coordinator state table
    :param lease_table_name: Name of the DynamoDB KCL lease table (optional)
    :return: True if all required tables exist, False otherwise
    """
    failure_message = None

    # Lease table is checked only when a name was supplied; if it is missing
    # the coordinator table is not checked (matches original short-circuit).
    if lease_table_name and not table_exists(dynamodb_client, lease_table_name):
        failure_message = (
            f"{operation} failed. Could not find a KCL Application DDB lease table "
            f"with name {lease_table_name}. Please pass in the correct application_name "
            "and/or lease_table_name that matches your KCL application configuration."
        )
    elif not table_exists(dynamodb_client, coordinator_state_table_name):
        failure_message = (
            f"{operation} failed. Could not find a coordinator state table "
            f"{coordinator_state_table_name}. Please pass in the correct application_name or"
            f" coordinator_state_table_name that matches your KCL application configuration."
        )

    if failure_message is not None:
        print(failure_message)
        return False
    return True
|
||||
|
||||
|
||||
def add_current_state_to_history(item, max_history=10):
    """
    Adds the current state of a DynamoDB item to its history attribute.
    Creates a new history entry from the current value and maintains a capped history list.

    :param item: DynamoDB item to add history to
    :param max_history: Maximum number of history entries to maintain (default: 10)
    :return: Updated history attribute as a DynamoDB-formatted dictionary
    """
    # Extract current values; fall back to 'Unknown' / "now" when the item
    # has never been written.
    current_version = item.get(CLIENT_VERSION_ATTR, {}).get('S', 'Unknown')
    current_modified_by = item.get(MODIFIED_BY_ATTR, {}).get('S', 'Unknown')
    current_time_in_millis = (
        item.get(TIMESTAMP_ATTR, {}).get('N', get_time_in_millis())
    )

    # Create new history entry
    new_entry = {
        'M': {
            CLIENT_VERSION_ATTR: {'S': current_version},
            MODIFIED_BY_ATTR: {'S': current_modified_by},
            TIMESTAMP_ATTR: {'N': current_time_in_millis}
        }
    }

    # Get existing history or create new if doesn't exist
    history_dict = item.get(HISTORY_ATTR, {'L': []})
    history_list = history_dict['L']

    # Add new entry to the beginning of the list, capping at max_history.
    # Bug fix: the capped slice must be written back into history_dict —
    # previously `history_list = history_list[:max_history]` rebound only the
    # local name, so the returned history was never actually truncated.
    history_list.insert(0, new_entry)
    history_dict['L'] = history_list[:max_history]

    return history_dict
|
||||
|
||||
|
||||
def get_current_state(dynamodb_client, table_name):
    """
    Retrieve the current state from the DynamoDB table and prepare history update.
    Fetches the current item from the specified DynamoDB table,
    extracts the initial client version, and creates a new history entry.

    :param dynamodb_client: Boto3 DynamoDB client
    :param table_name: Name of the DynamoDB table to query
    :return: A tuple containing:
             - initial_version (str): The current client version, or 'Unknown' if not found
             - new_history (dict): Updated history including the current state
    """
    migration_item_key = {'key': {'S': MIGRATION_KEY}}
    response = dynamodb_client.get_item(
        TableName=table_name,
        Key=migration_item_key
    )
    item = response.get('Item', {})
    initial_version = item.get(CLIENT_VERSION_ATTR, {}).get('S', 'Unknown')
    return initial_version, add_current_state_to_history(item)
|
||||
|
||||
|
||||
def rollback_client_version(dynamodb_client, table_name, history):
    """
    Update the client version in the coordinator state table to initiate rollback.

    :param dynamodb_client: Boto3 DynamoDB client
    :param table_name: Name of the coordinator state DDB table
    :param history: Updated history attribute as a DynamoDB-formatted dictionary
    :return: A tuple containing:
             - success (bool): True if client version was successfully updated, False otherwise
             - previous_version (str): The version that was replaced, or None if update failed
    """
    try:
        print(f"Rolling back client version in table '{table_name}'...")
        update_response = dynamodb_client.update_item(
            TableName=table_name,
            Key={'key': {'S': MIGRATION_KEY}},
            UpdateExpression=(
                f"SET {CLIENT_VERSION_ATTR} = :rollback_client_version, "
                f"{TIMESTAMP_ATTR} = :updated_at, "
                f"{MODIFIED_BY_ATTR} = :modifier, "
                f"{HISTORY_ATTR} = :history"
            ),
            # Only states reached during an in-flight migration may be rolled back.
            ConditionExpression=(
                f"{CLIENT_VERSION_ATTR} IN ("
                ":upgrade_from_2x_client_version, "
                ":3x_with_rollback_client_version)"
            ),
            ExpressionAttributeValues={
                ':rollback_client_version': {'S': KclClientVersion.VERSION_2X.value},
                ':updated_at': {'N': get_time_in_millis()},
                ':modifier': {'S': 'KclMigrationTool-rollback'},
                ':history': history,
                ':upgrade_from_2x_client_version': (
                    {'S': KclClientVersion.UPGRADE_FROM_2X.value}
                ),
                ':3x_with_rollback_client_version': (
                    {'S': KclClientVersion.VERSION_3X_WITH_ROLLBACK.value}
                ),
            },
            ReturnValues='UPDATED_OLD'
        )
        replaced_item = update_response.get('Attributes', {})
        # Consistency fix: use CLIENT_VERSION_ATTR instead of hard-coded 'cv'.
        replaced_version = replaced_item.get(CLIENT_VERSION_ATTR, {}).get('S', '')
        return True, replaced_version
    except ClientError as e:
        if e.response['Error']['Code'] == 'ConditionalCheckFailedException':
            # Message fix: a separating space was missing between sentences.
            print("Unable to rollback, as application is not in a state that allows rollback."
                  " Ensure that the given application_name or coordinator_state_table_name is correct and"
                  " you have followed all prior migration steps.")
        else:
            # Message fix: separator was missing before "Please resolve".
            print(f"An unexpected error occurred while rolling back: {str(e)}. "
                  "Please resolve and run this migration script again.")
        return False, None
|
||||
|
||||
|
||||
def rollfoward_client_version(dynamodb_client, table_name, history):
    """
    Update the client version in the coordinator state table to initiate roll-forward
    conditionally if application is currently in rolled back state.

    NOTE: the misspelling "rollfoward" is kept for backward compatibility with callers.

    :param dynamodb_client: Boto3 DynamoDB client
    :param table_name: Name of the coordinator state DDB table
    :param history: Updated history attribute as a DynamoDB-formatted dictionary
    :return: True if client version was successfully updated, False otherwise
    """
    try:
        # Conditionally update client version: the condition expression only
        # matches when the application is currently in the rolled-back (2.x) state.
        dynamodb_client.update_item(
            TableName=table_name,
            Key={'key': {'S': MIGRATION_KEY}},
            UpdateExpression=(
                f"SET {CLIENT_VERSION_ATTR} = :rollforward_version, "
                f"{TIMESTAMP_ATTR} = :updated_at, "
                f"{MODIFIED_BY_ATTR} = :modifier, "
                f"{HISTORY_ATTR} = :new_history"
            ),
            ConditionExpression=f"{CLIENT_VERSION_ATTR} = :kcl_2x_version",
            ExpressionAttributeValues={
                ':rollforward_version': {'S': KclClientVersion.UPGRADE_FROM_2X.value},
                ':updated_at': {'N': get_time_in_millis()},
                ':modifier': {'S': 'KclMigrationTool-rollforward'},
                ':new_history': history,
                ':kcl_2x_version': {'S': KclClientVersion.VERSION_2X.value},
            }
        )
        print("Roll-forward has been initiated. KCL application will monitor for 3.0 readiness and"
              " automatically switch to 3.0 functionality when readiness criteria have been met.")
        # BUGFIX: the docstring promised a boolean result, but no value was returned.
        return True
    except ClientError as e:
        if e.response['Error']['Code'] == 'ConditionalCheckFailedException':
            # Condition failed: the application is not in the 2.x rolled-back state.
            print("Unable to roll-forward because application is not in rolled back state."
                  " Ensure that the given application_name or coordinator_state_table_name is correct"
                  " and you have followed all prior migration steps.")
        else:
            print(f"Unable to roll-forward due to error: {str(e)}. "
                  "Please resolve and run this migration script again.")
    except Exception as e:
        print(f"Unable to roll-forward due to error: {str(e)}. "
              "Please resolve and run this migration script again.")
    return False
|
||||
|
||||
|
||||
def delete_gsi_if_exists(dynamodb_client, table_name):
    """
    Deletes GSI on given lease table if it exists.

    :param dynamodb_client: Boto3 DynamoDB client
    :param table_name: Name of lease table to remove GSI from
    """
    try:
        # First check whether the GSI is actually present so we can give a
        # precise message (and avoid a pointless update_table call) if not.
        gsi_present = False
        response = dynamodb_client.describe_table(TableName=table_name)
        if 'GlobalSecondaryIndexes' in response['Table']:
            gsi_list = response['Table']['GlobalSecondaryIndexes']
            for gsi in gsi_list:
                if gsi['IndexName'] == GSI_NAME:
                    gsi_present = True
                    break

        if not gsi_present:
            print(f"GSI {GSI_NAME} is not present on lease table {table_name}. It may already be successfully"
                  " deleted. Or if lease table name is incorrect, please re-run the KclMigrationTool with correct"
                  " application_name or lease_table_name.")
            return
    except ClientError as e:
        if e.response['Error']['Code'] == 'ResourceNotFoundException':
            print(f"Lease table {table_name} does not exist, please check application_name or lease_table_name"
                  " configuration and try again.")
            return
        else:
            print(f"An unexpected error occurred while checking if GSI {GSI_NAME} exists"
                  f" on lease table {table_name}: {str(e)}. Please rectify the error and try again.")
            return

    print(f"Deleting GSI '{GSI_NAME}' from table '{table_name}'...")
    try:
        dynamodb_client.update_table(
            TableName=table_name,
            GlobalSecondaryIndexUpdates=[
                {
                    'Delete': {
                        'IndexName': GSI_NAME
                    }
                }
            ]
        )
    except ClientError as e:
        if e.response['Error']['Code'] == 'ResourceNotFoundException':
            print(f"{GSI_NAME} not found or table '{table_name}' not found.")
        elif e.response['Error']['Code'] == 'ResourceInUseException':
            print(f"Unable to delete GSI: '{table_name}' is currently being modified.")
        else:
            # BUGFIX: any other ClientError code was previously swallowed silently
            # (the generic `except Exception` below cannot catch it once this
            # `except ClientError` has matched). Surface it like the generic case.
            print(f"An unexpected error occurred while deleting GSI {GSI_NAME} on lease table {table_name}: {str(e)}."
                  " Please manually confirm the GSI is removed from the lease table, or"
                  " resolve the error and rerun the migration script.")
    except Exception as e:
        print(f"An unexpected error occurred while deleting GSI {GSI_NAME} on lease table {table_name}: {str(e)}."
              " Please manually confirm the GSI is removed from the lease table, or"
              " resolve the error and rerun the migration script.")
|
||||
|
||||
|
||||
def delete_worker_metrics_table_if_exists(dynamodb_client, worker_metrics_table_name):
    """
    Delete the application's worker metrics table, if it is present.

    Checks table existence first; if the table is missing (or the check fails),
    the problem is reported and nothing is deleted. Deletion failures are
    reported but never raised.

    :param dynamodb_client: Boto3 DynamoDB client
    :param worker_metrics_table_name: Name of the DynamoDB worker metrics table
    """
    # Existence check: a describe_table failure short-circuits the deletion.
    try:
        dynamodb_client.describe_table(TableName=worker_metrics_table_name)
    except ClientError as e:
        error_code = e.response['Error']['Code']
        if error_code == 'ResourceNotFoundException':
            print(f"Worker metrics table {worker_metrics_table_name} does not exist."
                  " It may already be successfully deleted. Please check that the application_name"
                  " or worker_metrics_table_name is correct. If not, correct this and rerun the migration script.")
        else:
            print(f"An unexpected error occurred when checking if {worker_metrics_table_name} table exists: {str(e)}."
                  " Please manually confirm the table is deleted, or resolve the error"
                  " and rerun the migration script.")
        return

    print(f"Deleting worker metrics table {worker_metrics_table_name}...")
    try:
        dynamodb_client.delete_table(TableName=worker_metrics_table_name)
    except ClientError as e:
        if e.response['Error']['Code'] == 'AccessDeniedException':
            print(f"No permissions to delete table {worker_metrics_table_name}. Please manually delete it if you"
                  " want to avoid any charges until you are ready to rollforward with migration.")
        else:
            print(f"An unexpected client error occurred while deleting worker metrics table: {str(e)}."
                  " Please manually confirm the table is deleted, or resolve the error"
                  " and rerun the migration script.")
    except Exception as e:
        print(f"An unexpected error occurred while deleting worker metrics table: {str(e)}."
              " Please manually confirm the table is deleted, or resolve the error"
              " and rerun the migration script.")
|
||||
|
||||
|
||||
def perform_rollback(dynamodb_client, lease_table_name, coordinator_state_table_name, worker_metrics_table_name):
    """
    Perform KCL 3.0 migration rollback by updating MigrationState for the KCL application.
    Rolls client version back, removes GSI from lease table, deletes worker metrics table.

    :param dynamodb_client: Boto3 DynamoDB client
    :param lease_table_name: Name of the DynamoDB KCL lease table
    :param coordinator_state_table_name: Name of the DynamoDB coordinator state table
    :param worker_metrics_table_name: Name of the DynamoDB worker metrics table
    """
    if not validate_tables(dynamodb_client, "Rollback", coordinator_state_table_name, lease_table_name):
        return

    try:
        initial_version, new_history = get_current_state(dynamodb_client,
                                                         coordinator_state_table_name)
    except ClientError as e:
        handle_get_item_client_error(e, "Rollback", coordinator_state_table_name)
        return

    if not is_valid_version(version=initial_version, mode='rollback'):
        return

    # 1. Rollback client version
    # If already at VERSION_2X there is nothing to roll back; otherwise
    # rollback_client_version also returns the version that was replaced,
    # which is used below to pick the right success message.
    if initial_version != KclClientVersion.VERSION_2X.value:
        rollback_succeeded, initial_version = rollback_client_version(
            dynamodb_client, coordinator_state_table_name, new_history
        )
        if not rollback_succeeded:
            return

    # Give workers time to observe the version change before removing resources.
    print(f"Waiting for {GSI_DELETION_WAIT_TIME_SECONDS} seconds before cleaning up KCL 3.0 resources after rollback...")
    time.sleep(GSI_DELETION_WAIT_TIME_SECONDS)

    # 2. Delete the GSI
    delete_gsi_if_exists(dynamodb_client, lease_table_name)

    # 3. Delete worker metrics table
    delete_worker_metrics_table_if_exists(dynamodb_client, worker_metrics_table_name)

    # Log success
    if initial_version == KclClientVersion.UPGRADE_FROM_2X.value:
        print("\nRollback completed. Your application was running 2x compatible functionality.")
        print("Please rollback to your previous application binaries by deploying the code with your previous KCL version.")
    elif initial_version == KclClientVersion.VERSION_3X_WITH_ROLLBACK.value:
        print("\nRollback completed. Your KCL Application was running 3x functionality and will rollback to 2x compatible functionality.")
        print("If you don't see mitigation after a short period of time,"
              " please rollback to your previous application binaries by deploying the code with your previous KCL version.")
    elif initial_version == KclClientVersion.VERSION_2X.value:
        print("\nApplication was already rolled back. Any KCLv3 resources that could be deleted were cleaned up"
              " to avoid charges until the application can be rolled forward with migration.")
|
||||
|
||||
|
||||
def perform_rollforward(dynamodb_client, coordinator_state_table_name):
    """
    Roll the KCL application forward by updating MigrationState in the
    coordinator state table, after validating the table and the current version.

    :param dynamodb_client: Boto3 DynamoDB client
    :param coordinator_state_table_name: Name of the DynamoDB table
    """
    mode_label = "Roll-forward"
    if not validate_tables(dynamodb_client, mode_label, coordinator_state_table_name):
        return

    # Read the current migration state; abort on any DDB client failure.
    try:
        current_version, updated_history = get_current_state(
            dynamodb_client, coordinator_state_table_name)
    except ClientError as client_error:
        handle_get_item_client_error(client_error, mode_label, coordinator_state_table_name)
        return

    # Only proceed when the recorded version permits a roll-forward.
    if is_valid_version(version=current_version, mode='rollforward'):
        rollfoward_client_version(dynamodb_client, coordinator_state_table_name, updated_history)
|
||||
|
||||
|
||||
def run_kcl_migration(mode, lease_table_name, coordinator_state_table_name, worker_metrics_table_name):
    """
    Dispatch the requested migration operation against the CoordinatorState DDB table.

    :param mode: Either 'rollback' or 'rollforward'
    :param lease_table_name: Name of the DynamoDB KCL lease table
    :param coordinator_state_table_name: Name of the DynamoDB coordinator state table
    :param worker_metrics_table_name: Name of the DynamoDB worker metrics table
    """
    ddb_client = boto3.client('dynamodb', config=config)

    if mode == "rollback":
        perform_rollback(
            ddb_client,
            lease_table_name,
            coordinator_state_table_name,
            worker_metrics_table_name)
    elif mode == "rollforward":
        perform_rollforward(ddb_client, coordinator_state_table_name)
    else:
        # Should not happen when invoked through the CLI (argparse restricts choices).
        print(f"Invalid mode: {mode}. Please use 'rollback' or 'rollforward'.")
|
||||
|
||||
|
||||
def validate_args(args):
    """
    Validate that the parsed CLI arguments identify the tables needed by the chosen mode.

    :param args: parsed command line arguments (mode, application_name, table names)
    :raises ValueError: if neither an application name nor the table name(s)
        required by the selected mode were supplied
    """
    if args.mode == 'rollforward':
        # Roll-forward only touches the coordinator state table.
        has_target = args.application_name or args.coordinator_state_table_name
        if not has_target:
            raise ValueError(
                "For rollforward mode, either application_name or "
                "coordinator_state_table_name must be provided."
            )
        return

    # Rollback mode: an application name alone lets all table names be derived.
    if args.application_name:
        return

    all_tables_given = (args.lease_table_name
                        and args.coordinator_state_table_name
                        and args.worker_metrics_table_name)
    if not all_tables_given:
        raise ValueError(
            "For rollback mode, either application_name or all three table names "
            "(lease_table_name, coordinator_state_table_name, and "
            "worker_metrics_table_name) must be provided."
        )
|
||||
|
||||
def process_table_names(args):
    """
    Derive the table names to operate on from the parsed CLI arguments.

    Explicitly supplied table-name arguments always win; otherwise names are
    derived from application_name plus the default suffixes. Lease and worker
    metrics names are only resolved in rollback mode and stay None otherwise.

    :param args: Parsed command line arguments
    :return: tuple (mode, lease_table_name, coordinator_state_table_name,
        worker_metrics_table_name)
    """
    mode = args.mode
    app_name = args.application_name

    coordinator_table = (args.coordinator_state_table_name
                         or app_name + DEFAULT_COORDINATOR_STATE_TABLE_SUFFIX)

    lease_table = None
    worker_metrics_table = None
    if mode == "rollback":
        # Lease table defaults to the bare application name.
        lease_table = args.lease_table_name or app_name
        worker_metrics_table = (args.worker_metrics_table_name
                                or app_name + DEFAULT_WORKER_METRICS_TABLE_SUFFIX)

    return (mode,
            lease_table,
            coordinator_table,
            worker_metrics_table)
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # CLI entry point: parse arguments, validate them, then dispatch to
    # run_kcl_migration with the resolved table names.
    parser = argparse.ArgumentParser(
        description=
        """
        KCL Migration Tool
        This tool facilitates the migration and rollback processes for Amazon KCLv3 applications.

        Before running this tool:
        1. Ensure you have the necessary AWS permissions configured to access and modify the following:
        - KCL application DynamoDB tables (lease table and coordinator state table)

        2. Verify that your AWS credentials are properly set up in your environment or AWS config file.

        3. Confirm that you have the correct KCL application name and lease table name (if configured in KCL).

        Usage:
        This tool supports two main operations: rollforward (upgrade) and rollback.
        For detailed usage instructions, use the -h or --help option.
        """,
        formatter_class=argparse.RawDescriptionHelpFormatter)
    parser.add_argument("--mode", choices=['rollback', 'rollforward'], required=True,
                        help="Mode of operation: rollback or rollforward")
    parser.add_argument("--application_name",
                        help="Name of the KCL application. This must match the application name "
                             "used in the KCL Library configurations.")
    parser.add_argument("--lease_table_name",
                        help="Name of the DynamoDB lease table (defaults to applicationName)."
                             " If LeaseTable name was specified for the application as part of "
                             "the KCL configurations, the same name must be passed here.")
    parser.add_argument("--coordinator_state_table_name",
                        help="Name of the DynamoDB coordinator state table "
                             "(defaults to applicationName-CoordinatorState)."
                             " If coordinator state table name was specified for the application "
                             "as part of the KCL configurations, the same name must be passed here.")
    parser.add_argument("--worker_metrics_table_name",
                        help="Name of the DynamoDB worker metrics table "
                             "(defaults to applicationName-WorkerMetricStats)."
                             " If worker metrics table name was specified for the application "
                             "as part of the KCL configurations, the same name must be passed here.")

    args = parser.parse_args()
    # Fail fast with a clear message before touching any AWS resources.
    validate_args(args)
    run_kcl_migration(*process_table_names(args))
|
||||
|
|
@ -256,7 +256,8 @@ public class ConfigsBuilder {
|
|||
* @return LeaseManagementConfig
|
||||
*/
|
||||
public LeaseManagementConfig leaseManagementConfig() {
|
||||
return new LeaseManagementConfig(tableName(), dynamoDBClient(), kinesisClient(), workerIdentifier());
|
||||
return new LeaseManagementConfig(
|
||||
tableName(), applicationName(), dynamoDBClient(), kinesisClient(), workerIdentifier());
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -0,0 +1,76 @@
|
|||
/*
|
||||
* Copyright 2024 Amazon.com, Inc. or its affiliates.
|
||||
* Licensed under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package software.amazon.kinesis.common;
|
||||
|
||||
import java.util.Collection;
|
||||
import java.util.Collections;
|
||||
|
||||
import lombok.Data;
|
||||
import lombok.NoArgsConstructor;
|
||||
import lombok.experimental.Accessors;
|
||||
import software.amazon.awssdk.services.dynamodb.model.BillingMode;
|
||||
import software.amazon.awssdk.services.dynamodb.model.Tag;
|
||||
|
||||
/**
 * Configurations of a DDB table created by KCL for its internal operations.
 */
@Data
@Accessors(fluent = true)
@NoArgsConstructor
public class DdbTableConfig {

    /**
     * Creates a config whose table name defaults to {@code applicationName-tableSuffix}.
     *
     * @param applicationName name of the KCL application, used as the table-name prefix
     * @param tableSuffix suffix identifying the specific internal table
     */
    protected DdbTableConfig(final String applicationName, final String tableSuffix) {
        this.tableName = applicationName + "-" + tableSuffix;
    }

    /**
     * name to use for the DDB table. If null, it will default to
     * applicationName-tableSuffix. If multiple KCL applications
     * run in the same account, a unique tableName must be provided.
     */
    private String tableName;

    /**
     * Billing mode used to create the DDB table.
     */
    private BillingMode billingMode = BillingMode.PAY_PER_REQUEST;

    /**
     * read capacity to provision during DDB table creation,
     * if billing mode is PROVISIONED.
     */
    private long readCapacity;

    /**
     * write capacity to provision during DDB table creation,
     * if billing mode is PROVISIONED.
     */
    private long writeCapacity;

    /**
     * Flag to enable Point in Time Recovery on the DDB table.
     */
    private boolean pointInTimeRecoveryEnabled = false;

    /**
     * Flag to enable deletion protection on the DDB table.
     */
    private boolean deletionProtectionEnabled = false;

    /**
     * Tags to add to the DDB table.
     */
    private Collection<Tag> tags = Collections.emptyList();
}
|
||||
|
|
@ -15,10 +15,13 @@
|
|||
package software.amazon.kinesis.common;
|
||||
|
||||
import java.time.Duration;
|
||||
import java.util.concurrent.CompletableFuture;
|
||||
import java.util.concurrent.CompletionException;
|
||||
import java.util.concurrent.ExecutionException;
|
||||
import java.util.concurrent.Future;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
import java.util.concurrent.TimeoutException;
|
||||
import java.util.function.Supplier;
|
||||
|
||||
public class FutureUtils {
|
||||
|
||||
|
|
@ -31,4 +34,15 @@ public class FutureUtils {
|
|||
throw te;
|
||||
}
|
||||
}
|
||||
|
||||
    /**
     * Joins the future produced by {@code supplier} and returns its value.
     * If the future completed exceptionally and the cause is a
     * {@link RuntimeException}, that original exception is rethrown instead of
     * the wrapping {@link CompletionException}; any other failure propagates
     * as the {@code CompletionException} itself.
     *
     * @param supplier supplies the future to join; invoked exactly once
     * @param <T> result type of the future
     * @return the completed future's result
     */
    public static <T> T unwrappingFuture(final Supplier<CompletableFuture<T>> supplier) {
        try {
            return supplier.get().join();
        } catch (CompletionException e) {
            // Unwrap to preserve the caller-visible exception type thrown by the task.
            if (e.getCause() instanceof RuntimeException) {
                throw (RuntimeException) e.getCause();
            }
            throw e;
        }
    }
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright 2019 Amazon.com, Inc. or its affiliates.
|
||||
* Copyright 2024 Amazon.com, Inc. or its affiliates.
|
||||
* Licensed under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
|
|
@ -12,18 +12,16 @@
|
|||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package software.amazon.kinesis.common;
|
||||
|
||||
package software.amazon.kinesis.leases.dynamodb;
|
||||
public class StackTraceUtils {
|
||||
public static String getPrintableStackTrace(final StackTraceElement[] stackTrace) {
|
||||
final StringBuilder stackTraceString = new StringBuilder();
|
||||
|
||||
import lombok.AccessLevel;
|
||||
import lombok.NoArgsConstructor;
|
||||
for (final StackTraceElement traceElement : stackTrace) {
|
||||
stackTraceString.append("\tat ").append(traceElement).append("\n");
|
||||
}
|
||||
|
||||
/**
|
||||
* This class is just a holder for initial lease table IOPs units. This class will be removed in a future release.
|
||||
*/
|
||||
@Deprecated
|
||||
@NoArgsConstructor(access = AccessLevel.PRIVATE)
|
||||
public class TableConstants {
|
||||
public static final long DEFAULT_INITIAL_LEASE_TABLE_READ_CAPACITY = 10L;
|
||||
public static final long DEFAULT_INITIAL_LEASE_TABLE_WRITE_CAPACITY = 10L;
|
||||
return stackTraceString.toString();
|
||||
}
|
||||
}
|
||||
|
|
@ -18,6 +18,7 @@ package software.amazon.kinesis.coordinator;
|
|||
import lombok.Data;
|
||||
import lombok.NonNull;
|
||||
import lombok.experimental.Accessors;
|
||||
import software.amazon.kinesis.common.DdbTableConfig;
|
||||
import software.amazon.kinesis.leases.NoOpShardPrioritization;
|
||||
import software.amazon.kinesis.leases.ShardPrioritization;
|
||||
|
||||
|
|
@ -27,6 +28,14 @@ import software.amazon.kinesis.leases.ShardPrioritization;
|
|||
@Data
|
||||
@Accessors(fluent = true)
|
||||
public class CoordinatorConfig {
|
||||
|
||||
private static final int PERIODIC_SHARD_SYNC_MAX_WORKERS_DEFAULT = 1;
|
||||
|
||||
public CoordinatorConfig(final String applicationName) {
|
||||
this.applicationName = applicationName;
|
||||
this.coordinatorStateTableConfig = new CoordinatorStateTableConfig(applicationName);
|
||||
}
|
||||
|
||||
/**
|
||||
* Application name used by checkpointer to checkpoint.
|
||||
*
|
||||
|
|
@ -96,4 +105,53 @@ public class CoordinatorConfig {
|
|||
* <p>Default value: 1000L</p>
|
||||
*/
|
||||
private long schedulerInitializationBackoffTimeMillis = 1000L;
|
||||
|
||||
/**
|
||||
* Version the KCL needs to operate in. For more details check the KCLv3 migration
|
||||
* documentation.
|
||||
*/
|
||||
public enum ClientVersionConfig {
|
||||
/**
|
||||
* For an application that was operating with previous KCLv2.x, during
|
||||
* upgrade to KCLv3.x, a migration process is needed due to the incompatible
|
||||
* changes between the 2 versions. During the migration process, application
|
||||
* must use ClientVersion=CLIENT_VERSION_COMPATIBLE_WITH_2x so that it runs in
|
||||
* a compatible mode until all workers in the cluster have upgraded to the version
|
||||
* running 3.x version (which is determined based on workers emitting WorkerMetricStats)
|
||||
* Once all known workers are in 3.x mode, the library auto toggles to 3.x mode;
|
||||
* but prior to that it runs in a mode compatible with 2.x workers.
|
||||
* This version also allows rolling back to the compatible mode from the
|
||||
* auto-toggled 3.x mode.
|
||||
*/
|
||||
CLIENT_VERSION_CONFIG_COMPATIBLE_WITH_2X,
|
||||
/**
|
||||
* A new application operating with KCLv3.x will use this value. Also, an application
|
||||
* that has successfully upgraded to 3.x version and no longer needs the ability
|
||||
* for a rollback to a 2.x compatible version, will use this value. In this version,
|
||||
* KCL will operate with new algorithms introduced in 3.x which is not compatible
|
||||
* with prior versions. And once in this version, rollback to 2.x is not supported.
|
||||
*/
|
||||
CLIENT_VERSION_CONFIG_3X,
|
||||
}
|
||||
|
||||
/**
|
||||
* Client version KCL must operate in, by default it operates in 3.x version which is not
|
||||
* compatible with prior versions.
|
||||
*/
|
||||
private ClientVersionConfig clientVersionConfig = ClientVersionConfig.CLIENT_VERSION_CONFIG_3X;
|
||||
|
||||
public static class CoordinatorStateTableConfig extends DdbTableConfig {
|
||||
private CoordinatorStateTableConfig(final String applicationName) {
|
||||
super(applicationName, "CoordinatorState");
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Configuration to control how the CoordinatorState DDB table is created, such as table name,
|
||||
* billing mode, provisioned capacity. If no table name is specified, the table name will
|
||||
* default to applicationName-CoordinatorState. If no billing more is chosen, default is
|
||||
* On-Demand.
|
||||
*/
|
||||
@NonNull
|
||||
private final CoordinatorStateTableConfig coordinatorStateTableConfig;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -0,0 +1,52 @@
|
|||
/*
|
||||
* Copyright 2024 Amazon.com, Inc. or its affiliates.
|
||||
* Licensed under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package software.amazon.kinesis.coordinator;
|
||||
|
||||
import java.util.Map;
|
||||
|
||||
import lombok.AccessLevel;
|
||||
import lombok.AllArgsConstructor;
|
||||
import lombok.Builder;
|
||||
import lombok.Data;
|
||||
import lombok.NoArgsConstructor;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import software.amazon.awssdk.services.dynamodb.model.AttributeValue;
|
||||
import software.amazon.kinesis.annotations.KinesisClientInternalApi;
|
||||
|
||||
/**
 * DataModel for CoordinatorState, this data model is used to store various state information required
 * for coordination across the KCL worker fleet. Therefore, the model follows a flexible schema.
 */
@Data
@Builder
@NoArgsConstructor
@AllArgsConstructor(access = AccessLevel.PRIVATE)
@Slf4j
@KinesisClientInternalApi
public class CoordinatorState {
    // Name of the hash-key attribute in the CoordinatorState DDB table.
    public static final String COORDINATOR_STATE_TABLE_HASH_KEY_ATTRIBUTE_NAME = "key";

    /**
     * Key value for the item in the CoordinatorState table used for leader
     * election among the KCL workers. The attributes relevant to this item
     * is dictated by the DDB Lock client implementation that is used to
     * provide mutual exclusion.
     */
    public static final String LEADER_HASH_KEY = "Leader";

    // Hash-key value identifying this state item within the table.
    private String key;

    // Remaining attributes of the item; contents vary per state item
    // since the table follows a flexible schema.
    private Map<String, AttributeValue> attributes;
}
|
||||
|
|
@ -0,0 +1,425 @@
|
|||
/*
|
||||
* Copyright 2024 Amazon.com, Inc. or its affiliates.
|
||||
* Licensed under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package software.amazon.kinesis.coordinator;
|
||||
|
||||
import java.time.Duration;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collections;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Objects;
|
||||
|
||||
import com.amazonaws.services.dynamodbv2.AmazonDynamoDBLockClientOptions;
|
||||
import com.amazonaws.services.dynamodbv2.AmazonDynamoDBLockClientOptions.AmazonDynamoDBLockClientOptionsBuilder;
|
||||
import lombok.NonNull;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.apache.commons.collections4.MapUtils;
|
||||
import software.amazon.awssdk.core.waiters.WaiterResponse;
|
||||
import software.amazon.awssdk.services.dynamodb.DynamoDbAsyncClient;
|
||||
import software.amazon.awssdk.services.dynamodb.DynamoDbClient;
|
||||
import software.amazon.awssdk.services.dynamodb.model.AttributeAction;
|
||||
import software.amazon.awssdk.services.dynamodb.model.AttributeDefinition;
|
||||
import software.amazon.awssdk.services.dynamodb.model.AttributeValue;
|
||||
import software.amazon.awssdk.services.dynamodb.model.AttributeValueUpdate;
|
||||
import software.amazon.awssdk.services.dynamodb.model.BillingMode;
|
||||
import software.amazon.awssdk.services.dynamodb.model.ConditionalCheckFailedException;
|
||||
import software.amazon.awssdk.services.dynamodb.model.CreateTableRequest;
|
||||
import software.amazon.awssdk.services.dynamodb.model.CreateTableResponse;
|
||||
import software.amazon.awssdk.services.dynamodb.model.DescribeTableRequest;
|
||||
import software.amazon.awssdk.services.dynamodb.model.DescribeTableResponse;
|
||||
import software.amazon.awssdk.services.dynamodb.model.DynamoDbException;
|
||||
import software.amazon.awssdk.services.dynamodb.model.ExpectedAttributeValue;
|
||||
import software.amazon.awssdk.services.dynamodb.model.GetItemRequest;
|
||||
import software.amazon.awssdk.services.dynamodb.model.GetItemResponse;
|
||||
import software.amazon.awssdk.services.dynamodb.model.KeySchemaElement;
|
||||
import software.amazon.awssdk.services.dynamodb.model.KeyType;
|
||||
import software.amazon.awssdk.services.dynamodb.model.ProvisionedThroughput;
|
||||
import software.amazon.awssdk.services.dynamodb.model.ProvisionedThroughputExceededException;
|
||||
import software.amazon.awssdk.services.dynamodb.model.PutItemRequest;
|
||||
import software.amazon.awssdk.services.dynamodb.model.ResourceNotFoundException;
|
||||
import software.amazon.awssdk.services.dynamodb.model.ScalarAttributeType;
|
||||
import software.amazon.awssdk.services.dynamodb.model.ScanRequest;
|
||||
import software.amazon.awssdk.services.dynamodb.model.ScanResponse;
|
||||
import software.amazon.awssdk.services.dynamodb.model.TableDescription;
|
||||
import software.amazon.awssdk.services.dynamodb.model.TableStatus;
|
||||
import software.amazon.awssdk.services.dynamodb.model.UpdateItemRequest;
|
||||
import software.amazon.awssdk.services.dynamodb.waiters.DynamoDbAsyncWaiter;
|
||||
import software.amazon.awssdk.utils.CollectionUtils;
|
||||
import software.amazon.kinesis.common.FutureUtils;
|
||||
import software.amazon.kinesis.coordinator.CoordinatorConfig.CoordinatorStateTableConfig;
|
||||
import software.amazon.kinesis.coordinator.migration.MigrationState;
|
||||
import software.amazon.kinesis.leases.DynamoUtils;
|
||||
import software.amazon.kinesis.leases.exceptions.DependencyException;
|
||||
import software.amazon.kinesis.leases.exceptions.InvalidStateException;
|
||||
import software.amazon.kinesis.leases.exceptions.ProvisionedThroughputException;
|
||||
import software.amazon.kinesis.utils.DdbUtil;
|
||||
|
||||
import static java.util.Objects.nonNull;
|
||||
import static software.amazon.kinesis.common.FutureUtils.unwrappingFuture;
|
||||
import static software.amazon.kinesis.coordinator.CoordinatorState.COORDINATOR_STATE_TABLE_HASH_KEY_ATTRIBUTE_NAME;
|
||||
|
||||
/**
 * Data Access Object to abstract accessing {@link CoordinatorState} from
 * the CoordinatorState DDB table.
 */
@Slf4j
public class CoordinatorStateDAO {
    private final DynamoDbAsyncClient dynamoDbAsyncClient;
    // Synchronous facade over the async client above (see createDelegateClient);
    // handed to the DynamoDB lock client options builder, which takes a sync client.
    private final DynamoDbClient dynamoDbSyncClient;

    private final CoordinatorStateTableConfig config;

    /**
     * @param dynamoDbAsyncClient client used for all table operations
     * @param config CoordinatorState table settings (name, billing mode, capacity, tags, ...)
     */
    public CoordinatorStateDAO(
            final DynamoDbAsyncClient dynamoDbAsyncClient, final CoordinatorStateTableConfig config) {
        this.dynamoDbAsyncClient = dynamoDbAsyncClient;
        this.config = config;
        this.dynamoDbSyncClient = createDelegateClient();
    }

    /**
     * Creates the backing DDB table if it does not exist and waits for it to become active.
     *
     * @throws DependencyException if table creation fails or does not become active in time
     */
    public void initialize() throws DependencyException {
        createTableIfNotExists();
    }

    // Adapts the async client to the sync interface; no second network client is created.
    private DynamoDbClient createDelegateClient() {
        return new DynamoDbAsyncToSyncClientAdapter(dynamoDbAsyncClient);
    }

    /**
     * Returns lock-client options pre-configured with this DAO's (adapted) sync client,
     * table name and partition key attribute.
     */
    public AmazonDynamoDBLockClientOptionsBuilder getDDBLockClientOptionsBuilder() {
        return AmazonDynamoDBLockClientOptions.builder(dynamoDbSyncClient, config.tableName())
                .withPartitionKeyName(COORDINATOR_STATE_TABLE_HASH_KEY_ATTRIBUTE_NAME);
    }

    /**
     * List all the {@link CoordinatorState} from the DDB table synchronously
     *
     * @throws DependencyException if DynamoDB scan fails in an unexpected way
     * @throws InvalidStateException if ddb table does not exist
     * @throws ProvisionedThroughputException if DynamoDB scan fails due to lack of capacity
     *
     * @return list of state
     */
    public List<CoordinatorState> listCoordinatorState()
            throws ProvisionedThroughputException, DependencyException, InvalidStateException {
        log.debug("Listing coordinatorState");

        final ScanRequest request =
                ScanRequest.builder().tableName(config.tableName()).build();

        try {
            ScanResponse response = FutureUtils.unwrappingFuture(() -> dynamoDbAsyncClient.scan(request));
            final List<CoordinatorState> stateList = new ArrayList<>();
            // Paginate: keep scanning while DDB returns a lastEvaluatedKey.
            while (Objects.nonNull(response)) {
                log.debug("Scan response {}", response);

                response.items().stream().map(this::fromDynamoRecord).forEach(stateList::add);
                if (!CollectionUtils.isNullOrEmpty(response.lastEvaluatedKey())) {
                    final ScanRequest continuationRequest = request.toBuilder()
                            .exclusiveStartKey(response.lastEvaluatedKey())
                            .build();
                    log.debug("Scan request {}", continuationRequest);
                    response = FutureUtils.unwrappingFuture(() -> dynamoDbAsyncClient.scan(continuationRequest));
                } else {
                    log.debug("Scan finished");
                    response = null;
                }
            }
            return stateList;
        } catch (final ProvisionedThroughputExceededException e) {
            log.warn(
                    "Provisioned throughput on {} has exceeded. It is recommended to increase the IOPs"
                            + " on the table.",
                    config.tableName());
            throw new ProvisionedThroughputException(e);
        } catch (final ResourceNotFoundException e) {
            throw new InvalidStateException(
                    String.format("Cannot list coordinatorState, because table %s does not exist", config.tableName()));
        } catch (final DynamoDbException e) {
            throw new DependencyException(e);
        }
    }

    /**
     * Create a new {@link CoordinatorState} if it does not exist.
     * @param state the state to create
     * @return true if state was created, false if it already exists
     *
     * @throws DependencyException if DynamoDB put fails in an unexpected way
     * @throws InvalidStateException if lease table does not exist
     * @throws ProvisionedThroughputException if DynamoDB put fails due to lack of capacity
     */
    public boolean createCoordinatorStateIfNotExists(final CoordinatorState state)
            throws DependencyException, InvalidStateException, ProvisionedThroughputException {
        log.debug("Creating coordinatorState {}", state);

        // Conditional put: expectation requires the hash key to be absent, so a
        // concurrent/previous writer wins and this call reports false instead of overwriting.
        final PutItemRequest request = PutItemRequest.builder()
                .tableName(config.tableName())
                .item(toDynamoRecord(state))
                .expected(getDynamoNonExistentExpectation())
                .build();

        try {
            FutureUtils.unwrappingFuture(() -> dynamoDbAsyncClient.putItem(request));
        } catch (final ConditionalCheckFailedException e) {
            log.info("Not creating coordinator state because the key already exists");
            return false;
        } catch (final ProvisionedThroughputExceededException e) {
            log.warn(
                    "Provisioned throughput on {} has exceeded. It is recommended to increase the IOPs"
                            + " on the table.",
                    config.tableName());
            throw new ProvisionedThroughputException(e);
        } catch (final ResourceNotFoundException e) {
            throw new InvalidStateException(String.format(
                    "Cannot create coordinatorState %s, because table %s does not exist", state, config.tableName()));
        } catch (final DynamoDbException e) {
            throw new DependencyException(e);
        }

        log.info("Created CoordinatorState: {}", state);
        return true;
    }

    /**
     * @param key Get the CoordinatorState for this key
     *
     * @throws InvalidStateException if ddb table does not exist
     * @throws ProvisionedThroughputException if DynamoDB get fails due to lack of capacity
     * @throws DependencyException if DynamoDB get fails in an unexpected way
     *
     * @return state for the specified key, or null if one doesn't exist
     */
    public CoordinatorState getCoordinatorState(@NonNull final String key)
            throws DependencyException, InvalidStateException, ProvisionedThroughputException {
        log.debug("Getting coordinatorState with key {}", key);

        // consistentRead: callers coordinate on this state, so a stale read is not acceptable.
        final GetItemRequest request = GetItemRequest.builder()
                .tableName(config.tableName())
                .key(getCoordinatorStateKey(key))
                .consistentRead(true)
                .build();

        try {
            final GetItemResponse result = FutureUtils.unwrappingFuture(() -> dynamoDbAsyncClient.getItem(request));

            final Map<String, AttributeValue> dynamoRecord = result.item();
            if (CollectionUtils.isNullOrEmpty(dynamoRecord)) {
                log.debug("No coordinatorState found with key {}, returning null.", key);
                return null;
            }
            return fromDynamoRecord(dynamoRecord);
        } catch (final ProvisionedThroughputExceededException e) {
            log.warn(
                    "Provisioned throughput on {} has exceeded. It is recommended to increase the IOPs"
                            + " on the table.",
                    config.tableName());
            throw new ProvisionedThroughputException(e);
        } catch (final ResourceNotFoundException e) {
            throw new InvalidStateException(String.format(
                    "Cannot get coordinatorState for key %s, because table %s does not exist",
                    key, config.tableName()));
        } catch (final DynamoDbException e) {
            throw new DependencyException(e);
        }
    }

    /**
     * Update fields of the given coordinator state in DynamoDB. Conditional on the provided expectation.
     *
     * @return true if update succeeded, false otherwise when expectations are not met
     *
     * @throws InvalidStateException if table does not exist
     * @throws ProvisionedThroughputException if DynamoDB update fails due to lack of capacity
     * @throws DependencyException if DynamoDB update fails in an unexpected way
     */
    public boolean updateCoordinatorStateWithExpectation(
            @NonNull final CoordinatorState state, final Map<String, ExpectedAttributeValue> expectations)
            throws DependencyException, InvalidStateException, ProvisionedThroughputException {
        // Always require the item to exist (key-equality expectation); caller-provided
        // expectations are layered on top and may override per-attribute conditions.
        final Map<String, ExpectedAttributeValue> expectationMap = getDynamoExistentExpectation(state.getKey());
        expectationMap.putAll(MapUtils.emptyIfNull(expectations));

        final Map<String, AttributeValueUpdate> updateMap = getDynamoCoordinatorStateUpdate(state);

        final UpdateItemRequest request = UpdateItemRequest.builder()
                .tableName(config.tableName())
                .key(getCoordinatorStateKey(state.getKey()))
                .expected(expectationMap)
                .attributeUpdates(updateMap)
                .build();

        try {
            FutureUtils.unwrappingFuture(() -> dynamoDbAsyncClient.updateItem(request));
        } catch (final ConditionalCheckFailedException e) {
            // Expected contention outcome, not an error: report false to the caller.
            log.debug("CoordinatorState update {} failed because conditions were not met", state);
            return false;
        } catch (final ProvisionedThroughputExceededException e) {
            log.warn(
                    "Provisioned throughput on {} has exceeded. It is recommended to increase the IOPs"
                            + " on the table.",
                    config.tableName());
            throw new ProvisionedThroughputException(e);
        } catch (final ResourceNotFoundException e) {
            throw new InvalidStateException(String.format(
                    "Cannot update coordinatorState for key %s, because table %s does not exist",
                    state.getKey(), config.tableName()));
        } catch (final DynamoDbException e) {
            throw new DependencyException(e);
        }

        log.info("Coordinator state updated {}", state);
        return true;
    }

    // Creates the table if absent, then blocks (up to 10 minutes) until it is ACTIVE.
    // NOTE(review): the PITR enabler below only runs on the "table was not yet ACTIVE"
    // path — a pre-existing ACTIVE table is never (re)configured here; presumably
    // intentional (configure-on-create only), but worth confirming.
    private void createTableIfNotExists() throws DependencyException {
        TableDescription tableDescription = getTableDescription();
        if (tableDescription == null) {
            final CreateTableResponse response = unwrappingFuture(() -> dynamoDbAsyncClient.createTable(getRequest()));
            tableDescription = response.tableDescription();
            log.info("DDB Table: {} created", config.tableName());
        } else {
            log.info("Skipping DDB table {} creation as it already exists", config.tableName());
        }

        if (tableDescription.tableStatus() != TableStatus.ACTIVE) {
            log.info("Waiting for DDB Table: {} to become active", config.tableName());
            try (final DynamoDbAsyncWaiter waiter = dynamoDbAsyncClient.waiter()) {
                final WaiterResponse<DescribeTableResponse> response =
                        unwrappingFuture(() -> waiter.waitUntilTableExists(
                                r -> r.tableName(config.tableName()), o -> o.waitTimeout(Duration.ofMinutes(10))));
                // An empty matched().response() means the waiter gave up; surface as DependencyException.
                response.matched()
                        .response()
                        .orElseThrow(() -> new DependencyException(new IllegalStateException(
                                "Creating CoordinatorState table timed out",
                                response.matched().exception().orElse(null))));
            }
            unwrappingFuture(() -> DdbUtil.pitrEnabler(config, dynamoDbAsyncClient));
        }
    }

    // Builds the CreateTable request from config: string hash key, optional tags,
    // deletion protection, and billing mode (provisioned capacity only for PROVISIONED).
    private CreateTableRequest getRequest() {
        final CreateTableRequest.Builder requestBuilder = CreateTableRequest.builder()
                .tableName(config.tableName())
                .keySchema(KeySchemaElement.builder()
                        .attributeName(COORDINATOR_STATE_TABLE_HASH_KEY_ATTRIBUTE_NAME)
                        .keyType(KeyType.HASH)
                        .build())
                .attributeDefinitions(AttributeDefinition.builder()
                        .attributeName(COORDINATOR_STATE_TABLE_HASH_KEY_ATTRIBUTE_NAME)
                        .attributeType(ScalarAttributeType.S)
                        .build())
                .deletionProtectionEnabled(config.deletionProtectionEnabled());

        if (nonNull(config.tags()) && !config.tags().isEmpty()) {
            requestBuilder.tags(config.tags());
        }

        switch (config.billingMode()) {
            case PAY_PER_REQUEST:
                requestBuilder.billingMode(BillingMode.PAY_PER_REQUEST);
                break;
            case PROVISIONED:
                requestBuilder.billingMode(BillingMode.PROVISIONED);

                final ProvisionedThroughput throughput = ProvisionedThroughput.builder()
                        .readCapacityUnits(config.readCapacity())
                        .writeCapacityUnits(config.writeCapacity())
                        .build();
                requestBuilder.provisionedThroughput(throughput);
                break;
        }
        return requestBuilder.build();
    }

    // Primary-key map ({hashKeyName -> key}) for Get/Update requests.
    private Map<String, AttributeValue> getCoordinatorStateKey(@NonNull final String key) {
        return Collections.singletonMap(
                COORDINATOR_STATE_TABLE_HASH_KEY_ATTRIBUTE_NAME, DynamoUtils.createAttributeValue(key));
    }

    // Deserializes a raw DDB item. MigrationState.deserialize returns null when the item
    // is not a migration record, in which case we fall back to a generic CoordinatorState
    // carrying the remaining (non-key) attributes verbatim.
    private CoordinatorState fromDynamoRecord(final Map<String, AttributeValue> dynamoRecord) {
        final HashMap<String, AttributeValue> attributes = new HashMap<>(dynamoRecord);
        final String keyValue =
                DynamoUtils.safeGetString(attributes.remove(COORDINATOR_STATE_TABLE_HASH_KEY_ATTRIBUTE_NAME));

        final MigrationState migrationState = MigrationState.deserialize(keyValue, attributes);
        if (migrationState != null) {
            log.debug("Retrieved MigrationState {}", migrationState);
            return migrationState;
        }

        final CoordinatorState c =
                CoordinatorState.builder().key(keyValue).attributes(attributes).build();
        log.debug("Retrieved coordinatorState {}", c);

        return c;
    }

    // Serializes state to a DDB item: hash key first, then MigrationState-specific
    // attributes (if applicable), then any generic attributes (which may overwrite).
    private Map<String, AttributeValue> toDynamoRecord(final CoordinatorState state) {
        final Map<String, AttributeValue> result = new HashMap<>();
        result.put(COORDINATOR_STATE_TABLE_HASH_KEY_ATTRIBUTE_NAME, DynamoUtils.createAttributeValue(state.getKey()));
        if (state instanceof MigrationState) {
            result.putAll(((MigrationState) state).serialize());
        }
        if (!CollectionUtils.isNullOrEmpty(state.getAttributes())) {
            result.putAll(state.getAttributes());
        }
        return result;
    }

    // Legacy conditional expression: succeed only if no item with this hash key exists.
    private Map<String, ExpectedAttributeValue> getDynamoNonExistentExpectation() {
        final Map<String, ExpectedAttributeValue> result = new HashMap<>();

        final ExpectedAttributeValue expectedAV =
                ExpectedAttributeValue.builder().exists(false).build();
        result.put(COORDINATOR_STATE_TABLE_HASH_KEY_ATTRIBUTE_NAME, expectedAV);

        return result;
    }

    // Legacy conditional expression: the stored hash key must equal keyValue,
    // i.e. the item must already exist. Returned map is mutable so callers can add to it.
    private Map<String, ExpectedAttributeValue> getDynamoExistentExpectation(final String keyValue) {
        final Map<String, ExpectedAttributeValue> result = new HashMap<>();

        final ExpectedAttributeValue expectedAV = ExpectedAttributeValue.builder()
                .value(AttributeValue.fromS(keyValue))
                .build();
        result.put(COORDINATOR_STATE_TABLE_HASH_KEY_ATTRIBUTE_NAME, expectedAV);

        return result;
    }

    // Builds PUT-style attribute updates: MigrationState updates first, then generic
    // attributes (which may overwrite the former on key collision).
    private Map<String, AttributeValueUpdate> getDynamoCoordinatorStateUpdate(final CoordinatorState state) {
        final HashMap<String, AttributeValueUpdate> updates = new HashMap<>();
        if (state instanceof MigrationState) {
            updates.putAll(((MigrationState) state).getDynamoUpdate());
        }
        state.getAttributes()
                .forEach((attribute, value) -> updates.put(
                        attribute,
                        AttributeValueUpdate.builder()
                                .value(value)
                                .action(AttributeAction.PUT)
                                .build()));
        return updates;
    }

    // Returns the table description, or null if the table does not exist.
    private TableDescription getTableDescription() {
        try {
            final DescribeTableResponse response = unwrappingFuture(() -> dynamoDbAsyncClient.describeTable(
                    DescribeTableRequest.builder().tableName(config.tableName()).build()));
            return response.table();
        } catch (final ResourceNotFoundException e) {
            return null;
        }
    }
}
|
||||
|
|
@ -28,12 +28,17 @@ import java.util.function.BooleanSupplier;
|
|||
import java.util.stream.Collectors;
|
||||
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import software.amazon.awssdk.services.cloudwatch.model.StandardUnit;
|
||||
import software.amazon.awssdk.utils.CollectionUtils;
|
||||
import software.amazon.kinesis.leases.Lease;
|
||||
import software.amazon.kinesis.leases.LeaseRefresher;
|
||||
import software.amazon.kinesis.leases.exceptions.DependencyException;
|
||||
import software.amazon.kinesis.leases.exceptions.InvalidStateException;
|
||||
import software.amazon.kinesis.leases.exceptions.ProvisionedThroughputException;
|
||||
import software.amazon.kinesis.metrics.MetricsFactory;
|
||||
import software.amazon.kinesis.metrics.MetricsLevel;
|
||||
import software.amazon.kinesis.metrics.MetricsScope;
|
||||
import software.amazon.kinesis.metrics.MetricsUtil;
|
||||
|
||||
/**
|
||||
* An implementation of the {@code LeaderDecider} to elect leader(s) based on workerId.
|
||||
|
|
@ -46,7 +51,7 @@ import software.amazon.kinesis.leases.exceptions.ProvisionedThroughputException;
|
|||
* This ensures redundancy for shard-sync during host failures.
|
||||
*/
|
||||
@Slf4j
|
||||
class DeterministicShuffleShardSyncLeaderDecider implements LeaderDecider {
|
||||
public class DeterministicShuffleShardSyncLeaderDecider implements LeaderDecider {
|
||||
// Fixed seed so that the shuffle order is preserved across workers
|
||||
static final int DETERMINISTIC_SHUFFLE_SEED = 1947;
|
||||
|
||||
|
|
@ -59,6 +64,7 @@ class DeterministicShuffleShardSyncLeaderDecider implements LeaderDecider {
|
|||
private final LeaseRefresher leaseRefresher;
|
||||
private final int numPeriodicShardSyncWorkers;
|
||||
private final ScheduledExecutorService leaderElectionThreadPool;
|
||||
private final MetricsFactory metricsFactory;
|
||||
|
||||
private volatile Set<String> leaders;
|
||||
|
||||
|
|
@ -67,11 +73,17 @@ class DeterministicShuffleShardSyncLeaderDecider implements LeaderDecider {
|
|||
* @param leaderElectionThreadPool Thread-pool to be used for leaderElection.
|
||||
* @param numPeriodicShardSyncWorkers Number of leaders that will be elected to perform periodic shard syncs.
|
||||
*/
|
||||
DeterministicShuffleShardSyncLeaderDecider(
|
||||
public DeterministicShuffleShardSyncLeaderDecider(
|
||||
LeaseRefresher leaseRefresher,
|
||||
ScheduledExecutorService leaderElectionThreadPool,
|
||||
int numPeriodicShardSyncWorkers) {
|
||||
this(leaseRefresher, leaderElectionThreadPool, numPeriodicShardSyncWorkers, new ReentrantReadWriteLock());
|
||||
int numPeriodicShardSyncWorkers,
|
||||
MetricsFactory metricsFactory) {
|
||||
this(
|
||||
leaseRefresher,
|
||||
leaderElectionThreadPool,
|
||||
numPeriodicShardSyncWorkers,
|
||||
new ReentrantReadWriteLock(),
|
||||
metricsFactory);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -84,11 +96,13 @@ class DeterministicShuffleShardSyncLeaderDecider implements LeaderDecider {
|
|||
LeaseRefresher leaseRefresher,
|
||||
ScheduledExecutorService leaderElectionThreadPool,
|
||||
int numPeriodicShardSyncWorkers,
|
||||
ReadWriteLock readWriteLock) {
|
||||
ReadWriteLock readWriteLock,
|
||||
MetricsFactory metricsFactory) {
|
||||
this.leaseRefresher = leaseRefresher;
|
||||
this.leaderElectionThreadPool = leaderElectionThreadPool;
|
||||
this.numPeriodicShardSyncWorkers = numPeriodicShardSyncWorkers;
|
||||
this.readWriteLock = readWriteLock;
|
||||
this.metricsFactory = metricsFactory;
|
||||
}
|
||||
|
||||
/*
|
||||
|
|
@ -146,8 +160,13 @@ class DeterministicShuffleShardSyncLeaderDecider implements LeaderDecider {
|
|||
ELECTION_SCHEDULING_INTERVAL_MILLIS,
|
||||
TimeUnit.MILLISECONDS);
|
||||
}
|
||||
|
||||
return executeConditionCheckWithReadLock(() -> isWorkerLeaderForShardSync(workerId));
|
||||
final boolean response = executeConditionCheckWithReadLock(() -> isWorkerLeaderForShardSync(workerId));
|
||||
final MetricsScope metricsScope =
|
||||
MetricsUtil.createMetricsWithOperation(metricsFactory, METRIC_OPERATION_LEADER_DECIDER);
|
||||
metricsScope.addData(
|
||||
METRIC_OPERATION_LEADER_DECIDER_IS_LEADER, response ? 1 : 0, StandardUnit.COUNT, MetricsLevel.DETAILED);
|
||||
MetricsUtil.endScope(metricsScope);
|
||||
return response;
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
|||
|
|
@ -0,0 +1,403 @@
|
|||
/*
|
||||
* Copyright 2024 Amazon.com, Inc. or its affiliates.
|
||||
* Licensed under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package software.amazon.kinesis.coordinator;
|
||||
|
||||
import java.time.Duration;
|
||||
import java.util.Objects;
|
||||
import java.util.concurrent.ScheduledExecutorService;
|
||||
import java.util.concurrent.ScheduledFuture;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
import java.util.function.BiFunction;
|
||||
import java.util.function.Supplier;
|
||||
|
||||
import lombok.AccessLevel;
|
||||
import lombok.Builder;
|
||||
import lombok.Getter;
|
||||
import lombok.experimental.Accessors;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import software.amazon.awssdk.annotations.ThreadSafe;
|
||||
import software.amazon.kinesis.annotations.KinesisClientInternalApi;
|
||||
import software.amazon.kinesis.coordinator.MigrationAdaptiveLeaseAssignmentModeProvider.LeaseAssignmentMode;
|
||||
import software.amazon.kinesis.coordinator.assignment.LeaseAssignmentManager;
|
||||
import software.amazon.kinesis.coordinator.migration.ClientVersion;
|
||||
import software.amazon.kinesis.leader.DynamoDBLockBasedLeaderDecider;
|
||||
import software.amazon.kinesis.leader.MigrationAdaptiveLeaderDecider;
|
||||
import software.amazon.kinesis.leases.LeaseManagementConfig.WorkerUtilizationAwareAssignmentConfig;
|
||||
import software.amazon.kinesis.leases.LeaseRefresher;
|
||||
import software.amazon.kinesis.leases.exceptions.DependencyException;
|
||||
import software.amazon.kinesis.metrics.MetricsFactory;
|
||||
import software.amazon.kinesis.worker.metricstats.WorkerMetricStatsDAO;
|
||||
import software.amazon.kinesis.worker.metricstats.WorkerMetricStatsManager;
|
||||
import software.amazon.kinesis.worker.metricstats.WorkerMetricStatsReporter;
|
||||
|
||||
import static software.amazon.kinesis.coordinator.MigrationAdaptiveLeaseAssignmentModeProvider.LeaseAssignmentMode.DEFAULT_LEASE_COUNT_BASED_ASSIGNMENT;
|
||||
import static software.amazon.kinesis.coordinator.MigrationAdaptiveLeaseAssignmentModeProvider.LeaseAssignmentMode.WORKER_UTILIZATION_AWARE_ASSIGNMENT;
|
||||
import static software.amazon.kinesis.coordinator.assignment.LeaseAssignmentManager.DEFAULT_NO_OF_SKIP_STAT_FOR_DEAD_WORKER_THRESHOLD;
|
||||
|
||||
/**
|
||||
* This class is responsible for initializing the KCL components that supports
|
||||
* seamless upgrade from v2.x to v3.x.
|
||||
* During specific versions, it also dynamically switches the functionality
|
||||
* to be either vanilla 3.x or 2.x compatible.
|
||||
*
|
||||
* It is responsible for creating:
|
||||
* 1. LeaderDecider
|
||||
* 2. LAM
|
||||
* 3. WorkerMetricStatsReporter
|
||||
*
|
||||
* It manages initializing the following components at initialization time
|
||||
* 1. workerMetricsDAO and workerMetricsManager
|
||||
* 2. leaderDecider
|
||||
* 3. MigrationAdaptiveLeaseAssignmentModeProvider
|
||||
*
|
||||
* It updates the following components dynamically:
|
||||
* 1. starts/stops LAM
|
||||
* 2. starts/stops WorkerMetricStatsReporter
|
||||
* 3. updates LeaseAssignmentMode to either DEFAULT_LEASE_COUNT_BASED_ASSIGNMENT or WORKER_UTILIZATION_AWARE_ASSIGNMENT
|
||||
* 4. creates GSI (deletion is done by KclMigrationTool)
|
||||
* 5. creates WorkerMetricStats table (deletion is done by KclMigrationTool)
|
||||
* 6. updates LeaderDecider to either DeterministicShuffleShardSyncLeaderDecider or DynamoDBLockBasedLeaderDecider
|
||||
*/
|
||||
@Slf4j
|
||||
@KinesisClientInternalApi
|
||||
@ThreadSafe
|
||||
@Accessors(fluent = true)
|
||||
public final class DynamicMigrationComponentsInitializer {
|
||||
private static final long SCHEDULER_SHUTDOWN_TIMEOUT_SECONDS = 60L;
|
||||
|
||||
@Getter
|
||||
private final MetricsFactory metricsFactory;
|
||||
|
||||
@Getter
|
||||
private final LeaseRefresher leaseRefresher;
|
||||
|
||||
private final CoordinatorStateDAO coordinatorStateDAO;
|
||||
private final ScheduledExecutorService workerMetricsThreadPool;
|
||||
|
||||
@Getter
|
||||
private final WorkerMetricStatsDAO workerMetricsDAO;
|
||||
|
||||
private final WorkerMetricStatsManager workerMetricsManager;
|
||||
private final ScheduledExecutorService lamThreadPool;
|
||||
private final BiFunction<ScheduledExecutorService, LeaderDecider, LeaseAssignmentManager> lamCreator;
|
||||
private final Supplier<MigrationAdaptiveLeaderDecider> adaptiveLeaderDeciderCreator;
|
||||
private final Supplier<DeterministicShuffleShardSyncLeaderDecider> deterministicLeaderDeciderCreator;
|
||||
private final Supplier<DynamoDBLockBasedLeaderDecider> ddbLockBasedLeaderDeciderCreator;
|
||||
|
||||
@Getter
|
||||
private final String workerIdentifier;
|
||||
|
||||
private final WorkerUtilizationAwareAssignmentConfig workerUtilizationAwareAssignmentConfig;
|
||||
|
||||
@Getter
|
||||
private final long workerMetricsExpirySeconds;
|
||||
|
||||
private final MigrationAdaptiveLeaseAssignmentModeProvider leaseModeChangeConsumer;
|
||||
|
||||
@Getter
|
||||
private LeaderDecider leaderDecider;
|
||||
|
||||
private LeaseAssignmentManager leaseAssignmentManager;
|
||||
private ScheduledFuture<?> workerMetricsReporterFuture;
|
||||
private LeaseAssignmentMode currentAssignmentMode;
|
||||
private boolean dualMode;
|
||||
private boolean initialized;
|
||||
|
||||
    /**
     * Package-private, Lombok-builder-backed constructor that wires all
     * migration-sensitive collaborators. Pure field assignment — no component
     * is started here; start-up happens in {@code initialize}.
     */
    @Builder(access = AccessLevel.PACKAGE)
    DynamicMigrationComponentsInitializer(
            final MetricsFactory metricsFactory,
            final LeaseRefresher leaseRefresher,
            final CoordinatorStateDAO coordinatorStateDAO,
            final ScheduledExecutorService workerMetricsThreadPool,
            final WorkerMetricStatsDAO workerMetricsDAO,
            final WorkerMetricStatsManager workerMetricsManager,
            final ScheduledExecutorService lamThreadPool,
            final BiFunction<ScheduledExecutorService, LeaderDecider, LeaseAssignmentManager> lamCreator,
            final Supplier<MigrationAdaptiveLeaderDecider> adaptiveLeaderDeciderCreator,
            final Supplier<DeterministicShuffleShardSyncLeaderDecider> deterministicLeaderDeciderCreator,
            final Supplier<DynamoDBLockBasedLeaderDecider> ddbLockBasedLeaderDeciderCreator,
            final String workerIdentifier,
            final WorkerUtilizationAwareAssignmentConfig workerUtilizationAwareAssignmentConfig,
            final MigrationAdaptiveLeaseAssignmentModeProvider leaseAssignmentModeProvider) {
        this.metricsFactory = metricsFactory;
        this.leaseRefresher = leaseRefresher;
        this.coordinatorStateDAO = coordinatorStateDAO;
        this.workerIdentifier = workerIdentifier;
        this.workerUtilizationAwareAssignmentConfig = workerUtilizationAwareAssignmentConfig;
        // Worker metrics expire after the configured reporting interval has been
        // missed DEFAULT_NO_OF_SKIP_STAT_FOR_DEAD_WORKER_THRESHOLD times (converted to seconds).
        this.workerMetricsExpirySeconds = Duration.ofMillis(DEFAULT_NO_OF_SKIP_STAT_FOR_DEAD_WORKER_THRESHOLD
                        * workerUtilizationAwareAssignmentConfig.workerMetricsReporterFreqInMillis())
                .getSeconds();
        this.workerMetricsManager = workerMetricsManager;
        this.workerMetricsDAO = workerMetricsDAO;
        this.workerMetricsThreadPool = workerMetricsThreadPool;
        this.lamThreadPool = lamThreadPool;
        this.lamCreator = lamCreator;
        this.adaptiveLeaderDeciderCreator = adaptiveLeaderDeciderCreator;
        this.deterministicLeaderDeciderCreator = deterministicLeaderDeciderCreator;
        this.ddbLockBasedLeaderDeciderCreator = ddbLockBasedLeaderDeciderCreator;
        this.leaseModeChangeConsumer = leaseAssignmentModeProvider;
    }
|
||||
|
||||
    /**
     * One-time initialization of migration-aware components. Idempotent: a second
     * call is a no-op. Chooses pure-3x wiring when starting at CLIENT_VERSION_3X,
     * otherwise migration (dual-mode) wiring, then creates the LeaseAssignmentManager
     * and primes the lease-assignment-mode provider.
     *
     * @param migrationStateMachineStartingClientVersion client version the migration
     *        state machine determined at start-up
     * @throws DependencyException if component initialization fails
     */
    public void initialize(final ClientVersion migrationStateMachineStartingClientVersion) throws DependencyException {
        if (initialized) {
            log.info("Already initialized, nothing to do");
            return;
        }

        // always collect metrics so that when we flip to start reporting we will have accurate historical data.
        log.info("Start collection of WorkerMetricStats");
        workerMetricsManager.startManager();
        if (migrationStateMachineStartingClientVersion == ClientVersion.CLIENT_VERSION_3X) {
            initializeComponentsFor3x();
        } else {
            initializeComponentsForMigration(migrationStateMachineStartingClientVersion);
        }
        log.info("Initialized dual mode {} current assignment mode {}", dualMode, currentAssignmentMode);

        // LAM must be created after the leader decider has been selected above.
        log.info("Creating LAM");
        leaseAssignmentManager = lamCreator.apply(lamThreadPool, leaderDecider);
        log.info("Initializing {}", leaseModeChangeConsumer.getClass().getSimpleName());
        leaseModeChangeConsumer.initialize(dualMode, currentAssignmentMode);
        initialized = true;
    }
|
||||
|
||||
    /**
     * Wire components for native KCL 3.x operation: dual mode off,
     * worker-utilization-aware lease assignment, and the DDB-lock-based
     * leader decider (initialized eagerly).
     */
    private void initializeComponentsFor3x() {
        log.info("Initializing for 3x functionality");
        dualMode = false;
        currentAssignmentMode = WORKER_UTILIZATION_AWARE_ASSIGNMENT;
        log.info("Initializing dualMode {} assignmentMode {}", dualMode, currentAssignmentMode);
        leaderDecider = ddbLockBasedLeaderDeciderCreator.get();
        log.info("Initializing {}", leaderDecider.getClass().getSimpleName());
        leaderDecider.initialize();
    }
|
||||
|
||||
    /**
     * Wire components for an in-progress 2.x -> 3.x migration (dual mode on).
     * The initial inner leader decider depends on the starting version:
     * 3X_WITH_ROLLBACK already runs 3.x behavior (utilization-aware assignment,
     * DDB-lock decider); any other version starts with 2.x-compatible behavior
     * (lease-count assignment, deterministic-shuffle decider). Either way the
     * decider is wrapped in a MigrationAdaptiveLeaderDecider so it can be
     * swapped dynamically later.
     * NOTE(review): unlike the 3x path, {@code initialize()} is not invoked on the
     * decider here — presumably deferred to the adaptive wrapper; confirm.
     */
    private void initializeComponentsForMigration(final ClientVersion migrationStateMachineStartingClientVersion) {
        log.info("Initializing for migration to 3x");
        dualMode = true;
        final LeaderDecider initialLeaderDecider;
        if (migrationStateMachineStartingClientVersion == ClientVersion.CLIENT_VERSION_3X_WITH_ROLLBACK) {
            currentAssignmentMode = WORKER_UTILIZATION_AWARE_ASSIGNMENT;
            initialLeaderDecider = ddbLockBasedLeaderDeciderCreator.get();
        } else {
            currentAssignmentMode = DEFAULT_LEASE_COUNT_BASED_ASSIGNMENT;
            initialLeaderDecider = deterministicLeaderDeciderCreator.get();
        }
        log.info("Initializing dualMode {} assignmentMode {}", dualMode, currentAssignmentMode);

        final MigrationAdaptiveLeaderDecider adaptiveLeaderDecider = adaptiveLeaderDeciderCreator.get();
        log.info(
                "Initializing MigrationAdaptiveLeaderDecider with {}",
                initialLeaderDecider.getClass().getSimpleName());
        adaptiveLeaderDecider.updateLeaderDecider(initialLeaderDecider);
        this.leaderDecider = adaptiveLeaderDecider;
    }
|
||||
|
||||
void shutdown() {
|
||||
log.info("Shutting down components");
|
||||
if (initialized) {
|
||||
log.info("Stopping LAM, LeaderDecider, workerMetrics reporting and collection");
|
||||
leaseAssignmentManager.stop();
|
||||
// leader decider is shut down later when scheduler is doing a final shutdown
|
||||
// since scheduler still accesses the leader decider while shutting down
|
||||
stopWorkerMetricsReporter();
|
||||
workerMetricsManager.stopManager();
|
||||
}
|
||||
|
||||
// lam does not manage lifecycle of its threadpool to easily stop/start dynamically.
|
||||
// once migration code is obsolete (i.e. all 3x functionality is the baseline and no
|
||||
// migration is needed), it can be moved inside lam
|
||||
log.info("Shutting down lamThreadPool and workerMetrics reporter thread pool");
|
||||
lamThreadPool.shutdown();
|
||||
workerMetricsThreadPool.shutdown();
|
||||
try {
|
||||
if (!lamThreadPool.awaitTermination(SCHEDULER_SHUTDOWN_TIMEOUT_SECONDS, TimeUnit.SECONDS)) {
|
||||
lamThreadPool.shutdownNow();
|
||||
}
|
||||
} catch (final InterruptedException e) {
|
||||
log.warn("Interrupted while waiting for shutdown of LeaseAssignmentManager ThreadPool", e);
|
||||
lamThreadPool.shutdownNow();
|
||||
}
|
||||
|
||||
try {
|
||||
if (!workerMetricsThreadPool.awaitTermination(SCHEDULER_SHUTDOWN_TIMEOUT_SECONDS, TimeUnit.SECONDS)) {
|
||||
workerMetricsThreadPool.shutdownNow();
|
||||
}
|
||||
} catch (final InterruptedException e) {
|
||||
Thread.currentThread().interrupt();
|
||||
log.warn("Interrupted while waiting for shutdown of WorkerMetricStatsManager ThreadPool", e);
|
||||
workerMetricsThreadPool.shutdownNow();
|
||||
}
|
||||
}
|
||||
|
||||
    /**
     * Initializes the WorkerMetricStats DAO and schedules the periodic reporter.
     * No-op if a reporter task is already scheduled.
     *
     * @throws DependencyException if WorkerMetricStats DAO initialization fails
     */
    private void startWorkerMetricsReporting() throws DependencyException {
        if (workerMetricsReporterFuture != null) {
            log.info("Worker metrics reporting is already running...");
            return;
        }
        log.info("Initializing WorkerMetricStats");
        this.workerMetricsDAO.initialize();
        log.info("Starting worker metrics reporter");
        // Start with a delay for workerStatsManager to capture some values and start reporting.
        // (initial delay = 2x the in-memory capture frequency, then fixed-rate at reporter frequency)
        workerMetricsReporterFuture = workerMetricsThreadPool.scheduleAtFixedRate(
                new WorkerMetricStatsReporter(metricsFactory, workerIdentifier, workerMetricsManager, workerMetricsDAO),
                workerUtilizationAwareAssignmentConfig.inMemoryWorkerMetricsCaptureFrequencyMillis() * 2L,
                workerUtilizationAwareAssignmentConfig.workerMetricsReporterFreqInMillis(),
                TimeUnit.MILLISECONDS);
    }
|
||||
|
||||
private void stopWorkerMetricsReporter() {
|
||||
log.info("Stopping worker metrics reporter");
|
||||
if (workerMetricsReporterFuture != null) {
|
||||
workerMetricsReporterFuture.cancel(false);
|
||||
workerMetricsReporterFuture = null;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Create LeaseOwnerToLeaseKey GSI for the lease table
|
||||
* @param blockingWait whether to wait for the GSI creation or not, if false, the gsi creation will be initiated
|
||||
* but this call will not block for its creation
|
||||
* @throws DependencyException If DDB fails unexpectedly when creating the GSI
|
||||
*/
|
||||
private void createGsi(final boolean blockingWait) throws DependencyException {
|
||||
log.info("Creating Lease table GSI if it does not exist");
|
||||
// KCLv3.0 always starts with GSI available
|
||||
leaseRefresher.createLeaseOwnerToLeaseKeyIndexIfNotExists();
|
||||
|
||||
if (blockingWait) {
|
||||
log.info("Waiting for Lease table GSI creation");
|
||||
final long secondsBetweenPolls = 10L;
|
||||
final long timeoutSeconds = 600L;
|
||||
final boolean isIndexActive =
|
||||
leaseRefresher.waitUntilLeaseOwnerToLeaseKeyIndexExists(secondsBetweenPolls, timeoutSeconds);
|
||||
|
||||
if (!isIndexActive) {
|
||||
throw new DependencyException(
|
||||
new IllegalStateException("Creating LeaseOwnerToLeaseKeyIndex on Lease table timed out"));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Initialize KCL with components and configuration to support upgrade from 2x. This can happen
|
||||
* at KCL Worker startup when MigrationStateMachine starts in ClientVersion.CLIENT_VERSION_UPGRADE_FROM_2X.
|
||||
* Or Dynamically during roll-forward from ClientVersion.CLIENT_VERSION_2X.
|
||||
*/
|
||||
public synchronized void initializeClientVersionForUpgradeFrom2x(final ClientVersion fromClientVersion)
|
||||
throws DependencyException {
|
||||
log.info("Initializing KCL components for upgrade from 2x from {}", fromClientVersion);
|
||||
|
||||
createGsi(false);
|
||||
startWorkerMetricsReporting();
|
||||
// LAM is not started until the dynamic flip to 3xWithRollback
|
||||
}
|
||||
|
||||
/**
|
||||
* Initialize KCL with components and configuration to run vanilla 3x functionality. This can happen
|
||||
* at KCL Worker startup when MigrationStateMachine starts in ClientVersion.CLIENT_VERSION_3X, or dynamically
|
||||
* during a new deployment when existing worker are in ClientVersion.CLIENT_VERSION_3X_WITH_ROLLBACK
|
||||
*/
|
||||
public synchronized void initializeClientVersionFor3x(final ClientVersion fromClientVersion)
|
||||
throws DependencyException {
|
||||
log.info("Initializing KCL components for 3x from {}", fromClientVersion);
|
||||
|
||||
log.info("Initializing LeaseAssignmentManager, DDB-lock-based leader decider, WorkerMetricStats manager"
|
||||
+ " and creating the Lease table GSI if it does not exist");
|
||||
if (fromClientVersion == ClientVersion.CLIENT_VERSION_INIT) {
|
||||
// gsi may already exist and be active for migrated application.
|
||||
createGsi(true);
|
||||
startWorkerMetricsReporting();
|
||||
log.info("Starting LAM");
|
||||
leaseAssignmentManager.start();
|
||||
}
|
||||
// nothing to do when transitioning from CLIENT_VERSION_3X_WITH_ROLLBACK.
|
||||
}
|
||||
|
||||
/**
|
||||
* Initialize KCL with components and configuration to run 2x compatible functionality
|
||||
* while allowing roll-forward. This can happen at KCL Worker startup when MigrationStateMachine
|
||||
* starts in ClientVersion.CLIENT_VERSION_2X (after a rollback)
|
||||
* Or Dynamically during rollback from CLIENT_VERSION_UPGRADE_FROM_2X or CLIENT_VERSION_3X_WITH_ROLLBACK.
|
||||
*/
|
||||
public synchronized void initializeClientVersionFor2x(final ClientVersion fromClientVersion) {
|
||||
log.info("Initializing KCL components for rollback to 2x from {}", fromClientVersion);
|
||||
|
||||
if (fromClientVersion != ClientVersion.CLIENT_VERSION_INIT) {
|
||||
// dynamic rollback
|
||||
stopWorkerMetricsReporter();
|
||||
// Migration Tool will delete the lease table LeaseOwner GSI
|
||||
// and WorkerMetricStats table
|
||||
}
|
||||
|
||||
if (fromClientVersion == ClientVersion.CLIENT_VERSION_3X_WITH_ROLLBACK) {
|
||||
// we are rolling back after flip
|
||||
currentAssignmentMode = DEFAULT_LEASE_COUNT_BASED_ASSIGNMENT;
|
||||
notifyLeaseAssignmentModeChange();
|
||||
log.info("Stopping LAM");
|
||||
leaseAssignmentManager.stop();
|
||||
final LeaderDecider leaderDecider = deterministicLeaderDeciderCreator.get();
|
||||
if (this.leaderDecider instanceof MigrationAdaptiveLeaderDecider) {
|
||||
log.info(
|
||||
"Updating LeaderDecider to {}", leaderDecider.getClass().getSimpleName());
|
||||
((MigrationAdaptiveLeaderDecider) this.leaderDecider).updateLeaderDecider(leaderDecider);
|
||||
} else {
|
||||
throw new IllegalStateException(String.format("Unexpected leader decider %s", this.leaderDecider));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Initialize KCL with components and configuration to run vanilla 3x functionality
|
||||
* while allowing roll-back to 2x functionality. This can happen at KCL Worker startup
|
||||
* when MigrationStateMachine starts in ClientVersion.CLIENT_VERSION_3X_WITH_ROLLBACK (after the flip)
|
||||
* Or Dynamically during flip from CLIENT_VERSION_UPGRADE_FROM_2X.
|
||||
*/
|
||||
public synchronized void initializeClientVersionFor3xWithRollback(final ClientVersion fromClientVersion)
|
||||
throws DependencyException {
|
||||
log.info("Initializing KCL components for 3x with rollback from {}", fromClientVersion);
|
||||
|
||||
if (fromClientVersion == ClientVersion.CLIENT_VERSION_UPGRADE_FROM_2X) {
|
||||
// dynamic flip
|
||||
currentAssignmentMode = WORKER_UTILIZATION_AWARE_ASSIGNMENT;
|
||||
notifyLeaseAssignmentModeChange();
|
||||
final LeaderDecider leaderDecider = ddbLockBasedLeaderDeciderCreator.get();
|
||||
log.info("Updating LeaderDecider to {}", leaderDecider.getClass().getSimpleName());
|
||||
((MigrationAdaptiveLeaderDecider) this.leaderDecider).updateLeaderDecider(leaderDecider);
|
||||
} else {
|
||||
startWorkerMetricsReporting();
|
||||
}
|
||||
|
||||
log.info("Starting LAM");
|
||||
leaseAssignmentManager.start();
|
||||
}
|
||||
|
||||
/**
|
||||
* Synchronously invoke the consumer to change the lease assignment mode.
|
||||
*/
|
||||
private void notifyLeaseAssignmentModeChange() {
|
||||
if (dualMode) {
|
||||
log.info("Notifying {} of {}", leaseModeChangeConsumer, currentAssignmentMode);
|
||||
if (Objects.nonNull(leaseModeChangeConsumer)) {
|
||||
try {
|
||||
leaseModeChangeConsumer.updateLeaseAssignmentMode(currentAssignmentMode);
|
||||
} catch (final Exception e) {
|
||||
log.warn("LeaseAssignmentMode change consumer threw exception", e);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
throw new IllegalStateException("Unexpected assignment mode change");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,144 @@
|
|||
package software.amazon.kinesis.coordinator;
|
||||
|
||||
import java.util.Objects;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.CompletionException;
import java.util.function.Supplier;

import software.amazon.awssdk.services.dynamodb.DynamoDbAsyncClient;
import software.amazon.awssdk.services.dynamodb.DynamoDbClient;
import software.amazon.awssdk.services.dynamodb.model.BatchGetItemRequest;
import software.amazon.awssdk.services.dynamodb.model.BatchGetItemResponse;
import software.amazon.awssdk.services.dynamodb.model.BatchWriteItemRequest;
import software.amazon.awssdk.services.dynamodb.model.BatchWriteItemResponse;
import software.amazon.awssdk.services.dynamodb.model.CreateTableRequest;
import software.amazon.awssdk.services.dynamodb.model.CreateTableResponse;
import software.amazon.awssdk.services.dynamodb.model.DeleteItemRequest;
import software.amazon.awssdk.services.dynamodb.model.DeleteItemResponse;
import software.amazon.awssdk.services.dynamodb.model.DeleteTableRequest;
import software.amazon.awssdk.services.dynamodb.model.DeleteTableResponse;
import software.amazon.awssdk.services.dynamodb.model.DescribeTableRequest;
import software.amazon.awssdk.services.dynamodb.model.DescribeTableResponse;
import software.amazon.awssdk.services.dynamodb.model.GetItemRequest;
import software.amazon.awssdk.services.dynamodb.model.GetItemResponse;
import software.amazon.awssdk.services.dynamodb.model.PutItemRequest;
import software.amazon.awssdk.services.dynamodb.model.PutItemResponse;
import software.amazon.awssdk.services.dynamodb.model.QueryRequest;
import software.amazon.awssdk.services.dynamodb.model.QueryResponse;
import software.amazon.awssdk.services.dynamodb.model.ScanRequest;
import software.amazon.awssdk.services.dynamodb.model.ScanResponse;
import software.amazon.awssdk.services.dynamodb.model.UpdateItemRequest;
import software.amazon.awssdk.services.dynamodb.model.UpdateItemResponse;
import software.amazon.awssdk.services.dynamodb.paginators.BatchGetItemIterable;
import software.amazon.awssdk.services.dynamodb.paginators.QueryIterable;
import software.amazon.awssdk.services.dynamodb.paginators.ScanIterable;
|
||||
|
||||
/**
|
||||
* DDB Lock client depends on DynamoDbClient and KCL only has DynamoDbAsyncClient configured.
|
||||
* This wrapper delegates APIs from sync client to async client internally so that it can
|
||||
* be used with the DDB Lock client.
|
||||
*/
|
||||
public class DynamoDbAsyncToSyncClientAdapter implements DynamoDbClient {
|
||||
private final DynamoDbAsyncClient asyncClient;
|
||||
|
||||
public DynamoDbAsyncToSyncClientAdapter(final DynamoDbAsyncClient asyncClient) {
|
||||
this.asyncClient = asyncClient;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String serviceName() {
|
||||
return asyncClient.serviceName();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void close() {
|
||||
asyncClient.close();
|
||||
}
|
||||
|
||||
private <T> T handleException(final Supplier<CompletableFuture<T>> task) {
|
||||
try {
|
||||
return task.get().join();
|
||||
} catch (final CompletionException e) {
|
||||
rethrow(e.getCause());
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public CreateTableResponse createTable(final CreateTableRequest request) {
|
||||
return handleException(() -> asyncClient.createTable(request));
|
||||
}
|
||||
|
||||
@Override
|
||||
public DescribeTableResponse describeTable(final DescribeTableRequest request) {
|
||||
return handleException(() -> asyncClient.describeTable(request));
|
||||
}
|
||||
|
||||
@Override
|
||||
public DeleteTableResponse deleteTable(final DeleteTableRequest request) {
|
||||
return handleException(() -> asyncClient.deleteTable(request));
|
||||
}
|
||||
|
||||
@Override
|
||||
public DeleteItemResponse deleteItem(final DeleteItemRequest request) {
|
||||
return handleException(() -> asyncClient.deleteItem(request));
|
||||
}
|
||||
|
||||
@Override
|
||||
public GetItemResponse getItem(final GetItemRequest request) {
|
||||
return handleException(() -> asyncClient.getItem(request));
|
||||
}
|
||||
|
||||
@Override
|
||||
public PutItemResponse putItem(final PutItemRequest request) {
|
||||
return handleException(() -> asyncClient.putItem(request));
|
||||
}
|
||||
|
||||
@Override
|
||||
public UpdateItemResponse updateItem(final UpdateItemRequest request) {
|
||||
return handleException(() -> asyncClient.updateItem(request));
|
||||
}
|
||||
|
||||
@Override
|
||||
public QueryResponse query(final QueryRequest request) {
|
||||
return handleException(() -> asyncClient.query(request));
|
||||
}
|
||||
|
||||
@Override
|
||||
public ScanResponse scan(final ScanRequest request) {
|
||||
return handleException(() -> asyncClient.scan(request));
|
||||
}
|
||||
|
||||
@Override
|
||||
public QueryIterable queryPaginator(final QueryRequest request) {
|
||||
return new QueryIterable(this, request);
|
||||
}
|
||||
|
||||
@Override
|
||||
public ScanIterable scanPaginator(final ScanRequest request) {
|
||||
return new ScanIterable(this, request);
|
||||
}
|
||||
|
||||
@Override
|
||||
public BatchGetItemResponse batchGetItem(final BatchGetItemRequest request) {
|
||||
return handleException(() -> asyncClient.batchGetItem(request));
|
||||
}
|
||||
|
||||
@Override
|
||||
public BatchWriteItemResponse batchWriteItem(final BatchWriteItemRequest request) {
|
||||
return handleException(() -> asyncClient.batchWriteItem(request));
|
||||
}
|
||||
|
||||
@Override
|
||||
public BatchGetItemIterable batchGetItemPaginator(final BatchGetItemRequest request) {
|
||||
return new BatchGetItemIterable(this, request);
|
||||
}
|
||||
|
||||
private static void rethrow(final Throwable e) {
|
||||
castAndThrow(e);
|
||||
}
|
||||
|
||||
@SuppressWarnings("unchecked")
|
||||
private static <T extends Throwable> void castAndThrow(final Throwable e) throws T {
|
||||
throw (T) e;
|
||||
}
|
||||
}
|
||||
|
|
@ -21,6 +21,8 @@ package software.amazon.kinesis.coordinator;
|
|||
* worker is one of the leaders designated to execute shard-sync and then acts accordingly.
|
||||
*/
|
||||
public interface LeaderDecider {
|
||||
String METRIC_OPERATION_LEADER_DECIDER = "LeaderDecider";
|
||||
String METRIC_OPERATION_LEADER_DECIDER_IS_LEADER = METRIC_OPERATION_LEADER_DECIDER + ":IsLeader";
|
||||
|
||||
/**
|
||||
* Method invoked to check the given workerId corresponds to one of the workers
|
||||
|
|
@ -36,4 +38,32 @@ public interface LeaderDecider {
|
|||
* being used in the LeaderDecider implementation.
|
||||
*/
|
||||
void shutdown();
|
||||
|
||||
/**
|
||||
* Performs initialization tasks for decider if any.
|
||||
*/
|
||||
default void initialize() {
|
||||
// No-op by default
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns if any ACTIVE leader exists that is elected by the current implementation.
|
||||
* Note: Some implementation (like DeterministicShuffleShardSyncLeaderDecider) will always have a leader and will
|
||||
* return true always.
|
||||
*/
|
||||
default boolean isAnyLeaderElected() {
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* If the current worker is the leader, then releases the leadership else does nothing.
|
||||
* This might not be relevant for some implementations, for e.g. DeterministicShuffleShardSyncLeaderDecider does
|
||||
* not have mechanism to release leadership.
|
||||
*
|
||||
* Current worker if leader releases leadership, it's possible that the current worker assume leadership sometime
|
||||
* later again in future elections.
|
||||
*/
|
||||
default void releaseLeadershipIfHeld() {
|
||||
// No-op by default
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -0,0 +1,126 @@
|
|||
/*
|
||||
* Copyright 2024 Amazon.com, Inc. or its affiliates.
|
||||
* Licensed under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package software.amazon.kinesis.coordinator;
|
||||
|
||||
import lombok.NoArgsConstructor;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import software.amazon.awssdk.annotations.ThreadSafe;
|
||||
import software.amazon.kinesis.annotations.KinesisClientInternalApi;
|
||||
|
||||
/**
|
||||
* Provides the lease assignment mode KCL must operate in during migration
|
||||
* from 2.x to 3.x.
|
||||
* KCL v2.x lease assignment is based on distributed-worker-stealing algorithm
|
||||
* which balances lease count across workers.
|
||||
* KCL v3.x lease assignment is based on a centralized-lease-assignment algorithm
|
||||
* which balances resource utilization metrics(e.g. CPU utilization) across workers.
|
||||
*
|
||||
* For a new application starting in KCL v3.x, there is no migration needed,
|
||||
* so KCL will initialize with the lease assignment mode accordingly, and it will
|
||||
* not change dynamically.
|
||||
*
|
||||
* During upgrade from 2.x to 3.x, KCL library needs an ability to
|
||||
* start in v2.x assignment mode but dynamically change to v3.x assignment.
|
||||
* In this case, both 2.x and 3.x lease assignment will be running but one
|
||||
* of them will be a no-op based on the mode.
|
||||
*
|
||||
* The methods and internal state is guarded for concurrent access to allow
|
||||
* both lease assignment algorithms to access the state concurrently while
|
||||
* it could be dynamically updated.
|
||||
*/
|
||||
@KinesisClientInternalApi
|
||||
@Slf4j
|
||||
@ThreadSafe
|
||||
@NoArgsConstructor
|
||||
public final class MigrationAdaptiveLeaseAssignmentModeProvider {
|
||||
|
||||
public enum LeaseAssignmentMode {
|
||||
/**
|
||||
* This is the 2.x assignment mode.
|
||||
* This mode assigns leases based on the number of leases.
|
||||
* This mode involves each worker independently determining how many leases to pick or how many leases to steal
|
||||
* from other workers.
|
||||
*/
|
||||
DEFAULT_LEASE_COUNT_BASED_ASSIGNMENT,
|
||||
|
||||
/**
|
||||
* This is the 3.x assigment mode.
|
||||
* This mode uses each worker's resource utilization to perform lease assignment.
|
||||
* Assignment is done by a single worker (elected leader), which looks at WorkerMetricStats for each worker to
|
||||
* determine lease assignment.
|
||||
*
|
||||
* This mode primarily does
|
||||
* 1. Starts WorkerMetricStatsManager on the worker which starts publishing WorkerMetricStats
|
||||
* 2. Starts the LeaseDiscoverer
|
||||
* 3. Creates if not already available the LeaseOwnerToLeaseKey GSI on the lease table and validate that is
|
||||
* ACTIVE.
|
||||
*/
|
||||
WORKER_UTILIZATION_AWARE_ASSIGNMENT;
|
||||
}
|
||||
|
||||
private LeaseAssignmentMode currentMode;
|
||||
private boolean initialized = false;
|
||||
private boolean dynamicModeChangeSupportNeeded;
|
||||
|
||||
/**
|
||||
* Specify whether both lease assignment algorithms should be initialized to
|
||||
* support dynamically changing lease mode.
|
||||
* @return true if lease assignment mode can change dynamically
|
||||
* false otherwise.
|
||||
*/
|
||||
public synchronized boolean dynamicModeChangeSupportNeeded() {
|
||||
return dynamicModeChangeSupportNeeded;
|
||||
}
|
||||
|
||||
/**
|
||||
* Provide the current lease assignment mode in which KCL should perform lease assignment
|
||||
* @return the current lease assignment mode
|
||||
*/
|
||||
public synchronized LeaseAssignmentMode getLeaseAssignmentMode() {
|
||||
if (!initialized) {
|
||||
throw new IllegalStateException("AssignmentMode is not initialized");
|
||||
}
|
||||
return currentMode;
|
||||
}
|
||||
|
||||
synchronized void initialize(final boolean dynamicModeChangeSupportNeeded, final LeaseAssignmentMode mode) {
|
||||
if (!initialized) {
|
||||
log.info("Initializing dynamicModeChangeSupportNeeded {} mode {}", dynamicModeChangeSupportNeeded, mode);
|
||||
this.dynamicModeChangeSupportNeeded = dynamicModeChangeSupportNeeded;
|
||||
this.currentMode = mode;
|
||||
this.initialized = true;
|
||||
return;
|
||||
}
|
||||
log.info(
|
||||
"Already initialized dynamicModeChangeSupportNeeded {} mode {}. Ignoring new values {}, {}",
|
||||
this.dynamicModeChangeSupportNeeded,
|
||||
this.currentMode,
|
||||
dynamicModeChangeSupportNeeded,
|
||||
mode);
|
||||
}
|
||||
|
||||
synchronized void updateLeaseAssignmentMode(final LeaseAssignmentMode mode) {
|
||||
if (!initialized) {
|
||||
throw new IllegalStateException("Cannot change mode before initializing");
|
||||
}
|
||||
if (dynamicModeChangeSupportNeeded) {
|
||||
log.info("Changing Lease assignment mode from {} to {}", currentMode, mode);
|
||||
this.currentMode = mode;
|
||||
return;
|
||||
}
|
||||
throw new IllegalStateException(String.format(
|
||||
"Lease assignment mode already initialized to %s cannot" + " change to %s", this.currentMode, mode));
|
||||
}
|
||||
}
|
||||
|
|
@ -87,7 +87,7 @@ class PeriodicShardSyncManager {
|
|||
private final Map<StreamIdentifier, HashRangeHoleTracker> hashRangeHoleTrackerMap = new HashMap<>();
|
||||
|
||||
private final String workerId;
|
||||
private final LeaderDecider leaderDecider;
|
||||
private LeaderDecider leaderDecider;
|
||||
private final LeaseRefresher leaseRefresher;
|
||||
private final Map<StreamIdentifier, StreamConfig> currentStreamConfigMap;
|
||||
private final Function<StreamConfig, ShardSyncTaskManager> shardSyncTaskManagerProvider;
|
||||
|
|
@ -105,7 +105,6 @@ class PeriodicShardSyncManager {
|
|||
|
||||
PeriodicShardSyncManager(
|
||||
String workerId,
|
||||
LeaderDecider leaderDecider,
|
||||
LeaseRefresher leaseRefresher,
|
||||
Map<StreamIdentifier, StreamConfig> currentStreamConfigMap,
|
||||
Function<StreamConfig, ShardSyncTaskManager> shardSyncTaskManagerProvider,
|
||||
|
|
@ -117,7 +116,6 @@ class PeriodicShardSyncManager {
|
|||
AtomicBoolean leaderSynced) {
|
||||
this(
|
||||
workerId,
|
||||
leaderDecider,
|
||||
leaseRefresher,
|
||||
currentStreamConfigMap,
|
||||
shardSyncTaskManagerProvider,
|
||||
|
|
@ -132,7 +130,6 @@ class PeriodicShardSyncManager {
|
|||
|
||||
PeriodicShardSyncManager(
|
||||
String workerId,
|
||||
LeaderDecider leaderDecider,
|
||||
LeaseRefresher leaseRefresher,
|
||||
Map<StreamIdentifier, StreamConfig> currentStreamConfigMap,
|
||||
Function<StreamConfig, ShardSyncTaskManager> shardSyncTaskManagerProvider,
|
||||
|
|
@ -144,9 +141,7 @@ class PeriodicShardSyncManager {
|
|||
int leasesRecoveryAuditorInconsistencyConfidenceThreshold,
|
||||
AtomicBoolean leaderSynced) {
|
||||
Validate.notBlank(workerId, "WorkerID is required to initialize PeriodicShardSyncManager.");
|
||||
Validate.notNull(leaderDecider, "LeaderDecider is required to initialize PeriodicShardSyncManager.");
|
||||
this.workerId = workerId;
|
||||
this.leaderDecider = leaderDecider;
|
||||
this.leaseRefresher = leaseRefresher;
|
||||
this.currentStreamConfigMap = currentStreamConfigMap;
|
||||
this.shardSyncTaskManagerProvider = shardSyncTaskManagerProvider;
|
||||
|
|
@ -160,7 +155,9 @@ class PeriodicShardSyncManager {
|
|||
this.leaderSynced = leaderSynced;
|
||||
}
|
||||
|
||||
public synchronized TaskResult start() {
|
||||
public synchronized TaskResult start(final LeaderDecider leaderDecider) {
|
||||
Validate.notNull(leaderDecider, "LeaderDecider is required to start PeriodicShardSyncManager.");
|
||||
this.leaderDecider = leaderDecider;
|
||||
if (!isRunning) {
|
||||
final Runnable periodicShardSyncer = () -> {
|
||||
try {
|
||||
|
|
@ -435,7 +432,7 @@ class PeriodicShardSyncManager {
|
|||
leaseRefresher.updateLeaseWithMetaInfo(lease, UpdateField.HASH_KEY_RANGE);
|
||||
} catch (Exception e) {
|
||||
log.warn(
|
||||
"Unable to update hash range key information for lease {} of stream {}."
|
||||
"Unable to update hash range key information for lease {} of stream {}. "
|
||||
+ "This may result in explicit lease sync.",
|
||||
lease.leaseKey(),
|
||||
streamIdentifier);
|
||||
|
|
|
|||
|
|
@ -26,6 +26,7 @@ import java.util.Iterator;
|
|||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Optional;
|
||||
import java.util.Random;
|
||||
import java.util.Set;
|
||||
import java.util.concurrent.Callable;
|
||||
import java.util.concurrent.CompletableFuture;
|
||||
|
|
@ -44,6 +45,7 @@ import java.util.stream.Collectors;
|
|||
|
||||
import com.google.common.annotations.VisibleForTesting;
|
||||
import com.google.common.base.Stopwatch;
|
||||
import com.google.common.util.concurrent.ThreadFactoryBuilder;
|
||||
import io.reactivex.rxjava3.plugins.RxJavaPlugins;
|
||||
import lombok.AccessLevel;
|
||||
import lombok.Getter;
|
||||
|
|
@ -55,15 +57,23 @@ import lombok.extern.slf4j.Slf4j;
|
|||
import software.amazon.awssdk.arns.Arn;
|
||||
import software.amazon.awssdk.regions.Region;
|
||||
import software.amazon.awssdk.utils.Validate;
|
||||
import software.amazon.kinesis.annotations.KinesisClientInternalApi;
|
||||
import software.amazon.kinesis.checkpoint.CheckpointConfig;
|
||||
import software.amazon.kinesis.checkpoint.ShardRecordProcessorCheckpointer;
|
||||
import software.amazon.kinesis.common.StreamConfig;
|
||||
import software.amazon.kinesis.common.StreamIdentifier;
|
||||
import software.amazon.kinesis.coordinator.assignment.LeaseAssignmentManager;
|
||||
import software.amazon.kinesis.coordinator.migration.MigrationStateMachine;
|
||||
import software.amazon.kinesis.coordinator.migration.MigrationStateMachineImpl;
|
||||
import software.amazon.kinesis.leader.DynamoDBLockBasedLeaderDecider;
|
||||
import software.amazon.kinesis.leader.MigrationAdaptiveLeaderDecider;
|
||||
import software.amazon.kinesis.leases.HierarchicalShardSyncer;
|
||||
import software.amazon.kinesis.leases.Lease;
|
||||
import software.amazon.kinesis.leases.LeaseCleanupManager;
|
||||
import software.amazon.kinesis.leases.LeaseCoordinator;
|
||||
import software.amazon.kinesis.leases.LeaseManagementConfig;
|
||||
import software.amazon.kinesis.leases.LeaseManagementConfig.WorkerUtilizationAwareAssignmentConfig;
|
||||
import software.amazon.kinesis.leases.LeaseManagementFactory;
|
||||
import software.amazon.kinesis.leases.LeaseRefresher;
|
||||
import software.amazon.kinesis.leases.LeaseSerializer;
|
||||
import software.amazon.kinesis.leases.MultiStreamLease;
|
||||
|
|
@ -98,6 +108,9 @@ import software.amazon.kinesis.retrieval.AggregatorUtil;
|
|||
import software.amazon.kinesis.retrieval.RecordsPublisher;
|
||||
import software.amazon.kinesis.retrieval.RetrievalConfig;
|
||||
import software.amazon.kinesis.schemaregistry.SchemaRegistryDecoder;
|
||||
import software.amazon.kinesis.worker.WorkerMetricsSelector;
|
||||
import software.amazon.kinesis.worker.metricstats.WorkerMetricStatsDAO;
|
||||
import software.amazon.kinesis.worker.metricstats.WorkerMetricStatsManager;
|
||||
|
||||
import static software.amazon.kinesis.common.ArnUtil.constructStreamArn;
|
||||
import static software.amazon.kinesis.processor.FormerStreamsLeasesDeletionStrategy.StreamsLeasesDeletionType;
|
||||
|
|
@ -106,12 +119,14 @@ import static software.amazon.kinesis.processor.FormerStreamsLeasesDeletionStrat
|
|||
/**
|
||||
*
|
||||
*/
|
||||
@Getter
|
||||
@Getter(AccessLevel.PRIVATE)
|
||||
@Accessors(fluent = true)
|
||||
@Slf4j
|
||||
@KinesisClientInternalApi
|
||||
public class Scheduler implements Runnable {
|
||||
|
||||
private static final int PERIODIC_SHARD_SYNC_MAX_WORKERS_DEFAULT = 1;
|
||||
|
||||
private static final long LEASE_TABLE_CHECK_FREQUENCY_MILLIS = 3 * 1000L;
|
||||
private static final long MIN_WAIT_TIME_FOR_LEASE_TABLE_CHECK_MILLIS = 1000L;
|
||||
private static final long MAX_WAIT_TIME_FOR_LEASE_TABLE_CHECK_MILLIS = 30 * 1000L;
|
||||
|
|
@ -133,7 +148,9 @@ public class Scheduler implements Runnable {
|
|||
private final ProcessorConfig processorConfig;
|
||||
private final RetrievalConfig retrievalConfig;
|
||||
|
||||
@Getter(AccessLevel.PACKAGE)
|
||||
private final String applicationName;
|
||||
|
||||
private final int maxInitializationAttempts;
|
||||
private final Checkpointer checkpoint;
|
||||
private final long shardConsumerDispatchPollIntervalMillis;
|
||||
|
|
@ -156,7 +173,10 @@ public class Scheduler implements Runnable {
|
|||
private final long failoverTimeMillis;
|
||||
private final long taskBackoffTimeMillis;
|
||||
private final boolean isMultiStreamMode;
|
||||
|
||||
@Getter(AccessLevel.PACKAGE)
|
||||
private final Map<StreamIdentifier, StreamConfig> currentStreamConfigMap = new StreamConfigMap();
|
||||
|
||||
private final StreamTracker streamTracker;
|
||||
private final FormerStreamsLeasesDeletionStrategy formerStreamsLeasesDeletionStrategy;
|
||||
private final long listShardsBackoffTimeMillis;
|
||||
|
|
@ -167,19 +187,30 @@ public class Scheduler implements Runnable {
|
|||
private final AggregatorUtil aggregatorUtil;
|
||||
private final Function<StreamConfig, HierarchicalShardSyncer> hierarchicalShardSyncerProvider;
|
||||
private final long schedulerInitializationBackoffTimeMillis;
|
||||
private final LeaderDecider leaderDecider;
|
||||
private LeaderDecider leaderDecider;
|
||||
|
||||
@Getter(AccessLevel.PACKAGE)
|
||||
private final Map<StreamIdentifier, Instant> staleStreamDeletionMap = new HashMap<>();
|
||||
|
||||
private final LeaseCleanupManager leaseCleanupManager;
|
||||
private final SchemaRegistryDecoder schemaRegistryDecoder;
|
||||
|
||||
@Getter(AccessLevel.PACKAGE)
|
||||
private final DeletedStreamListProvider deletedStreamListProvider;
|
||||
|
||||
private final MigrationStateMachine migrationStateMachine;
|
||||
private final DynamicMigrationComponentsInitializer migrationComponentsInitializer;
|
||||
private final MigrationAdaptiveLeaseAssignmentModeProvider leaseAssignmentModeProvider;
|
||||
|
||||
// Holds consumers for shards the worker is currently tracking. Key is shard
|
||||
// info, value is ShardConsumer.
|
||||
@Getter(AccessLevel.PACKAGE)
|
||||
private final ConcurrentMap<ShardInfo, ShardConsumer> shardInfoShardConsumerMap = new ConcurrentHashMap<>();
|
||||
|
||||
private volatile boolean shutdown;
|
||||
private volatile long shutdownStartTimeMillis;
|
||||
|
||||
@Getter(AccessLevel.PACKAGE)
|
||||
private volatile boolean shutdownComplete = false;
|
||||
|
||||
private final Object lock = new Object();
|
||||
|
|
@ -187,8 +218,6 @@ public class Scheduler implements Runnable {
|
|||
private final Stopwatch streamSyncWatch = Stopwatch.createUnstarted();
|
||||
|
||||
private boolean leasesSyncedOnAppInit = false;
|
||||
|
||||
@Getter(AccessLevel.NONE)
|
||||
private final AtomicBoolean leaderSynced = new AtomicBoolean(false);
|
||||
|
||||
/**
|
||||
|
|
@ -200,7 +229,6 @@ public class Scheduler implements Runnable {
|
|||
* CountDownLatch used by the GracefulShutdownCoordinator. Reaching zero means that
|
||||
* the scheduler's finalShutdown() call has completed.
|
||||
*/
|
||||
@Getter(AccessLevel.NONE)
|
||||
private final CountDownLatch finalShutdownLatch = new CountDownLatch(1);
|
||||
|
||||
@VisibleForTesting
|
||||
|
|
@ -259,11 +287,32 @@ public class Scheduler implements Runnable {
|
|||
// Determine leaseSerializer based on availability of MultiStreamTracker.
|
||||
final LeaseSerializer leaseSerializer =
|
||||
isMultiStreamMode ? new DynamoDBMultiStreamLeaseSerializer() : new DynamoDBLeaseSerializer();
|
||||
this.leaseCoordinator = this.leaseManagementConfig
|
||||
.leaseManagementFactory(leaseSerializer, isMultiStreamMode)
|
||||
.createLeaseCoordinator(this.metricsFactory);
|
||||
|
||||
final LeaseManagementFactory leaseManagementFactory =
|
||||
this.leaseManagementConfig.leaseManagementFactory(leaseSerializer, isMultiStreamMode);
|
||||
this.leaseCoordinator =
|
||||
leaseManagementFactory.createLeaseCoordinator(this.metricsFactory, shardInfoShardConsumerMap);
|
||||
this.leaseRefresher = this.leaseCoordinator.leaseRefresher();
|
||||
|
||||
final CoordinatorStateDAO coordinatorStateDAO = new CoordinatorStateDAO(
|
||||
leaseManagementConfig.dynamoDBClient(), coordinatorConfig().coordinatorStateTableConfig());
|
||||
this.leaseAssignmentModeProvider = new MigrationAdaptiveLeaseAssignmentModeProvider();
|
||||
this.migrationComponentsInitializer = createDynamicMigrationComponentsInitializer(coordinatorStateDAO);
|
||||
this.migrationStateMachine = new MigrationStateMachineImpl(
|
||||
metricsFactory,
|
||||
System::currentTimeMillis,
|
||||
coordinatorStateDAO,
|
||||
Executors.newScheduledThreadPool(
|
||||
2,
|
||||
new ThreadFactoryBuilder()
|
||||
.setNameFormat("MigrationStateMachine-%04d")
|
||||
.build()),
|
||||
coordinatorConfig.clientVersionConfig(),
|
||||
new Random(),
|
||||
this.migrationComponentsInitializer,
|
||||
leaseManagementConfig.workerIdentifier(),
|
||||
Duration.ofMinutes(10).getSeconds());
|
||||
|
||||
//
|
||||
// TODO: Figure out what to do with lease manage <=> checkpoint relationship
|
||||
//
|
||||
|
|
@ -280,9 +329,8 @@ public class Scheduler implements Runnable {
|
|||
this.diagnosticEventFactory = diagnosticEventFactory;
|
||||
this.diagnosticEventHandler = new DiagnosticEventLogger();
|
||||
this.deletedStreamListProvider = new DeletedStreamListProvider();
|
||||
this.shardSyncTaskManagerProvider = streamConfig -> this.leaseManagementConfig
|
||||
.leaseManagementFactory(leaseSerializer, isMultiStreamMode)
|
||||
.createShardSyncTaskManager(this.metricsFactory, streamConfig, this.deletedStreamListProvider);
|
||||
this.shardSyncTaskManagerProvider = streamConfig -> leaseManagementFactory.createShardSyncTaskManager(
|
||||
this.metricsFactory, streamConfig, this.deletedStreamListProvider);
|
||||
this.shardPrioritization = this.coordinatorConfig.shardPrioritization();
|
||||
this.cleanupLeasesUponShardCompletion = this.leaseManagementConfig.cleanupLeasesUponShardCompletion();
|
||||
this.skipShardSyncAtWorkerInitializationIfLeasesExist =
|
||||
|
|
@ -299,8 +347,6 @@ public class Scheduler implements Runnable {
|
|||
this.workerStateChangeListener =
|
||||
this.coordinatorConfig.coordinatorFactory().createWorkerStateChangeListener();
|
||||
}
|
||||
this.leaderDecider = new DeterministicShuffleShardSyncLeaderDecider(
|
||||
leaseRefresher, Executors.newSingleThreadScheduledExecutor(), PERIODIC_SHARD_SYNC_MAX_WORKERS_DEFAULT);
|
||||
this.failoverTimeMillis = this.leaseManagementConfig.failoverTimeMillis();
|
||||
this.taskBackoffTimeMillis = this.lifecycleConfig.taskBackoffTimeMillis();
|
||||
this.listShardsBackoffTimeMillis = this.retrievalConfig.listShardsBackoffTimeInMillis();
|
||||
|
|
@ -315,7 +361,6 @@ public class Scheduler implements Runnable {
|
|||
this.coordinatorConfig.schedulerInitializationBackoffTimeMillis();
|
||||
this.leaderElectedPeriodicShardSyncManager = new PeriodicShardSyncManager(
|
||||
leaseManagementConfig.workerIdentifier(),
|
||||
leaderDecider,
|
||||
leaseRefresher,
|
||||
currentStreamConfigMap,
|
||||
shardSyncTaskManagerProvider,
|
||||
|
|
@ -325,14 +370,69 @@ public class Scheduler implements Runnable {
|
|||
leaseManagementConfig.leasesRecoveryAuditorExecutionFrequencyMillis(),
|
||||
leaseManagementConfig.leasesRecoveryAuditorInconsistencyConfidenceThreshold(),
|
||||
leaderSynced);
|
||||
this.leaseCleanupManager = this.leaseManagementConfig
|
||||
.leaseManagementFactory(leaseSerializer, isMultiStreamMode)
|
||||
.createLeaseCleanupManager(metricsFactory);
|
||||
this.leaseCleanupManager = leaseManagementFactory.createLeaseCleanupManager(metricsFactory);
|
||||
this.schemaRegistryDecoder = this.retrievalConfig.glueSchemaRegistryDeserializer() == null
|
||||
? null
|
||||
: new SchemaRegistryDecoder(this.retrievalConfig.glueSchemaRegistryDeserializer());
|
||||
}
|
||||
|
||||
/**
|
||||
* Depends on LeaseCoordinator and LeaseRefresher to be created first
|
||||
*/
|
||||
private DynamicMigrationComponentsInitializer createDynamicMigrationComponentsInitializer(
|
||||
final CoordinatorStateDAO coordinatorStateDAO) {
|
||||
selectWorkerMetricsIfAvailable(leaseManagementConfig.workerUtilizationAwareAssignmentConfig());
|
||||
|
||||
final WorkerMetricStatsManager workerMetricsManager = new WorkerMetricStatsManager(
|
||||
leaseManagementConfig.workerUtilizationAwareAssignmentConfig().noOfPersistedMetricsPerWorkerMetrics(),
|
||||
leaseManagementConfig.workerUtilizationAwareAssignmentConfig().workerMetricList(),
|
||||
metricsFactory,
|
||||
leaseManagementConfig
|
||||
.workerUtilizationAwareAssignmentConfig()
|
||||
.inMemoryWorkerMetricsCaptureFrequencyMillis());
|
||||
|
||||
final WorkerMetricStatsDAO workerMetricsDAO = new WorkerMetricStatsDAO(
|
||||
leaseManagementConfig.dynamoDBClient(),
|
||||
leaseManagementConfig.workerUtilizationAwareAssignmentConfig().workerMetricsTableConfig(),
|
||||
leaseManagementConfig.workerUtilizationAwareAssignmentConfig().workerMetricsReporterFreqInMillis());
|
||||
|
||||
return DynamicMigrationComponentsInitializer.builder()
|
||||
.metricsFactory(metricsFactory)
|
||||
.leaseRefresher(leaseRefresher)
|
||||
.coordinatorStateDAO(coordinatorStateDAO)
|
||||
.workerMetricsThreadPool(Executors.newScheduledThreadPool(
|
||||
1,
|
||||
new ThreadFactoryBuilder()
|
||||
.setNameFormat("worker-metrics-reporter")
|
||||
.build()))
|
||||
.workerMetricsDAO(workerMetricsDAO)
|
||||
.workerMetricsManager(workerMetricsManager)
|
||||
.lamThreadPool(Executors.newScheduledThreadPool(
|
||||
1,
|
||||
new ThreadFactoryBuilder().setNameFormat("lam-thread").build()))
|
||||
.lamCreator((lamThreadPool, leaderDecider) -> new LeaseAssignmentManager(
|
||||
leaseRefresher,
|
||||
workerMetricsDAO,
|
||||
leaderDecider,
|
||||
leaseManagementConfig.workerUtilizationAwareAssignmentConfig(),
|
||||
leaseCoordinator.workerIdentifier(),
|
||||
leaseManagementConfig.failoverTimeMillis(),
|
||||
metricsFactory,
|
||||
lamThreadPool,
|
||||
System::nanoTime,
|
||||
leaseManagementConfig.maxLeasesForWorker(),
|
||||
leaseManagementConfig.gracefulLeaseHandoffConfig()))
|
||||
.adaptiveLeaderDeciderCreator(() -> new MigrationAdaptiveLeaderDecider(metricsFactory))
|
||||
.deterministicLeaderDeciderCreator(() -> new DeterministicShuffleShardSyncLeaderDecider(
|
||||
leaseRefresher, Executors.newSingleThreadScheduledExecutor(), 1, metricsFactory))
|
||||
.ddbLockBasedLeaderDeciderCreator(() -> DynamoDBLockBasedLeaderDecider.create(
|
||||
coordinatorStateDAO, leaseCoordinator.workerIdentifier(), metricsFactory))
|
||||
.workerIdentifier(leaseCoordinator.workerIdentifier())
|
||||
.workerUtilizationAwareAssignmentConfig(leaseManagementConfig.workerUtilizationAwareAssignmentConfig())
|
||||
.leaseAssignmentModeProvider(leaseAssignmentModeProvider)
|
||||
.build();
|
||||
}
|
||||
|
||||
/**
|
||||
* Start consuming data from the stream, and pass it to the application record processors.
|
||||
*/
|
||||
|
|
@ -342,13 +442,19 @@ public class Scheduler implements Runnable {
|
|||
return;
|
||||
}
|
||||
|
||||
final MetricsScope metricsScope =
|
||||
MetricsUtil.createMetricsWithOperation(metricsFactory, "Scheduler:Initialize");
|
||||
boolean success = false;
|
||||
try {
|
||||
initialize();
|
||||
success = true;
|
||||
log.info("Initialization complete. Starting worker loop.");
|
||||
} catch (RuntimeException e) {
|
||||
log.error("Unable to initialize after {} attempts. Shutting down.", maxInitializationAttempts, e);
|
||||
workerStateChangeListener.onAllInitializationAttemptsFailed(e);
|
||||
shutdown();
|
||||
} finally {
|
||||
MetricsUtil.addSuccess(metricsScope, "Initialize", success, MetricsLevel.SUMMARY);
|
||||
}
|
||||
while (!shouldShutdown()) {
|
||||
runProcessLoop();
|
||||
|
|
@ -363,14 +469,13 @@ public class Scheduler implements Runnable {
|
|||
synchronized (lock) {
|
||||
registerErrorHandlerForUndeliverableAsyncTaskExceptions();
|
||||
workerStateChangeListener.onWorkerStateChange(WorkerStateChangeListener.WorkerState.INITIALIZING);
|
||||
|
||||
boolean isDone = false;
|
||||
Exception lastException = null;
|
||||
|
||||
for (int i = 0; (!isDone) && (i < maxInitializationAttempts); i++) {
|
||||
try {
|
||||
log.info("Initializing LeaseCoordinator attempt {}", (i + 1));
|
||||
leaseCoordinator.initialize();
|
||||
|
||||
if (!skipShardSyncAtWorkerInitializationIfLeasesExist || leaseRefresher.isLeaseTableEmpty()) {
|
||||
if (shouldInitiateLeaseSync()) {
|
||||
log.info(
|
||||
|
|
@ -382,21 +487,29 @@ public class Scheduler implements Runnable {
|
|||
log.info("Skipping shard sync per configuration setting (and lease table is not empty)");
|
||||
}
|
||||
|
||||
// Initialize the state machine after lease table has been initialized
|
||||
// Migration state machine creates and waits for GSI if necessary,
|
||||
// it must be initialized before starting leaseCoordinator, which runs LeaseDiscoverer
|
||||
// and that requires GSI to be present and active. (migrationStateMachine.initialize is idempotent)
|
||||
migrationStateMachine.initialize();
|
||||
leaderDecider = migrationComponentsInitializer.leaderDecider();
|
||||
|
||||
leaseCleanupManager.start();
|
||||
|
||||
// If we reach this point, then we either skipped the lease sync or did not have any exception
|
||||
// for any of the shard sync in the previous attempt.
|
||||
|
||||
if (!leaseCoordinator.isRunning()) {
|
||||
log.info("Starting LeaseCoordinator");
|
||||
leaseCoordinator.start();
|
||||
leaseCoordinator.start(leaseAssignmentModeProvider);
|
||||
} else {
|
||||
log.info("LeaseCoordinator is already running. No need to start it.");
|
||||
}
|
||||
log.info("Scheduling periodicShardSync");
|
||||
leaderElectedPeriodicShardSyncManager.start();
|
||||
leaderElectedPeriodicShardSyncManager.start(leaderDecider);
|
||||
streamSyncWatch.start();
|
||||
isDone = true;
|
||||
} catch (Exception e) {
|
||||
} catch (final Exception e) {
|
||||
log.error("Caught exception when initializing LeaseCoordinator", e);
|
||||
lastException = e;
|
||||
}
|
||||
|
|
@ -863,7 +976,7 @@ public class Scheduler implements Runnable {
|
|||
leaseCoordinator, lease, notificationCompleteLatch, shutdownCompleteLatch);
|
||||
ShardInfo shardInfo = DynamoDBLeaseCoordinator.convertLeaseToAssignment(lease);
|
||||
ShardConsumer consumer = shardInfoShardConsumerMap.get(shardInfo);
|
||||
if (consumer != null) {
|
||||
if (consumer != null && !consumer.isShutdown()) {
|
||||
consumer.gracefulShutdown(shutdownNotification);
|
||||
} else {
|
||||
//
|
||||
|
|
@ -912,6 +1025,8 @@ public class Scheduler implements Runnable {
|
|||
shutdown = true;
|
||||
shutdownStartTimeMillis = System.currentTimeMillis();
|
||||
|
||||
migrationStateMachine.shutdown();
|
||||
migrationComponentsInitializer.shutdown();
|
||||
// Stop lease coordinator, so leases are not renewed or stolen from other workers.
|
||||
// Lost leases will force Worker to begin shutdown process for all shard consumers in
|
||||
// Worker.run().
|
||||
|
|
@ -1228,4 +1343,23 @@ public class Scheduler implements Runnable {
|
|||
public Future<Void> requestShutdown() {
|
||||
return null;
|
||||
}
|
||||
|
||||
/**
|
||||
* If WorkerMetricStats list is empty and the disable flag is false, select WorkerMetricStats automatically.
|
||||
*/
|
||||
private void selectWorkerMetricsIfAvailable(
|
||||
final WorkerUtilizationAwareAssignmentConfig workerUtilizationAwareAssignmentConfig) {
|
||||
try {
|
||||
if (workerUtilizationAwareAssignmentConfig.workerMetricList().isEmpty()
|
||||
&& !workerUtilizationAwareAssignmentConfig.disableWorkerMetrics()) {
|
||||
workerUtilizationAwareAssignmentConfig.workerMetricList(
|
||||
WorkerMetricsSelector.create().getDefaultWorkerMetrics());
|
||||
}
|
||||
} catch (final Exception e) {
|
||||
log.warn(
|
||||
"Exception encountered during WorkerMetricStats selection. If this is persistent please try setting the "
|
||||
+ "WorkerMetricStats explicitly.",
|
||||
e);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -0,0 +1,21 @@
|
|||
package software.amazon.kinesis.coordinator.assignment;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
import software.amazon.kinesis.leases.Lease;
|
||||
|
||||
public interface LeaseAssignmentDecider {
|
||||
|
||||
/**
|
||||
* Assigns expiredOrUnAssignedLeases to the available workers.
|
||||
*/
|
||||
void assignExpiredOrUnassignedLeases(final List<Lease> expiredOrUnAssignedLeases);
|
||||
|
||||
/**
|
||||
* Balances the leases between workers in the fleet.
|
||||
* Implementation can choose to balance leases based on lease count or throughput or to bring the variance in
|
||||
* resource utilization to a minimum.
|
||||
* Check documentation on implementation class to see how it balances the leases.
|
||||
*/
|
||||
void balanceWorkerVariance();
|
||||
}
|
||||
|
|
@ -0,0 +1,719 @@
|
|||
package software.amazon.kinesis.coordinator.assignment;
|
||||
|
||||
import java.time.Duration;
|
||||
import java.time.Instant;
|
||||
import java.util.Collections;
|
||||
import java.util.HashMap;
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Objects;
|
||||
import java.util.Optional;
|
||||
import java.util.Set;
|
||||
import java.util.concurrent.Callable;
|
||||
import java.util.concurrent.CompletableFuture;
|
||||
import java.util.concurrent.CompletionException;
|
||||
import java.util.concurrent.ExecutorService;
|
||||
import java.util.concurrent.Executors;
|
||||
import java.util.concurrent.Future;
|
||||
import java.util.concurrent.ScheduledExecutorService;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
import java.util.concurrent.atomic.AtomicInteger;
|
||||
import java.util.function.Function;
|
||||
import java.util.function.Supplier;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import com.google.common.collect.ImmutableList;
|
||||
import com.google.common.collect.ImmutableMap;
|
||||
import lombok.Getter;
|
||||
import lombok.RequiredArgsConstructor;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.apache.commons.collections.CollectionUtils;
|
||||
import software.amazon.awssdk.services.cloudwatch.model.StandardUnit;
|
||||
import software.amazon.kinesis.annotations.KinesisClientInternalApi;
|
||||
import software.amazon.kinesis.coordinator.LeaderDecider;
|
||||
import software.amazon.kinesis.leases.Lease;
|
||||
import software.amazon.kinesis.leases.LeaseManagementConfig;
|
||||
import software.amazon.kinesis.leases.LeaseRefresher;
|
||||
import software.amazon.kinesis.leases.exceptions.DependencyException;
|
||||
import software.amazon.kinesis.leases.exceptions.InvalidStateException;
|
||||
import software.amazon.kinesis.leases.exceptions.ProvisionedThroughputException;
|
||||
import software.amazon.kinesis.metrics.MetricsFactory;
|
||||
import software.amazon.kinesis.metrics.MetricsLevel;
|
||||
import software.amazon.kinesis.metrics.MetricsScope;
|
||||
import software.amazon.kinesis.metrics.MetricsUtil;
|
||||
import software.amazon.kinesis.metrics.NullMetricsScope;
|
||||
import software.amazon.kinesis.worker.metricstats.WorkerMetricStats;
|
||||
import software.amazon.kinesis.worker.metricstats.WorkerMetricStatsDAO;
|
||||
|
||||
import static java.util.Objects.isNull;
|
||||
import static java.util.Objects.nonNull;
|
||||
|
||||
/**
|
||||
* Performs the LeaseAssignment for the application. This starts by loading the leases and workerMetrics from the
|
||||
* storage and then starts by assignment (in-memory) of expired and/or unassigned leases after which it tries to perform
|
||||
* balancing of load among the workers by re-assign leases.
|
||||
* In the end, performs actual assignment by writing to storage.
|
||||
*/
|
||||
@Slf4j
|
||||
@RequiredArgsConstructor
|
||||
@KinesisClientInternalApi
|
||||
public final class LeaseAssignmentManager {
|
||||
|
||||
/**
|
||||
* Default number of continuous failure execution after which leadership is released.
|
||||
*/
|
||||
private static final int DEFAULT_FAILURE_COUNT_TO_SWITCH_LEADER = 3;
|
||||
|
||||
/**
|
||||
* Default multiplier for LAM frequency with respect to leaseDurationMillis (lease failover millis).
|
||||
* If leaseDurationMillis is 10000 millis, default LAM frequency is 20000 millis.
|
||||
*/
|
||||
private static final int DEFAULT_LEASE_ASSIGNMENT_MANAGER_FREQ_MULTIPLIER = 2;
|
||||
|
||||
/**
|
||||
* Default parallelism factor for scaling lease table.
|
||||
*/
|
||||
private static final int DEFAULT_LEASE_TABLE_SCAN_PARALLELISM_FACTOR = 10;
|
||||
|
||||
private static final String FORCE_LEADER_RELEASE_METRIC_NAME = "ForceLeaderRelease";
|
||||
|
||||
/**
|
||||
* Default retry attempt for loading leases and workers before giving up.
|
||||
*/
|
||||
private static final int DDB_LOAD_RETRY_ATTEMPT = 1;
|
||||
|
||||
/**
|
||||
* Internal threadpool used to parallely perform assignment operation by calling storage.
|
||||
*/
|
||||
private static final ExecutorService LEASE_ASSIGNMENT_CALL_THREAD_POOL =
|
||||
Executors.newFixedThreadPool(Runtime.getRuntime().availableProcessors());
|
||||
|
||||
private static final String METRICS_LEASE_ASSIGNMENT_MANAGER = "LeaseAssignmentManager";
|
||||
private static final String METRICS_INCOMPLETE_EXPIRED_LEASES_ASSIGNMENT =
|
||||
"LeaseAssignmentManager.IncompleteExpiredLeasesAssignment";
|
||||
public static final int DEFAULT_NO_OF_SKIP_STAT_FOR_DEAD_WORKER_THRESHOLD = 2;
|
||||
|
||||
private final LeaseRefresher leaseRefresher;
|
||||
private final WorkerMetricStatsDAO workerMetricsDAO;
|
||||
private final LeaderDecider leaderDecider;
|
||||
private final LeaseManagementConfig.WorkerUtilizationAwareAssignmentConfig config;
|
||||
private final String currentWorkerId;
|
||||
private final Long leaseDurationMillis;
|
||||
private final MetricsFactory metricsFactory;
|
||||
private final ScheduledExecutorService executorService;
|
||||
private final Supplier<Long> nanoTimeProvider;
|
||||
private final int maxLeasesForWorker;
|
||||
private final LeaseManagementConfig.GracefulLeaseHandoffConfig gracefulLeaseHandoffConfig;
|
||||
private boolean tookOverLeadershipInThisRun = false;
|
||||
private final Map<String, Lease> prevRunLeasesState = new HashMap<>();
|
||||
|
||||
private Future<?> managerFuture;
|
||||
|
||||
private int noOfContinuousFailedAttempts = 0;
|
||||
private int lamRunCounter = 0;
|
||||
|
||||
public synchronized void start() {
|
||||
if (isNull(managerFuture)) {
|
||||
// LAM can be dynamically started/stopped and restarted during MigrationStateMachine execution
|
||||
// so reset the flag to refresh the state before processing during a restart of LAM.
|
||||
tookOverLeadershipInThisRun = false;
|
||||
managerFuture = executorService.scheduleWithFixedDelay(
|
||||
this::performAssignment,
|
||||
0L,
|
||||
leaseDurationMillis * DEFAULT_LEASE_ASSIGNMENT_MANAGER_FREQ_MULTIPLIER,
|
||||
TimeUnit.MILLISECONDS);
|
||||
log.info("Started LeaseAssignmentManager");
|
||||
return;
|
||||
}
|
||||
log.info("LeaseAssignmentManager already running...");
|
||||
}
|
||||
|
||||
public synchronized void stop() {
|
||||
if (nonNull(managerFuture)) {
|
||||
log.info("Completed shutdown of LeaseAssignmentManager");
|
||||
managerFuture.cancel(true);
|
||||
managerFuture = null;
|
||||
return;
|
||||
}
|
||||
log.info("LeaseAssignmentManager is not running...");
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates the MetricsScope for given {@param operation} by calling metricsFactory and falls back to
|
||||
* NullMetricsScope if failed to create MetricsScope.
|
||||
* @param operation Operation name for MetricsScope
|
||||
* @return instance of MetricsScope
|
||||
*/
|
||||
private MetricsScope createMetricsScope(final String operation) {
|
||||
try {
|
||||
return MetricsUtil.createMetricsWithOperation(metricsFactory, operation);
|
||||
} catch (final Exception e) {
|
||||
log.error("Failed to create metrics scope defaulting to no metrics.", e);
|
||||
return new NullMetricsScope();
|
||||
}
|
||||
}
|
||||
|
||||
private void performAssignment() {
|
||||
|
||||
final MetricsScope metricsScope = createMetricsScope(METRICS_LEASE_ASSIGNMENT_MANAGER);
|
||||
final long startTime = System.currentTimeMillis();
|
||||
boolean success = false;
|
||||
|
||||
try {
|
||||
|
||||
// If the current worker is not leader, then do nothing as assignment is executed on leader.
|
||||
if (!leaderDecider.isLeader(currentWorkerId)) {
|
||||
log.info("Current worker {} is not a leader, ignore", currentWorkerId);
|
||||
this.tookOverLeadershipInThisRun = false;
|
||||
success = true;
|
||||
return;
|
||||
}
|
||||
|
||||
if (!this.tookOverLeadershipInThisRun) {
|
||||
// This means that there was leader change, perform cleanup of state as this is leader switch.
|
||||
this.tookOverLeadershipInThisRun = true;
|
||||
this.lamRunCounter = 0;
|
||||
prepareAfterLeaderSwitch();
|
||||
}
|
||||
log.info("Current worker {} is a leader, performing assignment", currentWorkerId);
|
||||
|
||||
final InMemoryStorageView inMemoryStorageView = new InMemoryStorageView();
|
||||
|
||||
final long loadStartTime = System.currentTimeMillis();
|
||||
inMemoryStorageView.loadInMemoryStorageView(metricsScope);
|
||||
MetricsUtil.addLatency(metricsScope, "LeaseAndWorkerMetricsLoad", loadStartTime, MetricsLevel.DETAILED);
|
||||
|
||||
publishLeaseAndWorkerCountMetrics(metricsScope, inMemoryStorageView);
|
||||
final LeaseAssignmentDecider leaseAssignmentDecider = new VarianceBasedLeaseAssignmentDecider(
|
||||
inMemoryStorageView,
|
||||
config.dampeningPercentage(),
|
||||
config.reBalanceThresholdPercentage(),
|
||||
config.allowThroughputOvershoot());
|
||||
|
||||
updateLeasesLastCounterIncrementNanosAndLeaseShutdownTimeout(
|
||||
inMemoryStorageView.getLeaseList(), inMemoryStorageView.getLeaseTableScanTime());
|
||||
|
||||
// This does not include the leases from the worker that has expired (based on WorkerMetricStats's
|
||||
// lastUpdateTime)
|
||||
// but the lease is not expired (based on the leaseCounter on lease).
|
||||
// If a worker has died, the lease will be expired and assigned in next iteration.
|
||||
final List<Lease> expiredOrUnAssignedLeases = inMemoryStorageView.getLeaseList().stream()
|
||||
.filter(lease -> lease.isExpired(
|
||||
TimeUnit.MILLISECONDS.toNanos(leaseDurationMillis),
|
||||
inMemoryStorageView.getLeaseTableScanTime()))
|
||||
// marking them for direct reassignment.
|
||||
.map(l -> l.isExpiredOrUnassigned(true))
|
||||
.collect(Collectors.toList());
|
||||
|
||||
log.info("Total expiredOrUnassignedLeases count : {}", expiredOrUnAssignedLeases.size());
|
||||
metricsScope.addData(
|
||||
"ExpiredLeases", expiredOrUnAssignedLeases.size(), StandardUnit.COUNT, MetricsLevel.SUMMARY);
|
||||
|
||||
final long expiredAndUnassignedLeaseAssignmentStartTime = System.currentTimeMillis();
|
||||
leaseAssignmentDecider.assignExpiredOrUnassignedLeases(expiredOrUnAssignedLeases);
|
||||
MetricsUtil.addLatency(
|
||||
metricsScope,
|
||||
"AssignExpiredOrUnassignedLeases",
|
||||
expiredAndUnassignedLeaseAssignmentStartTime,
|
||||
MetricsLevel.DETAILED);
|
||||
|
||||
if (!expiredOrUnAssignedLeases.isEmpty()) {
|
||||
// When expiredOrUnAssignedLeases is not empty, that means
|
||||
// that we were not able to assign all expired or unassigned leases and hit the maxThroughput
|
||||
// per worker for all workers.
|
||||
log.warn("Not able to assign all expiredOrUnAssignedLeases");
|
||||
metricsScope.addData(
|
||||
"LeaseSpillover", expiredOrUnAssignedLeases.size(), StandardUnit.COUNT, MetricsLevel.SUMMARY);
|
||||
}
|
||||
|
||||
if (shouldRunVarianceBalancing()) {
|
||||
final long balanceWorkerVarianceStartTime = System.currentTimeMillis();
|
||||
final int totalNewAssignmentBeforeWorkerVarianceBalancing =
|
||||
inMemoryStorageView.leaseToNewAssignedWorkerMap.size();
|
||||
leaseAssignmentDecider.balanceWorkerVariance();
|
||||
MetricsUtil.addLatency(
|
||||
metricsScope, "BalanceWorkerVariance", balanceWorkerVarianceStartTime, MetricsLevel.DETAILED);
|
||||
metricsScope.addData(
|
||||
"NumOfLeasesReassignment",
|
||||
inMemoryStorageView.leaseToNewAssignedWorkerMap.size()
|
||||
- totalNewAssignmentBeforeWorkerVarianceBalancing,
|
||||
StandardUnit.COUNT,
|
||||
MetricsLevel.SUMMARY);
|
||||
}
|
||||
|
||||
if (inMemoryStorageView.leaseToNewAssignedWorkerMap.isEmpty()) {
|
||||
log.info("No new lease assignment performed in this iteration");
|
||||
}
|
||||
|
||||
parallelyAssignLeases(inMemoryStorageView, metricsScope);
|
||||
printPerWorkerLeases(inMemoryStorageView);
|
||||
deleteStaleWorkerMetricsEntries(inMemoryStorageView, metricsScope);
|
||||
success = true;
|
||||
noOfContinuousFailedAttempts = 0;
|
||||
} catch (final Exception e) {
|
||||
log.error("LeaseAssignmentManager failed to perform lease assignment.", e);
|
||||
noOfContinuousFailedAttempts++;
|
||||
if (noOfContinuousFailedAttempts >= DEFAULT_FAILURE_COUNT_TO_SWITCH_LEADER) {
|
||||
log.error(
|
||||
"Failed to perform assignment {} times in a row, releasing leadership from worker : {}",
|
||||
DEFAULT_FAILURE_COUNT_TO_SWITCH_LEADER,
|
||||
currentWorkerId);
|
||||
MetricsUtil.addCount(metricsScope, FORCE_LEADER_RELEASE_METRIC_NAME, 1, MetricsLevel.SUMMARY);
|
||||
leaderDecider.releaseLeadershipIfHeld();
|
||||
}
|
||||
} finally {
|
||||
MetricsUtil.addSuccessAndLatency(metricsScope, success, startTime, MetricsLevel.SUMMARY);
|
||||
MetricsUtil.endScope(metricsScope);
|
||||
}
|
||||
}
|
||||
|
||||
private boolean shouldRunVarianceBalancing() {
|
||||
final boolean response = this.lamRunCounter == 0;
|
||||
/*
|
||||
To avoid lamRunCounter grow large, keep it within [0,varianceBalancingFrequency).
|
||||
If varianceBalancingFrequency is 5 lamRunCounter value will be within 0 to 4 and method return true when
|
||||
lamRunCounter is 0.
|
||||
*/
|
||||
this.lamRunCounter = (this.lamRunCounter + 1) % config.varianceBalancingFrequency();
|
||||
return response;
|
||||
}
|
||||
|
||||
/**
|
||||
* Deletes the WorkerMetricStats entries which are stale(not updated since long time, ref
|
||||
* {@link LeaseAssignmentManager#isWorkerMetricsEntryStale} for the condition to evaluate staleness)
|
||||
*/
|
||||
private void deleteStaleWorkerMetricsEntries(
|
||||
final InMemoryStorageView inMemoryStorageView, final MetricsScope metricsScope) {
|
||||
final long startTime = System.currentTimeMillis();
|
||||
try {
|
||||
final List<WorkerMetricStats> staleWorkerMetricsList = inMemoryStorageView.getWorkerMetricsList().stream()
|
||||
.filter(this::isWorkerMetricsEntryStale)
|
||||
.collect(Collectors.toList());
|
||||
MetricsUtil.addCount(
|
||||
metricsScope, "TotalStaleWorkerMetricsEntry", staleWorkerMetricsList.size(), MetricsLevel.DETAILED);
|
||||
log.info("Number of stale workerMetrics entries : {}", staleWorkerMetricsList.size());
|
||||
log.info("Stale workerMetrics list : {}", staleWorkerMetricsList);
|
||||
|
||||
final List<CompletableFuture<Boolean>> completableFutures = staleWorkerMetricsList.stream()
|
||||
.map(workerMetrics -> CompletableFuture.supplyAsync(
|
||||
() -> workerMetricsDAO.deleteMetrics(workerMetrics), LEASE_ASSIGNMENT_CALL_THREAD_POOL))
|
||||
.collect(Collectors.toList());
|
||||
|
||||
CompletableFuture.allOf(completableFutures.toArray(new CompletableFuture[0]))
|
||||
.join();
|
||||
} finally {
|
||||
MetricsUtil.addLatency(metricsScope, "StaleWorkerMetricsCleanup", startTime, MetricsLevel.DETAILED);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* WorkerMetricStats entry is considered stale if the lastUpdateTime of the workerMetrics is older than
|
||||
* workerMetricsStalenessThreshold * workerMetricsReporterFreqInMillis.
|
||||
*/
|
||||
private boolean isWorkerMetricsEntryStale(final WorkerMetricStats workerMetrics) {
|
||||
return Duration.between(Instant.ofEpochSecond(workerMetrics.getLastUpdateTime()), Instant.now())
|
||||
.toMillis()
|
||||
> config.staleWorkerMetricsEntryCleanupDuration().toMillis();
|
||||
}
|
||||
|
||||
private void printPerWorkerLeases(final InMemoryStorageView storageView) {
|
||||
storageView.getActiveWorkerIdSet().forEach(activeWorkerId -> {
|
||||
log.info(
|
||||
"Worker : {} and total leases : {} and totalThroughput : {}",
|
||||
activeWorkerId,
|
||||
Optional.ofNullable(storageView.getWorkerToLeasesMap().get(activeWorkerId))
|
||||
.orElse(Collections.EMPTY_SET)
|
||||
.size(),
|
||||
storageView.getWorkerToTotalAssignedThroughputMap().get(activeWorkerId));
|
||||
});
|
||||
}
|
||||
|
||||
private void parallelyAssignLeases(final InMemoryStorageView inMemoryStorageView, final MetricsScope metricsScope) {
|
||||
final AtomicInteger failedAssignmentCounter = new AtomicInteger(0);
|
||||
final long startTime = System.currentTimeMillis();
|
||||
boolean success = false;
|
||||
try {
|
||||
CompletableFuture.allOf(inMemoryStorageView.getLeaseToNewAssignedWorkerMap().entrySet().stream()
|
||||
// ignore leases that are heartbeating and pending graceful shutdown checkpoint.
|
||||
.filter(entry -> !entry.getKey().blockedOnPendingCheckpoint(getNanoTimeMillis()))
|
||||
.map(entry -> CompletableFuture.supplyAsync(
|
||||
() -> {
|
||||
try {
|
||||
final Lease lease = entry.getKey();
|
||||
if (gracefulLeaseHandoffConfig.isGracefulLeaseHandoffEnabled()
|
||||
&& lease.isEligibleForGracefulShutdown()) {
|
||||
return handleGracefulLeaseHandoff(
|
||||
lease, entry.getValue(), failedAssignmentCounter);
|
||||
} else {
|
||||
return handleRegularLeaseAssignment(
|
||||
lease, entry.getValue(), failedAssignmentCounter);
|
||||
}
|
||||
} catch (Exception e) {
|
||||
throw new CompletionException(e);
|
||||
}
|
||||
},
|
||||
LEASE_ASSIGNMENT_CALL_THREAD_POOL))
|
||||
.toArray(CompletableFuture[]::new))
|
||||
.join();
|
||||
success = true;
|
||||
} finally {
|
||||
MetricsUtil.addCount(
|
||||
metricsScope, "FailedAssignmentCount", failedAssignmentCounter.get(), MetricsLevel.DETAILED);
|
||||
MetricsUtil.addSuccessAndLatency(
|
||||
metricsScope, "ParallelyAssignLeases", success, startTime, MetricsLevel.DETAILED);
|
||||
}
|
||||
}
|
||||
|
||||
private boolean handleGracefulLeaseHandoff(Lease lease, String newOwner, AtomicInteger failedAssignmentCounter)
|
||||
throws ProvisionedThroughputException, InvalidStateException, DependencyException {
|
||||
final boolean response = leaseRefresher.initiateGracefulLeaseHandoff(lease, newOwner);
|
||||
if (response) {
|
||||
// new handoff assignment. add the timeout.
|
||||
lease.checkpointOwnerTimeoutTimestampMillis(getCheckpointOwnerTimeoutTimestampMillis());
|
||||
} else {
|
||||
failedAssignmentCounter.incrementAndGet();
|
||||
}
|
||||
return response;
|
||||
}
|
||||
|
||||
private boolean handleRegularLeaseAssignment(Lease lease, String newOwner, AtomicInteger failedAssignmentCounter)
|
||||
throws ProvisionedThroughputException, InvalidStateException, DependencyException {
|
||||
final boolean response = leaseRefresher.assignLease(lease, newOwner);
|
||||
if (response) {
|
||||
// Successful assignment updates the leaseCounter, update the nanoTime for counter update.
|
||||
lease.lastCounterIncrementNanos(nanoTimeProvider.get());
|
||||
} else {
|
||||
failedAssignmentCounter.incrementAndGet();
|
||||
}
|
||||
return response;
|
||||
}
|
||||
|
||||
private void publishLeaseAndWorkerCountMetrics(
|
||||
final MetricsScope metricsScope, final InMemoryStorageView inMemoryStorageView) {
|
||||
// Names of the metrics are kept in sync with what is published in LeaseTaker.
|
||||
metricsScope.addData(
|
||||
"TotalLeases", inMemoryStorageView.leaseList.size(), StandardUnit.COUNT, MetricsLevel.SUMMARY);
|
||||
metricsScope.addData(
|
||||
"NumWorkers", inMemoryStorageView.activeWorkerMetrics.size(), StandardUnit.COUNT, MetricsLevel.SUMMARY);
|
||||
}
|
||||
|
||||
// Method updates all new leases with currentTime if the counter is updated since last run else keeps whatever
|
||||
// was prev and update the prevRunLeasesState
|
||||
private void updateLeasesLastCounterIncrementNanosAndLeaseShutdownTimeout(
|
||||
final List<Lease> leaseList, final Long scanTime) {
|
||||
for (final Lease lease : leaseList) {
|
||||
final Lease prevLease = prevRunLeasesState.get(lease.leaseKey());
|
||||
|
||||
// make sure lease shutdown timeouts are tracked.
|
||||
if (lease.shutdownRequested()) {
|
||||
// previous and current leases might have same next and checkpoint owners but there is no
|
||||
// guarantee that the latest shutdown is the same shutdown in the previous lease for example
|
||||
// some other leaders change the lease states while this worker waiting for it's LAM run.
|
||||
// This is the best effort to prevent marking the incorrect timeout.
|
||||
if (isNull(prevLease) || !prevLease.shutdownRequested() || !isSameOwners(lease, prevLease)) {
|
||||
// Add new value if previous is null, previous lease is not shutdown pending or the owners
|
||||
// don't match
|
||||
lease.checkpointOwnerTimeoutTimestampMillis(getCheckpointOwnerTimeoutTimestampMillis());
|
||||
} else {
|
||||
lease.checkpointOwnerTimeoutTimestampMillis(prevLease.checkpointOwnerTimeoutTimestampMillis());
|
||||
}
|
||||
}
|
||||
|
||||
if (isNull(prevLease)) {
|
||||
lease.lastCounterIncrementNanos(
|
||||
isNull(lease.actualOwner())
|
||||
// This is an unassigned lease, mark as 0L that puts this in first in assignment order
|
||||
? 0L
|
||||
: scanTime);
|
||||
} else {
|
||||
lease.lastCounterIncrementNanos(
|
||||
lease.leaseCounter() > prevLease.leaseCounter()
|
||||
? scanTime
|
||||
: prevLease.lastCounterIncrementNanos());
|
||||
}
|
||||
}
|
||||
prevRunLeasesState.clear();
|
||||
prevRunLeasesState.putAll(leaseList.stream().collect(Collectors.toMap(Lease::leaseKey, Function.identity())));
|
||||
}
|
||||
|
||||
private void prepareAfterLeaderSwitch() {
|
||||
prevRunLeasesState.clear();
|
||||
noOfContinuousFailedAttempts = 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* In memory view of the leases and workerMetrics.
|
||||
* This class supports queries (e.g., leases assigned to worker or total throughout assigned to worker).
|
||||
*/
|
||||
@Getter
|
||||
class InMemoryStorageView {
|
||||
|
||||
// This is in-memory view of the workerToLeaseMapping, this is updated in-memory before actual
|
||||
// changes to storage.
|
||||
private final Map<String, Set<Lease>> workerToLeasesMap = new HashMap<>();
|
||||
/**
|
||||
* This is computed initially after the loading leases and then updated when the
|
||||
* {@link InMemoryStorageView#performLeaseAssignment} is called.
|
||||
*/
|
||||
private final Map<String, Double> workerToTotalAssignedThroughputMap = new HashMap<>();
|
||||
/**
|
||||
* Captures the new assignment done during the lifecycle of single run.
|
||||
*/
|
||||
private final Map<Lease, String> leaseToNewAssignedWorkerMap = new HashMap<>();
|
||||
|
||||
/**
|
||||
* List of all leases in the application.
|
||||
*/
|
||||
private List<Lease> leaseList;
|
||||
/**
|
||||
* List of workers which are active (i.e., updated metric stats before the threshold ref)
|
||||
* {@link this#computeWorkerExpiryThresholdInSecond})
|
||||
*/
|
||||
private List<WorkerMetricStats> activeWorkerMetrics;
|
||||
/**
|
||||
* List of all workerMetrics entries from storage.
|
||||
*/
|
||||
private List<WorkerMetricStats> workerMetricsList;
|
||||
/**
|
||||
* List of active workers ids.
|
||||
*/
|
||||
private Set<String> activeWorkerIdSet;
|
||||
/**
|
||||
* Wall time in nanoseconds when the lease table scan was completed.
|
||||
*/
|
||||
private long leaseTableScanTime = 0L;
|
||||
/**
|
||||
* Average throughput for all workers.
|
||||
*/
|
||||
private double targetAverageThroughput;
|
||||
|
||||
/**
|
||||
* Update {@ref inMemoryWorkerToLeasesMapping} with the change in ownership and update newLeaseAssignmentMap
|
||||
*
|
||||
* @param lease lease changing assignment
|
||||
* @param newOwner new owner of the lease
|
||||
*/
|
||||
public void performLeaseAssignment(final Lease lease, final String newOwner) {
|
||||
final String existingOwner = lease.actualOwner();
|
||||
workerToLeasesMap.get(existingOwner).remove(lease);
|
||||
workerToLeasesMap
|
||||
.computeIfAbsent(newOwner, owner -> new HashSet<>())
|
||||
.add(lease);
|
||||
updateWorkerThroughput(newOwner, lease.throughputKBps());
|
||||
// Remove the same lease throughput from oldOwner
|
||||
updateWorkerThroughput(existingOwner, -lease.throughputKBps());
|
||||
leaseToNewAssignedWorkerMap.put(lease, newOwner);
|
||||
}
|
||||
|
||||
/**
|
||||
* Scans the LeaseTable and WorkerMetricStats in parallel and load the data and populate datastructures used
|
||||
* in lease assignment.
|
||||
*/
|
||||
public void loadInMemoryStorageView(final MetricsScope metricsScope) throws Exception {
|
||||
final CompletableFuture<Map.Entry<List<Lease>, List<String>>> leaseListFuture = loadLeaseListAsync();
|
||||
|
||||
final CompletableFuture<List<WorkerMetricStats>> workerMetricsFuture = loadWorkerMetricStats();
|
||||
|
||||
final List<WorkerMetricStats> workerMetricsFromStorage = workerMetricsFuture.join();
|
||||
|
||||
final List<String> listOfWorkerIdOfInvalidWorkerMetricsEntry = workerMetricsFromStorage.stream()
|
||||
.filter(workerMetrics -> !workerMetrics.isValidWorkerMetric())
|
||||
.map(WorkerMetricStats::getWorkerId)
|
||||
.collect(Collectors.toList());
|
||||
log.warn("List of workerIds with invalid entries : {}", listOfWorkerIdOfInvalidWorkerMetricsEntry);
|
||||
if (!listOfWorkerIdOfInvalidWorkerMetricsEntry.isEmpty()) {
|
||||
metricsScope.addData(
|
||||
"NumWorkersWithInvalidEntry",
|
||||
listOfWorkerIdOfInvalidWorkerMetricsEntry.size(),
|
||||
StandardUnit.COUNT,
|
||||
MetricsLevel.SUMMARY);
|
||||
}
|
||||
|
||||
// Valid entries are considered further, for validity of entry refer WorkerMetricStats#isValidWorkerMetrics
|
||||
this.workerMetricsList = workerMetricsFromStorage.stream()
|
||||
.filter(WorkerMetricStats::isValidWorkerMetric)
|
||||
.collect(Collectors.toList());
|
||||
|
||||
log.info("Total WorkerMetricStats available : {}", workerMetricsList.size());
|
||||
final long workerExpiryThreshold = computeWorkerExpiryThresholdInSecond();
|
||||
|
||||
final long countOfWorkersWithFailingWorkerMetric = workerMetricsList.stream()
|
||||
.filter(WorkerMetricStats::isAnyWorkerMetricFailing)
|
||||
.count();
|
||||
if (countOfWorkersWithFailingWorkerMetric != 0) {
|
||||
metricsScope.addData(
|
||||
"NumWorkersWithFailingWorkerMetric",
|
||||
countOfWorkersWithFailingWorkerMetric,
|
||||
StandardUnit.COUNT,
|
||||
MetricsLevel.SUMMARY);
|
||||
}
|
||||
|
||||
final Map.Entry<List<Lease>, List<String>> leaseListResponse = leaseListFuture.join();
|
||||
this.leaseList = leaseListResponse.getKey();
|
||||
log.warn("Leases that failed deserialization : {}", leaseListResponse.getValue());
|
||||
if (!leaseListResponse.getValue().isEmpty()) {
|
||||
MetricsUtil.addCount(
|
||||
metricsScope,
|
||||
"LeaseDeserializationFailureCount",
|
||||
leaseListResponse.getValue().size(),
|
||||
MetricsLevel.SUMMARY);
|
||||
}
|
||||
this.leaseTableScanTime = nanoTimeProvider.get();
|
||||
log.info("Total Leases available : {}", leaseList.size());
|
||||
|
||||
final double averageLeaseThroughput = leaseList.stream()
|
||||
.filter(lease -> nonNull(lease.throughputKBps()))
|
||||
.mapToDouble(Lease::throughputKBps)
|
||||
.average()
|
||||
// If none of the leases has any value, that means its app
|
||||
// startup time and thus assigns 0 in that case to start with.
|
||||
.orElse(0D);
|
||||
/*
|
||||
* If a workerMetrics has a metric (i.e. has -1 value in last index which denotes failure),
|
||||
* skip it from activeWorkerMetrics and no new action on it will be done
|
||||
* (new assignment etc.) until the metric has non -1 value in last index. This is to avoid performing action
|
||||
* with the stale data on worker.
|
||||
*/
|
||||
this.activeWorkerMetrics = workerMetricsList.stream()
|
||||
.filter(workerMetrics -> workerMetrics.getLastUpdateTime() >= workerExpiryThreshold
|
||||
&& !workerMetrics.isAnyWorkerMetricFailing())
|
||||
.collect(Collectors.toList());
|
||||
log.info("activeWorkerMetrics : {}", activeWorkerMetrics.size());
|
||||
targetAverageThroughput =
|
||||
averageLeaseThroughput * leaseList.size() / Math.max(1, activeWorkerMetrics.size());
|
||||
leaseList.forEach(lease -> {
|
||||
if (isNull(lease.throughputKBps())) {
|
||||
// If the lease is unassigned, it will not have any throughput value, use average throughput
|
||||
// as good enough value to start with.
|
||||
lease.throughputKBps(averageLeaseThroughput);
|
||||
}
|
||||
workerToLeasesMap
|
||||
.computeIfAbsent(lease.actualOwner(), workerId -> new HashSet<>())
|
||||
.add(lease);
|
||||
updateWorkerThroughput(lease.actualOwner(), lease.throughputKBps());
|
||||
});
|
||||
|
||||
this.activeWorkerIdSet = new HashSet<>();
|
||||
// Calculate initial ratio
|
||||
this.activeWorkerMetrics.forEach(workerMetrics -> {
|
||||
activeWorkerIdSet.add(workerMetrics.getWorkerId());
|
||||
workerMetrics.setEmaAlpha(config.workerMetricsEMAAlpha());
|
||||
if (workerMetrics.isUsingDefaultWorkerMetric()) {
|
||||
setOperatingRangeAndWorkerMetricsDataForDefaultWorker(
|
||||
workerMetrics,
|
||||
getTotalAssignedThroughput(workerMetrics.getWorkerId()) / targetAverageThroughput);
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
private void updateWorkerThroughput(final String workerId, final double leaseThroughput) {
|
||||
double value = workerToTotalAssignedThroughputMap.computeIfAbsent(workerId, worker -> (double) 0L);
|
||||
workerToTotalAssignedThroughputMap.put(workerId, value + leaseThroughput);
|
||||
}
|
||||
|
||||
private void setOperatingRangeAndWorkerMetricsDataForDefaultWorker(
|
||||
final WorkerMetricStats workerMetrics, final Double ratio) {
|
||||
// for workers with default WorkerMetricStats, the operating range ceiling of 100 represents the
|
||||
// target throughput. This way, with either heterogeneous or homogeneous fleets
|
||||
// of explicit WorkerMetricStats and default WorkerMetricStats applications, load will be evenly
|
||||
// distributed.
|
||||
log.info(
|
||||
"Worker [{}] is using default WorkerMetricStats, setting initial utilization ratio to [{}].",
|
||||
workerMetrics.getWorkerId(),
|
||||
ratio);
|
||||
workerMetrics.setOperatingRange(ImmutableMap.of("T", ImmutableList.of(100L)));
|
||||
workerMetrics.setMetricStats(ImmutableMap.of("T", ImmutableList.of(ratio * 100, ratio * 100)));
|
||||
}
|
||||
|
||||
/**
|
||||
* Calculates the value threshold in seconds for a worker to be considered as active.
|
||||
* If a worker has not updated the WorkerMetricStats entry within this threshold, the worker is not considered
|
||||
* as active.
|
||||
*
|
||||
* @return wall time in seconds
|
||||
*/
|
||||
private long computeWorkerExpiryThresholdInSecond() {
|
||||
final long timeInSeconds = Duration.ofMillis(System.currentTimeMillis()
|
||||
- DEFAULT_NO_OF_SKIP_STAT_FOR_DEAD_WORKER_THRESHOLD
|
||||
* config.workerMetricsReporterFreqInMillis())
|
||||
.getSeconds();
|
||||
log.info("WorkerMetricStats expiry time in seconds : {}", timeInSeconds);
|
||||
return timeInSeconds;
|
||||
}
|
||||
|
||||
/**
|
||||
* Looks at inMemoryWorkerToLeasesMapping for lease assignment and figures out if there is room considering
|
||||
* any new assignment that would have happened.
|
||||
*/
|
||||
public boolean isWorkerTotalThroughputLessThanMaxThroughput(final String workerId) {
|
||||
return getTotalAssignedThroughput(workerId) <= config.maxThroughputPerHostKBps();
|
||||
}
|
||||
|
||||
/**
|
||||
* Looks at inMemoryWorkerToLeasesMapping for lease assignment of a worker and returns true if the worker has
|
||||
* no leases assigned or less than maxNumberOfLeasesPerHost else false.
|
||||
*/
|
||||
public boolean isWorkerAssignedLeasesLessThanMaxLeases(final String workerId) {
|
||||
final Set<Lease> assignedLeases = workerToLeasesMap.get(workerId);
|
||||
if (CollectionUtils.isEmpty(assignedLeases)) {
|
||||
// There are no leases assigned to the worker, that means its less than maxNumberOfLeasesPerHost.
|
||||
return true;
|
||||
} else {
|
||||
return assignedLeases.size() < maxLeasesForWorker;
|
||||
}
|
||||
}
|
||||
|
||||
public Double getTotalAssignedThroughput(final String workerId) {
|
||||
return workerToTotalAssignedThroughputMap.getOrDefault(workerId, 0D);
|
||||
}
|
||||
|
||||
private CompletableFuture<List<WorkerMetricStats>> loadWorkerMetricStats() {
|
||||
return CompletableFuture.supplyAsync(() -> loadWithRetry(workerMetricsDAO::getAllWorkerMetricStats));
|
||||
}
|
||||
|
||||
private CompletableFuture<Map.Entry<List<Lease>, List<String>>> loadLeaseListAsync() {
|
||||
return CompletableFuture.supplyAsync(() -> loadWithRetry(() -> leaseRefresher.listLeasesParallely(
|
||||
LEASE_ASSIGNMENT_CALL_THREAD_POOL, DEFAULT_LEASE_TABLE_SCAN_PARALLELISM_FACTOR)));
|
||||
}
|
||||
|
||||
private <T> T loadWithRetry(final Callable<T> loadFunction) {
|
||||
int retryAttempt = 0;
|
||||
while (true) {
|
||||
try {
|
||||
return loadFunction.call();
|
||||
} catch (final Exception e) {
|
||||
if (retryAttempt < DDB_LOAD_RETRY_ATTEMPT) {
|
||||
log.warn(
|
||||
"Failed to load : {}, retrying",
|
||||
loadFunction.getClass().getName(),
|
||||
e);
|
||||
retryAttempt++;
|
||||
} else {
|
||||
throw new CompletionException(e);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private long getCheckpointOwnerTimeoutTimestampMillis() {
|
||||
// this is a future timestamp in millis that the graceful lease handoff shutdown can be considered
|
||||
// expired. LeaseDurationMillis is used here to account for how long it might take for the
|
||||
// lease owner to receive the shutdown signal before executing shutdown.
|
||||
return getNanoTimeMillis()
|
||||
+ gracefulLeaseHandoffConfig.gracefulLeaseHandoffTimeoutMillis()
|
||||
+ leaseDurationMillis;
|
||||
}
|
||||
|
||||
private long getNanoTimeMillis() {
|
||||
// this is not a wall clock time. But if we stick with using this time provider for calculating the elapsed
|
||||
// time it should be okay to use in checkpoint expiration calculation.
|
||||
return TimeUnit.NANOSECONDS.toMillis(nanoTimeProvider.get());
|
||||
}
|
||||
|
||||
private static boolean isSameOwners(Lease currentLease, Lease previousLease) {
|
||||
return Objects.equals(currentLease.leaseOwner(), previousLease.leaseOwner())
|
||||
&& Objects.equals(currentLease.checkpointOwner(), previousLease.checkpointOwner());
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,348 @@
|
|||
package software.amazon.kinesis.coordinator.assignment;
|
||||
|
||||
import java.util.AbstractMap.SimpleEntry;
|
||||
import java.util.ArrayDeque;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collections;
|
||||
import java.util.Comparator;
|
||||
import java.util.HashMap;
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.PriorityQueue;
|
||||
import java.util.Queue;
|
||||
import java.util.Set;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import software.amazon.kinesis.annotations.KinesisClientInternalApi;
|
||||
import software.amazon.kinesis.leases.Lease;
|
||||
import software.amazon.kinesis.worker.metricstats.WorkerMetricStats;
|
||||
|
||||
import static java.util.Objects.isNull;
|
||||
import static java.util.Objects.nonNull;
|
||||
|
||||
/**
|
||||
* VarianceBasedLeaseAssignmentDecider
|
||||
* This implementation of LeaseAssignmentDecider performs lease assignment by considering the WorkerMetricStats values of workers
|
||||
* with respect to fleet level average of that WorkerMetricStats.
|
||||
* Rebalanced leases are assigned to workers which has maximum capacity to in terms of throughput to reach fleet level
|
||||
* across the WorkerMetricStats value. In case of multiple WorkerMetricStats, the capacity to reach fleet level average is determined by outlier
|
||||
* WorkerMetricStats.
|
||||
* To minimize the variance, the algorithm picks the fleet level average of the WorkerMetricStats for workers as a
|
||||
* pivot point and uses it to determine workers to take leases from and then assign to other workers.
|
||||
* The threshold for considering a worker for re-balance is configurable via
|
||||
* {@code reBalanceThreshold}. During reassignments the {@code dampeningPercentageValue} is used to achieve
|
||||
* critical dampening.
|
||||
*/
|
||||
@Slf4j
|
||||
@KinesisClientInternalApi
|
||||
public final class VarianceBasedLeaseAssignmentDecider implements LeaseAssignmentDecider {
|
||||
private final LeaseAssignmentManager.InMemoryStorageView inMemoryStorageView;
|
||||
private final int dampeningPercentageValue;
|
||||
private final int reBalanceThreshold;
|
||||
private final boolean allowThroughputOvershoot;
|
||||
private final Map<String, Double> workerMetricsToFleetLevelAverageMap = new HashMap<>();
|
||||
private final PriorityQueue<WorkerMetricStats> assignableWorkerSortedByAvailableCapacity;
|
||||
private int targetLeasePerWorker;
|
||||
|
||||
public VarianceBasedLeaseAssignmentDecider(
|
||||
final LeaseAssignmentManager.InMemoryStorageView inMemoryStorageView,
|
||||
final int dampeningPercentageValue,
|
||||
final int reBalanceThreshold,
|
||||
final boolean allowThroughputOvershoot) {
|
||||
this.inMemoryStorageView = inMemoryStorageView;
|
||||
this.dampeningPercentageValue = dampeningPercentageValue;
|
||||
this.reBalanceThreshold = reBalanceThreshold;
|
||||
this.allowThroughputOvershoot = allowThroughputOvershoot;
|
||||
initialize();
|
||||
final Comparator<WorkerMetricStats> comparator = Comparator.comparingDouble(
|
||||
workerMetrics -> workerMetrics.computePercentageToReachAverage(workerMetricsToFleetLevelAverageMap));
|
||||
this.assignableWorkerSortedByAvailableCapacity = new PriorityQueue<>(comparator.reversed());
|
||||
this.assignableWorkerSortedByAvailableCapacity.addAll(
|
||||
getAvailableWorkersForAssignment(inMemoryStorageView.getActiveWorkerMetrics()));
|
||||
}
|
||||
|
||||
private void initialize() {
|
||||
final Map<String, Double> workerMetricsNameToAverage = inMemoryStorageView.getActiveWorkerMetrics().stream()
|
||||
.flatMap(workerMetrics -> workerMetrics.getMetricStats().keySet().stream()
|
||||
.map(workerMetricsName ->
|
||||
new SimpleEntry<>(workerMetricsName, workerMetrics.getMetricStat(workerMetricsName))))
|
||||
.collect(Collectors.groupingBy(
|
||||
SimpleEntry::getKey, HashMap::new, Collectors.averagingDouble(SimpleEntry::getValue)));
|
||||
|
||||
workerMetricsToFleetLevelAverageMap.putAll(workerMetricsNameToAverage);
|
||||
|
||||
final int totalWorkers =
|
||||
Math.max(inMemoryStorageView.getActiveWorkerMetrics().size(), 1);
|
||||
this.targetLeasePerWorker = Math.max(inMemoryStorageView.getLeaseList().size() / totalWorkers, 1);
|
||||
}
|
||||
|
||||
private List<WorkerMetricStats> getAvailableWorkersForAssignment(final List<WorkerMetricStats> workerMetricsList) {
|
||||
// Workers with WorkerMetricStats running hot are also available for assignment as the goal is to balance
|
||||
// utilization
|
||||
// always (e.g., if all workers have hot WorkerMetricStats, balance the variance between them too)
|
||||
return workerMetricsList.stream()
|
||||
.filter(workerMetrics -> inMemoryStorageView.isWorkerTotalThroughputLessThanMaxThroughput(
|
||||
workerMetrics.getWorkerId())
|
||||
&& inMemoryStorageView.isWorkerAssignedLeasesLessThanMaxLeases(workerMetrics.getWorkerId()))
|
||||
.collect(Collectors.toList());
|
||||
}
|
||||
|
||||
@Override
|
||||
public void assignExpiredOrUnassignedLeases(final List<Lease> expiredOrUnAssignedLeases) {
|
||||
// Sort the expiredOrUnAssignedLeases using lastCounterIncrementNanos such that leases expired first are
|
||||
// picked first.
|
||||
// Unassigned leases have lastCounterIncrementNanos as zero and thus assigned first.
|
||||
Collections.sort(expiredOrUnAssignedLeases, Comparator.comparing(Lease::lastCounterIncrementNanos));
|
||||
final Set<Lease> assignedLeases = new HashSet<>();
|
||||
for (final Lease lease : expiredOrUnAssignedLeases) {
|
||||
final WorkerMetricStats workerToAssignLease = assignableWorkerSortedByAvailableCapacity.poll();
|
||||
if (nonNull(workerToAssignLease)) {
|
||||
assignLease(lease, workerToAssignLease);
|
||||
assignedLeases.add(lease);
|
||||
} else {
|
||||
log.info("No worker available to assign lease {}", lease.leaseKey());
|
||||
break;
|
||||
}
|
||||
}
|
||||
expiredOrUnAssignedLeases.removeAll(assignedLeases);
|
||||
}
|
||||
|
||||
private List<WorkerMetricStats> getWorkersToTakeLeasesFromIfRequired(
|
||||
final List<WorkerMetricStats> currentWorkerMetrics,
|
||||
final String workerMetricsName,
|
||||
final double workerMetricsValueAvg) {
|
||||
final List<WorkerMetricStats> workerIdsAboveAverage = new ArrayList<>();
|
||||
|
||||
final double upperLimit = workerMetricsValueAvg * (1.0D + (double) reBalanceThreshold / 100);
|
||||
final double lowerLimit = workerMetricsValueAvg * (1.0D - (double) reBalanceThreshold / 100);
|
||||
|
||||
WorkerMetricStats mostLoadedWorker = null;
|
||||
|
||||
log.info("Range for re-balance upper threshold {} and lower threshold {}", upperLimit, lowerLimit);
|
||||
|
||||
boolean shouldTriggerReBalance = false;
|
||||
for (final WorkerMetricStats workerMetrics : currentWorkerMetrics) {
|
||||
final double currentWorkerMetricsValue = workerMetrics.getMetricStat(workerMetricsName);
|
||||
final boolean isCurrentWorkerMetricsAboveOperatingRange =
|
||||
workerMetrics.isWorkerMetricAboveOperatingRange(workerMetricsName);
|
||||
/*
|
||||
If there is any worker, whose WorkerMetricStats value is between +/- reBalanceThreshold % of workerMetricsValueAvg or if
|
||||
worker's WorkerMetricStats value is above operating range trigger re-balance
|
||||
*/
|
||||
if (currentWorkerMetricsValue > upperLimit
|
||||
|| currentWorkerMetricsValue < lowerLimit
|
||||
|| isCurrentWorkerMetricsAboveOperatingRange) {
|
||||
shouldTriggerReBalance = true;
|
||||
}
|
||||
// Perform re-balance on the worker if its above upperLimit or if current WorkerMetricStats is above
|
||||
// operating range.
|
||||
if (currentWorkerMetricsValue >= upperLimit || isCurrentWorkerMetricsAboveOperatingRange) {
|
||||
workerIdsAboveAverage.add(workerMetrics);
|
||||
}
|
||||
if (mostLoadedWorker == null
|
||||
|| mostLoadedWorker.getMetricStat(workerMetricsName) < currentWorkerMetricsValue) {
|
||||
mostLoadedWorker = workerMetrics;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
If workerIdsAboveAverage is empty that means there is no worker with WorkerMetricStats value above upperLimit so pick
|
||||
the worker with higher CPU. This can happen when there is worker with WorkerMetricStats value below lowerLimit but
|
||||
all other workers are within upperLimit.
|
||||
*/
|
||||
if (workerIdsAboveAverage.isEmpty()) {
|
||||
workerIdsAboveAverage.add(mostLoadedWorker);
|
||||
}
|
||||
|
||||
return shouldTriggerReBalance ? workerIdsAboveAverage : Collections.emptyList();
|
||||
}
|
||||
|
||||
/**
|
||||
* Performs the balancing of the throughput assigned to workers based on the WorkerMetricsValues of worker with respect
|
||||
* to fleet level average.
|
||||
* Each WorkerMetricStats is treated independently to determine workers for re-balance computed (computed based on
|
||||
* reBalanceThreshold) are determined.
|
||||
* The magnitude of throughput to take is determined by how much worker is away from the average of that WorkerMetricStats
|
||||
* across fleet and in case of multiple WorkerMetricStats, the one with maximum magnitude of throughput is considered.
|
||||
*/
|
||||
@Override
|
||||
public void balanceWorkerVariance() {
|
||||
final List<WorkerMetricStats> activeWorkerMetrics = inMemoryStorageView.getActiveWorkerMetrics();
|
||||
|
||||
log.info("WorkerMetricStats to corresponding fleet level average : {}", workerMetricsToFleetLevelAverageMap);
|
||||
log.info("Active WorkerMetricStats : {}", activeWorkerMetrics);
|
||||
|
||||
final Map<String, Double> workerIdToThroughputToTakeMap = new HashMap<>();
|
||||
String largestOutlierWorkerMetricsName = "";
|
||||
double maxThroughputTake = -1.0D;
|
||||
|
||||
for (final Map.Entry<String, Double> workerMetricsToFleetLevelAverageEntry :
|
||||
workerMetricsToFleetLevelAverageMap.entrySet()) {
|
||||
final String workerMetricsName = workerMetricsToFleetLevelAverageEntry.getKey();
|
||||
|
||||
// Filter workers that does not have current WorkerMetricStats. This is possible if application is adding a
|
||||
// new WorkerMetricStats and currently in phase of deployment.
|
||||
final List<WorkerMetricStats> currentWorkerMetrics = activeWorkerMetrics.stream()
|
||||
.filter(workerMetrics -> workerMetrics.containsMetricStat(workerMetricsName))
|
||||
.collect(Collectors.toList());
|
||||
|
||||
final double fleetAverageForWorkerMetrics = workerMetricsToFleetLevelAverageEntry.getValue();
|
||||
|
||||
final List<WorkerMetricStats> workerToTakeLeasesFrom = getWorkersToTakeLeasesFromIfRequired(
|
||||
currentWorkerMetrics, workerMetricsName, fleetAverageForWorkerMetrics);
|
||||
|
||||
final Map<String, Double> workerIdToThroughputToTakeForCurrentWorkerMetrics = new HashMap<>();
|
||||
double totalThroughputToTakeForCurrentWorkerMetrics = 0D;
|
||||
for (final WorkerMetricStats workerToTakeLease : workerToTakeLeasesFrom) {
|
||||
final double workerMetricsValueForWorker = workerToTakeLease.getMetricStat(workerMetricsName);
|
||||
// Load to take based on the difference compared to the fleet level average
|
||||
final double loadPercentageToTake =
|
||||
(workerMetricsValueForWorker - fleetAverageForWorkerMetrics) / workerMetricsValueForWorker;
|
||||
// Dampen the load based on dampeningPercentageValue
|
||||
final double dampenedLoadPercentageToTake =
|
||||
loadPercentageToTake * ((double) dampeningPercentageValue / 100);
|
||||
final double throughputToTake =
|
||||
inMemoryStorageView.getTotalAssignedThroughput(workerToTakeLease.getWorkerId())
|
||||
* dampenedLoadPercentageToTake;
|
||||
log.info(
|
||||
"For worker : {} taking throughput : {} after dampening based on WorkerMetricStats : {}",
|
||||
workerToTakeLease.getWorkerId(),
|
||||
throughputToTake,
|
||||
workerMetricsName);
|
||||
totalThroughputToTakeForCurrentWorkerMetrics += throughputToTake;
|
||||
workerIdToThroughputToTakeForCurrentWorkerMetrics.put(
|
||||
workerToTakeLease.getWorkerId(), throughputToTake);
|
||||
}
|
||||
|
||||
/*
|
||||
If totalThroughputToTakeForCurrentWorkerMetrics is more than maxThroughputTake that means this WorkerMetricStats is more
|
||||
outlier so consider this for reBalancing
|
||||
*/
|
||||
if (maxThroughputTake < totalThroughputToTakeForCurrentWorkerMetrics) {
|
||||
largestOutlierWorkerMetricsName = workerMetricsName;
|
||||
workerIdToThroughputToTakeMap.clear();
|
||||
workerIdToThroughputToTakeMap.putAll(workerIdToThroughputToTakeForCurrentWorkerMetrics);
|
||||
maxThroughputTake = totalThroughputToTakeForCurrentWorkerMetrics;
|
||||
}
|
||||
}
|
||||
|
||||
log.info(
|
||||
"Largest outlier WorkerMetricStats is : {} and total of {} throughput will be rebalanced",
|
||||
largestOutlierWorkerMetricsName,
|
||||
maxThroughputTake);
|
||||
log.info("Workers to throughput taken from them is : {}", workerIdToThroughputToTakeMap);
|
||||
|
||||
final List<Map.Entry<String, Double>> sortedWorkerIdToThroughputToTakeEntries =
|
||||
new ArrayList<>(workerIdToThroughputToTakeMap.entrySet());
|
||||
// sort entries by values.
|
||||
Collections.sort(sortedWorkerIdToThroughputToTakeEntries, (e1, e2) -> e2.getValue()
|
||||
.compareTo(e1.getValue()));
|
||||
|
||||
for (final Map.Entry<String, Double> workerIdToThroughputToTakeEntry :
|
||||
sortedWorkerIdToThroughputToTakeEntries) {
|
||||
final String workerId = workerIdToThroughputToTakeEntry.getKey();
|
||||
|
||||
final double throughputToTake = workerIdToThroughputToTakeEntry.getValue();
|
||||
|
||||
final Queue<Lease> leasesToTake = getLeasesToTake(workerId, throughputToTake);
|
||||
|
||||
log.info(
|
||||
"Leases taken from worker : {} are : {}",
|
||||
workerId,
|
||||
leasesToTake.stream().map(Lease::leaseKey).collect(Collectors.toSet()));
|
||||
|
||||
for (final Lease lease : leasesToTake) {
|
||||
final WorkerMetricStats workerToAssign = assignableWorkerSortedByAvailableCapacity.poll();
|
||||
if (nonNull(workerToAssign)
|
||||
&& workerToAssign.willAnyMetricStatsGoAboveAverageUtilizationOrOperatingRange(
|
||||
workerMetricsToFleetLevelAverageMap,
|
||||
inMemoryStorageView.getTargetAverageThroughput(),
|
||||
lease.throughputKBps(),
|
||||
targetLeasePerWorker)) {
|
||||
log.info("No worker to assign anymore in this iteration due to hitting average values");
|
||||
break;
|
||||
}
|
||||
if (nonNull(workerToAssign)) {
|
||||
assignLease(lease, workerToAssign);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
printWorkerToUtilizationLog(inMemoryStorageView.getActiveWorkerMetrics());
|
||||
}
|
||||
|
||||
private Queue<Lease> getLeasesToTake(final String workerId, final double throughputToTake) {
|
||||
final Set<Lease> existingLeases =
|
||||
inMemoryStorageView.getWorkerToLeasesMap().get(workerId);
|
||||
|
||||
if (isNull(existingLeases) || existingLeases.isEmpty()) {
|
||||
return new ArrayDeque<>();
|
||||
}
|
||||
|
||||
if (inMemoryStorageView.getTotalAssignedThroughput(workerId) == 0D) {
|
||||
// This is the case where throughput of this worker is zero and have 1 or more leases assigned.
|
||||
// Its not possible to determine leases to take based on throughput so simply take 1 lease and move on.
|
||||
return new ArrayDeque<>(new ArrayList<>(existingLeases).subList(0, 1));
|
||||
}
|
||||
|
||||
return getLeasesCombiningToThroughput(workerId, throughputToTake);
|
||||
}
|
||||
|
||||
private void assignLease(final Lease lease, final WorkerMetricStats workerMetrics) {
|
||||
if (nonNull(lease.actualOwner()) && lease.actualOwner().equals(workerMetrics.getWorkerId())) {
|
||||
// if a new owner and current owner are same then no assignment to do
|
||||
// put back the worker as well as no assignment is done
|
||||
assignableWorkerSortedByAvailableCapacity.add(workerMetrics);
|
||||
return;
|
||||
}
|
||||
workerMetrics.extrapolateMetricStatValuesForAddedThroughput(
|
||||
workerMetricsToFleetLevelAverageMap,
|
||||
inMemoryStorageView.getTargetAverageThroughput(),
|
||||
lease.throughputKBps(),
|
||||
targetLeasePerWorker);
|
||||
log.info("Assigning lease : {} to worker : {}", lease.leaseKey(), workerMetrics.getWorkerId());
|
||||
inMemoryStorageView.performLeaseAssignment(lease, workerMetrics.getWorkerId());
|
||||
if (inMemoryStorageView.isWorkerTotalThroughputLessThanMaxThroughput(workerMetrics.getWorkerId())
|
||||
&& inMemoryStorageView.isWorkerAssignedLeasesLessThanMaxLeases(workerMetrics.getWorkerId())) {
|
||||
assignableWorkerSortedByAvailableCapacity.add(workerMetrics);
|
||||
}
|
||||
}
|
||||
|
||||
private void printWorkerToUtilizationLog(final List<WorkerMetricStats> activeWorkerMetrics) {
|
||||
activeWorkerMetrics.forEach(workerMetrics -> log.info(
|
||||
"WorkerId : {} and average WorkerMetricStats data : {}",
|
||||
workerMetrics.getWorkerId(),
|
||||
workerMetrics.getMetricStatsMap()));
|
||||
}
|
||||
|
||||
private Queue<Lease> getLeasesCombiningToThroughput(final String workerId, final double throughputToGet) {
|
||||
final List<Lease> assignedLeases =
|
||||
new ArrayList<>(inMemoryStorageView.getWorkerToLeasesMap().get(workerId));
|
||||
if (assignedLeases.isEmpty()) {
|
||||
// This is possible if the worker is having high utilization but does not have any leases assigned to it
|
||||
return new ArrayDeque<>();
|
||||
}
|
||||
// Shuffle leases to randomize what leases gets picked.
|
||||
Collections.shuffle(assignedLeases);
|
||||
final Queue<Lease> response = new ArrayDeque<>();
|
||||
double remainingThroughputToGet = throughputToGet;
|
||||
for (final Lease lease : assignedLeases) {
|
||||
// if adding this lease makes throughout to take go below zero avoid taking this lease.
|
||||
if (remainingThroughputToGet - lease.throughputKBps() <= 0) {
|
||||
continue;
|
||||
}
|
||||
remainingThroughputToGet -= lease.throughputKBps();
|
||||
response.add(lease);
|
||||
}
|
||||
|
||||
// If allowThroughputOvershoot is set to true, take a minimum throughput lease
|
||||
if (allowThroughputOvershoot && response.isEmpty()) {
|
||||
assignedLeases.stream()
|
||||
.min(Comparator.comparingDouble(Lease::throughputKBps))
|
||||
.ifPresent(response::add);
|
||||
}
|
||||
return response;
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,58 @@
|
|||
/*
|
||||
* Copyright 2024 Amazon.com, Inc. or its affiliates.
|
||||
* Licensed under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package software.amazon.kinesis.coordinator.migration;
|
||||
|
||||
/**
 * ClientVersion support during upgrade from KCLv2.x to KCLv3.x.
 *
 * This enum is persisted in storage, so any changes to it need to be backward compatible:
 * reordering the values is not backward compatible, and if a value is removed its name
 * cannot be reused without backward-compatibility considerations.
 */
public enum ClientVersion {
    /**
     * Transient start-state version used only while the Migration State Machine initializes;
     * never persisted as a steady state.
     */
    CLIENT_VERSION_INIT,
    /**
     * Used during the upgrade of an application from KCLv2.x to KCLv3.x. In this version
     * KCL workers emit WorkerMetricStats while still running the KCLv2.x algorithms for
     * leader election and lease assignment, and KCL monitors the worker fleet for
     * readiness to switch to KCLv3.x.
     */
    CLIENT_VERSION_UPGRADE_FROM_2X,
    /**
     * Used during rollback from CLIENT_VERSION_UPGRADE_FROM_2X or CLIENT_VERSION_3X_WITH_ROLLBACK,
     * which can only be initiated with the KCL migration tool when the customer wants to revert to
     * KCLv2.x functionality. In this version KCL does not emit WorkerMetricStats and runs the
     * KCLv2.x algorithms for leader election and lease assignment, while monitoring for the
     * roll-forward scenario where the client version is updated back to
     * CLIENT_VERSION_UPGRADE_FROM_2X via the migration tool.
     */
    CLIENT_VERSION_2X,
    /**
     * When workers operate in CLIENT_VERSION_UPGRADE_FROM_2X and the fleet is determined to be
     * KCLv3.x ready (lease table GSI active and worker metrics emitted by all lease owners),
     * the leader initiates the switch to the KCLv3.x algorithms by persisting this version in
     * the {@link MigrationState}, which lets every worker flip to KCLv3.x functionality. In this
     * version KCL also monitors for rollback: if the customer sets the version to
     * CLIENT_VERSION_2X with the migration tool, workers instantly flip back to
     * CLIENT_VERSION_2X behavior.
     */
    CLIENT_VERSION_3X_WITH_ROLLBACK,
    /**
     * A new application starting on KCLv3.x — or an upgraded application after the upgrade has
     * succeeded — uses this version to run all KCLv3.x algorithms with no rollback monitoring.
     * Terminal version.
     */
    CLIENT_VERSION_3X;
}
|
||||
|
|
@ -0,0 +1,161 @@
|
|||
/*
|
||||
* Copyright 2024 Amazon.com, Inc. or its affiliates.
|
||||
* Licensed under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package software.amazon.kinesis.coordinator.migration;
|
||||
|
||||
import java.time.Duration;
|
||||
import java.util.Random;
|
||||
import java.util.concurrent.ScheduledExecutorService;
|
||||
import java.util.concurrent.ScheduledFuture;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
|
||||
import lombok.RequiredArgsConstructor;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import software.amazon.awssdk.annotations.ThreadSafe;
|
||||
import software.amazon.awssdk.services.cloudwatch.model.StandardUnit;
|
||||
import software.amazon.kinesis.coordinator.CoordinatorStateDAO;
|
||||
import software.amazon.kinesis.leases.exceptions.DependencyException;
|
||||
import software.amazon.kinesis.leases.exceptions.InvalidStateException;
|
||||
import software.amazon.kinesis.metrics.MetricsFactory;
|
||||
import software.amazon.kinesis.metrics.MetricsLevel;
|
||||
import software.amazon.kinesis.metrics.MetricsScope;
|
||||
import software.amazon.kinesis.metrics.MetricsUtil;
|
||||
|
||||
import static software.amazon.kinesis.coordinator.migration.MigrationState.MIGRATION_HASH_KEY;
|
||||
import static software.amazon.kinesis.coordinator.migration.MigrationStateMachineImpl.METRICS_OPERATION;
|
||||
|
||||
/**
 * Change monitor for MigrationState.clientVersion that notifies a callback when the persisted
 * value changes away from a given expected value. This monitor is used for the rollback,
 * roll-forward and upgrade-to-3.x scenarios; see {@link ClientVersion} for details.
 *
 * Since all KCL workers run the monitor, the poll interval adds a random jitter
 * to stagger the reads to DDB.
 *
 * The class is thread-safe and invokes the callback on a separate (scheduler) thread,
 * while holding this monitor's lock.
 */
@Slf4j
@RequiredArgsConstructor
@ThreadSafe
public class ClientVersionChangeMonitor implements Runnable {

    /**
     * Interface of a callback to invoke when monitor condition is true.
     */
    public interface ClientVersionChangeCallback {
        // Invoked with the freshly-read MigrationState whose client version differs from
        // the monitor's expected version. May throw to signal the monitor to keep polling.
        void accept(final MigrationState currentMigrationState) throws InvalidStateException, DependencyException;
    }

    // Base poll period; actual period adds up to JITTER_FACTOR of this as random jitter.
    private static final long MONITOR_INTERVAL_MILLIS = Duration.ofMinutes(1).toMillis();
    private static final double JITTER_FACTOR = 0.1;

    private final MetricsFactory metricsFactory;
    private final CoordinatorStateDAO coordinatorStateDAO;
    private final ScheduledExecutorService stateMachineThreadPool;
    private final ClientVersionChangeCallback callback;
    // The client version this monitor watches for a change AWAY from.
    private final ClientVersion expectedVersion;
    private final Random random;
    // Effective poll period (base + jitter); computed in startMonitor().
    private long monitorIntervalMillis;

    // Non-null while the monitor is scheduled; doubles as the "running" flag for run().
    private ScheduledFuture<?> scheduledFuture;

    /**
     * Schedules this monitor on the state machine thread pool. Idempotent: subsequent
     * calls while already scheduled are no-ops.
     */
    public synchronized void startMonitor() {
        if (scheduledFuture == null) {
            final long jitter = (long) (random.nextDouble() * MONITOR_INTERVAL_MILLIS * JITTER_FACTOR);
            monitorIntervalMillis = MONITOR_INTERVAL_MILLIS + jitter;
            log.info(
                    "Monitoring for MigrationState client version change from {} every {}ms",
                    expectedVersion,
                    monitorIntervalMillis);
            scheduledFuture = stateMachineThreadPool.scheduleWithFixedDelay(
                    this, monitorIntervalMillis, monitorIntervalMillis, TimeUnit.MILLISECONDS);
        }
    }

    @Override
    public String toString() {
        return new StringBuilder(getClass().getSimpleName())
                .append("[")
                .append(expectedVersion)
                .append("]")
                .toString();
    }

    /**
     * Cancel the monitor explicitly before the condition is met, e.g. when the worker is going down.
     * Note on synchronization: the callback of this monitor is invoked while holding the lock on this
     * monitor object. If cancel is called from within the same lock context that the callback uses,
     * then it can lead to deadlock. Ensure the synchronization context between the callback and the
     * caller of cancel is not shared.
     */
    public synchronized void cancel() {
        if (scheduledFuture != null) {
            log.info("Cancelling {}", this);
            // false: let an in-flight run() finish rather than interrupting it.
            scheduledFuture.cancel(false);
        } else {
            log.info("Monitor {} is not running", this);
        }
    }

    /**
     * Single poll iteration: reads MigrationState from DDB and, if its client version differs
     * from {@code expectedVersion}, invokes the callback and cancels further polling. Any
     * exception (including from the callback) is logged and the monitor retries on the next
     * scheduled run.
     */
    @Override
    public synchronized void run() {
        try {
            if (scheduledFuture == null) {
                log.debug("Monitor has been cancelled, not running...");
                return;
            }

            final MigrationState migrationState =
                    (MigrationState) coordinatorStateDAO.getCoordinatorState(MIGRATION_HASH_KEY);
            if (migrationState != null) {
                if (migrationState.getClientVersion() != expectedVersion) {
                    log.info("MigrationState client version has changed {}, invoking monitor callback", migrationState);
                    callback.accept(migrationState);
                    log.info("Callback successful, monitoring cancelling itself.");
                    // stop further monitoring
                    scheduledFuture.cancel(false);
                    scheduledFuture = null;
                } else {
                    emitMetrics();
                    log.debug("No change detected {}", this);
                }
            }
        } catch (final Exception e) {
            log.warn(
                    "Exception occurred when monitoring for client version change from {}, will retry in {}",
                    expectedVersion,
                    monitorIntervalMillis,
                    e);
        }
    }

    // Emits a per-poll "current state" metric keyed off the version being monitored.
    // NOTE(review): expectedVersion values outside the cases below (e.g. CLIENT_VERSION_INIT)
    // throw IllegalStateException, which surfaces through run()'s catch as a warn log.
    private void emitMetrics() {
        final MetricsScope scope = MetricsUtil.createMetricsWithOperation(metricsFactory, METRICS_OPERATION);
        try {
            switch (expectedVersion) {
                case CLIENT_VERSION_3X_WITH_ROLLBACK:
                    scope.addData("CurrentState:3xWorker", 1, StandardUnit.COUNT, MetricsLevel.SUMMARY);
                    break;
                case CLIENT_VERSION_2X:
                case CLIENT_VERSION_UPGRADE_FROM_2X:
                    scope.addData("CurrentState:2xCompatibleWorker", 1, StandardUnit.COUNT, MetricsLevel.SUMMARY);
                    break;
                default:
                    throw new IllegalStateException(String.format("Unexpected version %s", expectedVersion.name()));
            }
        } finally {
            MetricsUtil.endScope(scope);
        }
    }
}
|
||||
|
|
@ -0,0 +1,159 @@
|
|||
/*
|
||||
* Copyright 2024 Amazon.com, Inc. or its affiliates.
|
||||
* Licensed under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package software.amazon.kinesis.coordinator.migration;
|
||||
|
||||
import java.util.Random;
|
||||
import java.util.concurrent.CompletableFuture;
|
||||
import java.util.concurrent.ScheduledExecutorService;
|
||||
|
||||
import lombok.NonNull;
|
||||
import lombok.RequiredArgsConstructor;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import software.amazon.awssdk.annotations.ThreadSafe;
|
||||
import software.amazon.awssdk.services.cloudwatch.model.StandardUnit;
|
||||
import software.amazon.kinesis.annotations.KinesisClientInternalApi;
|
||||
import software.amazon.kinesis.coordinator.CoordinatorStateDAO;
|
||||
import software.amazon.kinesis.coordinator.DynamicMigrationComponentsInitializer;
|
||||
import software.amazon.kinesis.leases.exceptions.DependencyException;
|
||||
import software.amazon.kinesis.leases.exceptions.InvalidStateException;
|
||||
import software.amazon.kinesis.metrics.MetricsLevel;
|
||||
import software.amazon.kinesis.metrics.MetricsScope;
|
||||
import software.amazon.kinesis.metrics.MetricsUtil;
|
||||
|
||||
import static software.amazon.kinesis.coordinator.migration.ClientVersion.CLIENT_VERSION_2X;
|
||||
import static software.amazon.kinesis.coordinator.migration.ClientVersion.CLIENT_VERSION_UPGRADE_FROM_2X;
|
||||
import static software.amazon.kinesis.coordinator.migration.MigrationStateMachineImpl.FAULT_METRIC;
|
||||
import static software.amazon.kinesis.coordinator.migration.MigrationStateMachineImpl.METRICS_OPERATION;
|
||||
|
||||
/**
|
||||
* State for CLIENT_VERSION_2X. In this state, the only allowed valid transition is
|
||||
* the roll-forward scenario which can only be performed using the KCL Migration tool.
|
||||
* So when the state machine enters this state, a monitor is started to detect the
|
||||
* roll-forward scenario.
|
||||
*/
|
||||
@KinesisClientInternalApi
|
||||
@RequiredArgsConstructor
|
||||
@Slf4j
|
||||
@ThreadSafe
|
||||
public class MigrationClientVersion2xState implements MigrationClientVersionState {
|
||||
private final MigrationStateMachine stateMachine;
|
||||
private final CoordinatorStateDAO coordinatorStateDAO;
|
||||
private final ScheduledExecutorService stateMachineThreadPool;
|
||||
private final DynamicMigrationComponentsInitializer initializer;
|
||||
private final Random random;
|
||||
|
||||
private ClientVersionChangeMonitor rollForwardMonitor;
|
||||
private boolean entered = false;
|
||||
private boolean left = false;
|
||||
|
||||
@Override
|
||||
public ClientVersion clientVersion() {
|
||||
return CLIENT_VERSION_2X;
|
||||
}
|
||||
|
||||
@Override
|
||||
public synchronized void enter(final ClientVersion fromClientVersion) {
|
||||
if (!entered) {
|
||||
log.info("Entering {} from {}", this, fromClientVersion);
|
||||
initializer.initializeClientVersionFor2x(fromClientVersion);
|
||||
|
||||
log.info("Starting roll-forward monitor");
|
||||
rollForwardMonitor = new ClientVersionChangeMonitor(
|
||||
initializer.metricsFactory(),
|
||||
coordinatorStateDAO,
|
||||
stateMachineThreadPool,
|
||||
this::onClientVersionChange,
|
||||
clientVersion(),
|
||||
random);
|
||||
rollForwardMonitor.startMonitor();
|
||||
entered = true;
|
||||
} else {
|
||||
log.info("Not entering {}", left ? "already exited state" : "already entered state");
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public synchronized void leave() {
|
||||
if (entered && !left) {
|
||||
log.info("Leaving {}", this);
|
||||
cancelRollForwardMonitor();
|
||||
left = false;
|
||||
} else {
|
||||
log.info("Cannot leave {}", entered ? "already exited state" : "because state is not active");
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return getClass().getSimpleName();
|
||||
}
|
||||
|
||||
/**
|
||||
* Callback handler to handle client version changes in MigrationState in DDB.
|
||||
* @param newState current MigrationState read from DDB where client version is not CLIENT_VERSION_2X
|
||||
* @throws InvalidStateException during transition to the next state based on the new ClientVersion
|
||||
* or if the new state in DDB is unexpected.
|
||||
*/
|
||||
private synchronized void onClientVersionChange(@NonNull final MigrationState newState)
|
||||
throws InvalidStateException, DependencyException {
|
||||
if (!entered || left) {
|
||||
log.warn("Received client version change notification on inactive state {}", this);
|
||||
return;
|
||||
}
|
||||
final MetricsScope scope =
|
||||
MetricsUtil.createMetricsWithOperation(initializer.metricsFactory(), METRICS_OPERATION);
|
||||
try {
|
||||
if (newState.getClientVersion() == CLIENT_VERSION_UPGRADE_FROM_2X) {
|
||||
log.info(
|
||||
"A roll-forward has been initiated for the application. Transition to {}",
|
||||
CLIENT_VERSION_UPGRADE_FROM_2X);
|
||||
// If this succeeds, the monitor will cancel itself.
|
||||
stateMachine.transitionTo(CLIENT_VERSION_UPGRADE_FROM_2X, newState);
|
||||
} else {
|
||||
// This should not happen, so throw an exception that allows the monitor to continue monitoring
|
||||
// changes, this allows KCL to operate in the current state and keep monitoring until a valid
|
||||
// state transition is possible.
|
||||
// However, there could be a split brain here, new workers will use DDB value as source of truth,
|
||||
// so we could also write back CLIENT_VERSION_2X to DDB to ensure all workers have consistent
|
||||
// behavior.
|
||||
// Ideally we don't expect modifications to DDB table out of the KCL migration tool scope,
|
||||
// so keeping it simple and not writing back to DDB, the error log below would help capture
|
||||
// any strange behavior if this happens.
|
||||
log.error(
|
||||
"Migration state has invalid client version {}. Transition from {} is not supported",
|
||||
newState,
|
||||
CLIENT_VERSION_2X);
|
||||
throw new InvalidStateException(String.format("Unexpected new state %s", newState));
|
||||
}
|
||||
} catch (final InvalidStateException | DependencyException e) {
|
||||
scope.addData(FAULT_METRIC, 1, StandardUnit.COUNT, MetricsLevel.SUMMARY);
|
||||
throw e;
|
||||
} finally {
|
||||
MetricsUtil.endScope(scope);
|
||||
}
|
||||
}
|
||||
|
||||
private void cancelRollForwardMonitor() {
|
||||
if (rollForwardMonitor != null) {
|
||||
final ClientVersionChangeMonitor localRollForwardMonitor = rollForwardMonitor;
|
||||
CompletableFuture.supplyAsync(() -> {
|
||||
log.info("Cancelling roll-forward monitor");
|
||||
localRollForwardMonitor.cancel();
|
||||
return null;
|
||||
});
|
||||
rollForwardMonitor = null;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,70 @@
|
|||
/*
|
||||
* Copyright 2024 Amazon.com, Inc. or its affiliates.
|
||||
* Licensed under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package software.amazon.kinesis.coordinator.migration;
|
||||
|
||||
import lombok.RequiredArgsConstructor;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import software.amazon.awssdk.annotations.ThreadSafe;
|
||||
import software.amazon.kinesis.annotations.KinesisClientInternalApi;
|
||||
import software.amazon.kinesis.coordinator.DynamicMigrationComponentsInitializer;
|
||||
import software.amazon.kinesis.leases.exceptions.DependencyException;
|
||||
|
||||
/**
|
||||
* State for CLIENT_VERSION_3X which enables KCL to run 3.x algorithms on new KCLv3.x application
|
||||
* or successfully upgraded application which upgraded from v2.x. This is a terminal state of the
|
||||
* state machine and no rollbacks are supported in this state.
|
||||
*/
|
||||
@KinesisClientInternalApi
|
||||
@RequiredArgsConstructor
|
||||
@Slf4j
|
||||
@ThreadSafe
|
||||
public class MigrationClientVersion3xState implements MigrationClientVersionState {
|
||||
private final MigrationStateMachine stateMachine;
|
||||
private final DynamicMigrationComponentsInitializer initializer;
|
||||
private boolean entered = false;
|
||||
private boolean left = false;
|
||||
|
||||
@Override
|
||||
public ClientVersion clientVersion() {
|
||||
return ClientVersion.CLIENT_VERSION_3X;
|
||||
}
|
||||
|
||||
@Override
|
||||
public synchronized void enter(final ClientVersion fromClientVersion) throws DependencyException {
|
||||
if (!entered) {
|
||||
log.info("Entering {} from {}", this, fromClientVersion);
|
||||
initializer.initializeClientVersionFor3x(fromClientVersion);
|
||||
entered = true;
|
||||
} else {
|
||||
log.info("Not entering {}", left ? "already exited state" : "already entered state");
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void leave() {
|
||||
if (entered && !left) {
|
||||
log.info("Leaving {}", this);
|
||||
entered = false;
|
||||
left = true;
|
||||
} else {
|
||||
log.info("Cannot leave {}", entered ? "already exited state" : "because state is not active");
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return getClass().getSimpleName();
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,156 @@
|
|||
/*
|
||||
* Copyright 2024 Amazon.com, Inc. or its affiliates.
|
||||
* Licensed under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package software.amazon.kinesis.coordinator.migration;
|
||||
|
||||
import java.util.Random;
|
||||
import java.util.concurrent.CompletableFuture;
|
||||
import java.util.concurrent.ScheduledExecutorService;
|
||||
|
||||
import lombok.RequiredArgsConstructor;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import software.amazon.awssdk.annotations.ThreadSafe;
|
||||
import software.amazon.awssdk.services.cloudwatch.model.StandardUnit;
|
||||
import software.amazon.kinesis.annotations.KinesisClientInternalApi;
|
||||
import software.amazon.kinesis.coordinator.CoordinatorStateDAO;
|
||||
import software.amazon.kinesis.coordinator.DynamicMigrationComponentsInitializer;
|
||||
import software.amazon.kinesis.leases.exceptions.DependencyException;
|
||||
import software.amazon.kinesis.leases.exceptions.InvalidStateException;
|
||||
import software.amazon.kinesis.metrics.MetricsLevel;
|
||||
import software.amazon.kinesis.metrics.MetricsScope;
|
||||
import software.amazon.kinesis.metrics.MetricsUtil;
|
||||
|
||||
import static software.amazon.kinesis.coordinator.migration.ClientVersion.CLIENT_VERSION_2X;
|
||||
import static software.amazon.kinesis.coordinator.migration.ClientVersion.CLIENT_VERSION_3X;
|
||||
import static software.amazon.kinesis.coordinator.migration.MigrationStateMachineImpl.FAULT_METRIC;
|
||||
import static software.amazon.kinesis.coordinator.migration.MigrationStateMachineImpl.METRICS_OPERATION;
|
||||
|
||||
/**
|
||||
* State for CLIENT_VERSION_3X_WITH_ROLLBACK which enables KCL to run its 3.x compliant algorithms
|
||||
* during the upgrade process after all KCL workers in the fleet are 3.x complaint. Since this
|
||||
* is an instant switch from CLIENT_VERSION_UPGRADE_FROM_2X, it also supports rollback if customers
|
||||
* see regression to allow for instant rollbacks as well. This would be achieved by customers
|
||||
* running a KCL migration tool to update MigrationState in DDB. So this state monitors for
|
||||
* rollback triggers and performs state transitions accordingly.
|
||||
*/
|
||||
@Slf4j
|
||||
@KinesisClientInternalApi
|
||||
@RequiredArgsConstructor
|
||||
@ThreadSafe
|
||||
public class MigrationClientVersion3xWithRollbackState implements MigrationClientVersionState {
|
||||
|
||||
private final MigrationStateMachine stateMachine;
|
||||
private final CoordinatorStateDAO coordinatorStateDAO;
|
||||
private final ScheduledExecutorService stateMachineThreadPool;
|
||||
private final DynamicMigrationComponentsInitializer initializer;
|
||||
private final Random random;
|
||||
|
||||
private ClientVersionChangeMonitor rollbackMonitor;
|
||||
private boolean entered;
|
||||
private boolean left;
|
||||
|
||||
@Override
|
||||
public ClientVersion clientVersion() {
|
||||
return ClientVersion.CLIENT_VERSION_3X_WITH_ROLLBACK;
|
||||
}
|
||||
|
||||
@Override
|
||||
public synchronized void enter(final ClientVersion fromClientVersion) throws DependencyException {
|
||||
if (!entered) {
|
||||
log.info("Entering {} from {}", this, fromClientVersion);
|
||||
initializer.initializeClientVersionFor3xWithRollback(fromClientVersion);
|
||||
// we need to run the rollback monitor
|
||||
log.info("Starting rollback monitor");
|
||||
rollbackMonitor = new ClientVersionChangeMonitor(
|
||||
initializer.metricsFactory(),
|
||||
coordinatorStateDAO,
|
||||
stateMachineThreadPool,
|
||||
this::onClientVersionChange,
|
||||
clientVersion(),
|
||||
random);
|
||||
rollbackMonitor.startMonitor();
|
||||
entered = true;
|
||||
} else {
|
||||
log.info("Not entering {}", left ? "already exited state" : "already entered state");
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void leave() {
|
||||
if (entered && !left) {
|
||||
log.info("Leaving {}", this);
|
||||
cancelRollbackMonitor();
|
||||
entered = false;
|
||||
left = true;
|
||||
} else {
|
||||
log.info("Cannot leave {}", entered ? "already exited state" : "because state is not active");
|
||||
}
|
||||
}
|
||||
|
||||
private synchronized void onClientVersionChange(final MigrationState newState)
|
||||
throws InvalidStateException, DependencyException {
|
||||
if (!entered || left) {
|
||||
log.warn("Received client version change notification on inactive state {}", this);
|
||||
return;
|
||||
}
|
||||
final MetricsScope scope =
|
||||
MetricsUtil.createMetricsWithOperation(initializer.metricsFactory(), METRICS_OPERATION);
|
||||
try {
|
||||
switch (newState.getClientVersion()) {
|
||||
case CLIENT_VERSION_2X:
|
||||
log.info("A rollback has been initiated for the application. Transition to {}", CLIENT_VERSION_2X);
|
||||
stateMachine.transitionTo(ClientVersion.CLIENT_VERSION_2X, newState);
|
||||
break;
|
||||
case CLIENT_VERSION_3X:
|
||||
log.info("Customer has switched to 3.x after successful upgrade, state machine will move to a"
|
||||
+ "terminal state and stop monitoring. Rollbacks will no longer be supported anymore");
|
||||
stateMachine.transitionTo(CLIENT_VERSION_3X, newState);
|
||||
// This worker will still be running the migrationAdaptive components in 3.x mode which will
|
||||
// no longer dynamically switch back to 2.x mode, however to directly run 3.x component without
|
||||
// adaption to migration (i.e. move to CLIENT_VERSION_3X state), it requires this worker to go
|
||||
// through the current deployment which initiated the switch to 3.x mode.
|
||||
break;
|
||||
default:
|
||||
// This should not happen, so throw an exception that allows the monitor to continue monitoring
|
||||
// changes, this allows KCL to operate in the current state and keep monitoring until a valid
|
||||
// state transition is possible.
|
||||
// However, there could be a split brain here, new workers will use DDB value as source of truth,
|
||||
// so we could also write back CLIENT_VERSION_3X_WITH_ROLLBACK to DDB to ensure all workers have
|
||||
// consistent behavior.
|
||||
// Ideally we don't expect modifications to DDB table out of the KCL migration tool scope,
|
||||
// so keeping it simple and not writing back to DDB, the error log below would help capture
|
||||
// any strange behavior if this happens.
|
||||
log.error("Migration state has invalid client version {}", newState);
|
||||
throw new InvalidStateException(String.format("Unexpected new state %s", newState));
|
||||
}
|
||||
} catch (final InvalidStateException | DependencyException e) {
|
||||
scope.addData(FAULT_METRIC, 1, StandardUnit.COUNT, MetricsLevel.SUMMARY);
|
||||
throw e;
|
||||
} finally {
|
||||
MetricsUtil.endScope(scope);
|
||||
}
|
||||
}
|
||||
|
||||
private void cancelRollbackMonitor() {
|
||||
if (rollbackMonitor != null) {
|
||||
final ClientVersionChangeMonitor localRollbackMonitor = rollbackMonitor;
|
||||
CompletableFuture.supplyAsync(() -> {
|
||||
log.info("Cancelling rollback monitor");
|
||||
localRollbackMonitor.cancel();
|
||||
return null;
|
||||
});
|
||||
rollbackMonitor = null;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,47 @@
|
|||
/*
|
||||
* Copyright 2024 Amazon.com, Inc. or its affiliates.
|
||||
* Licensed under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package software.amazon.kinesis.coordinator.migration;
|
||||
|
||||
import software.amazon.kinesis.leases.exceptions.DependencyException;
|
||||
|
||||
/**
 * Interface of a state implementation for the MigrationStateMachine.
 *
 * Each implementation encapsulates the behavior of one {@link ClientVersion}:
 * how KCL initializes itself for that version and which version changes it
 * monitors for in order to transition to the next state.
 */
public interface MigrationClientVersionState {

    /**
     * The associated clientVersion this state corresponds to.
     * @return ClientVersion that this state implements the logic for.
     */
    ClientVersion clientVersion();

    /**
     * Enter the state and perform the business logic of being in this state,
     * which includes performing any monitoring that enables the next state
     * transition and initializing KCL based on the ClientVersion.
     * @param fromClientVersion the previous state's version, in case any specific
     *                          action must be taken based on the state this state
     *                          is being entered from.
     * @throws DependencyException if DDB fails in unexpected ways for those states
     *                             that create the GSI
     */
    void enter(ClientVersion fromClientVersion) throws DependencyException;

    /**
     * Invoked after the transition to another state has occurred,
     * to allow printing any helpful logs or performing cleanup.
     */
    void leave();
}
|
||||
|
|
@ -0,0 +1,263 @@
|
|||
/*
|
||||
* Copyright 2024 Amazon.com, Inc. or its affiliates.
|
||||
* Licensed under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package software.amazon.kinesis.coordinator.migration;
|
||||
|
||||
import java.util.AbstractMap.SimpleEntry;
|
||||
import java.util.Map;
|
||||
import java.util.Random;
|
||||
import java.util.concurrent.Callable;
|
||||
|
||||
import lombok.RequiredArgsConstructor;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import software.amazon.awssdk.annotations.ThreadSafe;
|
||||
import software.amazon.awssdk.services.dynamodb.model.ExpectedAttributeValue;
|
||||
import software.amazon.kinesis.annotations.KinesisClientInternalApi;
|
||||
import software.amazon.kinesis.coordinator.CoordinatorConfig.ClientVersionConfig;
|
||||
import software.amazon.kinesis.coordinator.CoordinatorState;
|
||||
import software.amazon.kinesis.coordinator.CoordinatorStateDAO;
|
||||
import software.amazon.kinesis.leases.exceptions.DependencyException;
|
||||
import software.amazon.kinesis.leases.exceptions.InvalidStateException;
|
||||
import software.amazon.kinesis.leases.exceptions.ProvisionedThroughputException;
|
||||
|
||||
import static software.amazon.kinesis.coordinator.migration.ClientVersion.CLIENT_VERSION_2X;
|
||||
import static software.amazon.kinesis.coordinator.migration.ClientVersion.CLIENT_VERSION_3X;
|
||||
import static software.amazon.kinesis.coordinator.migration.ClientVersion.CLIENT_VERSION_3X_WITH_ROLLBACK;
|
||||
import static software.amazon.kinesis.coordinator.migration.ClientVersion.CLIENT_VERSION_UPGRADE_FROM_2X;
|
||||
import static software.amazon.kinesis.coordinator.migration.MigrationState.MIGRATION_HASH_KEY;
|
||||
|
||||
/**
|
||||
* Initializer to determine start state of the state machine which identifies the
|
||||
* state to initialize KCL when it is starting up. The initial state is determined based on the
|
||||
* customer configured {@link ClientVersionConfig} and the current {@link MigrationState} in DDB,
|
||||
* as follows
|
||||
* ClientVersionConfig | MigrationState (DDB) | initial client version
|
||||
* --------------------+---------------------------------+--------------------------------
|
||||
* COMPATIBLE_WITH_2X | Does not exist | CLIENT_VERSION_UPGRADE_FROM_2X
|
||||
* 3X | Does not exist | CLIENT_VERSION_3X
|
||||
* COMPATIBLE_WITH_2X | CLIENT_VERSION_3X_WITH_ROLLBACK | CLIENT_VERSION_3X_WITH_ROLLBACK
|
||||
* 3X | CLIENT_VERSION_3X_WITH_ROLLBACK | CLIENT_VERSION_3X
|
||||
* any | CLIENT_VERSION_2X | CLIENT_VERSION_2X
|
||||
* any | CLIENT_VERSION_UPGRADE_FROM_2X | CLIENT_VERSION_UPGRADE_FROM_2X
|
||||
* any | CLIENT_VERSION_3X | CLIENT_VERSION_3X
|
||||
*/
|
||||
@KinesisClientInternalApi
|
||||
@RequiredArgsConstructor
|
||||
@Slf4j
|
||||
@ThreadSafe
|
||||
public class MigrationClientVersionStateInitializer {
|
||||
private static final int MAX_INITIALIZATION_RETRY = 10;
|
||||
private static final long INITIALIZATION_RETRY_DELAY_MILLIS = 1000L;
|
||||
/**
|
||||
* A jitter factor of 10% to stagger the retries.
|
||||
*/
|
||||
private static final double JITTER_FACTOR = 0.1;
|
||||
|
||||
private final Callable<Long> timeProvider;
|
||||
private final CoordinatorStateDAO coordinatorStateDAO;
|
||||
private final ClientVersionConfig clientVersionConfig;
|
||||
private final Random random;
|
||||
private final String workerIdentifier;
|
||||
|
||||
public SimpleEntry<ClientVersion, MigrationState> getInitialState() throws DependencyException {
|
||||
log.info("Initializing migration state machine starting state, configured version {}", clientVersionConfig);
|
||||
|
||||
try {
|
||||
MigrationState migrationState = getMigrationStateFromDynamo();
|
||||
int retryCount = 0;
|
||||
while (retryCount++ < MAX_INITIALIZATION_RETRY) {
|
||||
final ClientVersion initialClientVersion = getClientVersionForInitialization(migrationState);
|
||||
if (migrationState.getClientVersion() != initialClientVersion) {
|
||||
// If update fails, the value represents current state in dynamo
|
||||
migrationState = updateMigrationStateInDynamo(migrationState, initialClientVersion);
|
||||
if (migrationState.getClientVersion() == initialClientVersion) {
|
||||
// update succeeded. Transition to the state
|
||||
return new SimpleEntry<>(initialClientVersion, migrationState);
|
||||
}
|
||||
final long delay = getInitializationRetryDelay();
|
||||
log.warn(
|
||||
"Failed to update migration state with {}, retry after delay {}",
|
||||
initialClientVersion,
|
||||
delay);
|
||||
safeSleep(delay);
|
||||
} else {
|
||||
return new SimpleEntry<>(initialClientVersion, migrationState);
|
||||
}
|
||||
}
|
||||
} catch (final InvalidStateException e) {
|
||||
log.error("Unable to initialize state machine", e);
|
||||
}
|
||||
throw new DependencyException(
|
||||
new RuntimeException("Unable to determine initial state for migration state machine"));
|
||||
}
|
||||
|
||||
public ClientVersion getClientVersionForInitialization(final MigrationState migrationState) {
|
||||
final ClientVersion nextClientVersion;
|
||||
switch (migrationState.getClientVersion()) {
|
||||
case CLIENT_VERSION_INIT:
|
||||
// There is no state in DDB, set state to config version and transition to configured version.
|
||||
nextClientVersion = getNextClientVersionBasedOnConfigVersion();
|
||||
log.info("Application is starting in {}", nextClientVersion);
|
||||
break;
|
||||
case CLIENT_VERSION_3X_WITH_ROLLBACK:
|
||||
if (clientVersionConfig == ClientVersionConfig.CLIENT_VERSION_CONFIG_3X) {
|
||||
// upgrade successful, allow transition to 3x.
|
||||
log.info("Application has successfully upgraded, transitioning to {}", CLIENT_VERSION_3X);
|
||||
nextClientVersion = CLIENT_VERSION_3X;
|
||||
break;
|
||||
}
|
||||
log.info("Initialize with {}", CLIENT_VERSION_3X_WITH_ROLLBACK);
|
||||
nextClientVersion = migrationState.getClientVersion();
|
||||
break;
|
||||
case CLIENT_VERSION_2X:
|
||||
log.info("Application has rolled-back, initialize with {}", CLIENT_VERSION_2X);
|
||||
nextClientVersion = migrationState.getClientVersion();
|
||||
break;
|
||||
case CLIENT_VERSION_UPGRADE_FROM_2X:
|
||||
log.info("Application is upgrading, initialize with {}", CLIENT_VERSION_UPGRADE_FROM_2X);
|
||||
nextClientVersion = migrationState.getClientVersion();
|
||||
break;
|
||||
case CLIENT_VERSION_3X:
|
||||
log.info("Initialize with {}", CLIENT_VERSION_3X);
|
||||
nextClientVersion = migrationState.getClientVersion();
|
||||
break;
|
||||
default:
|
||||
throw new IllegalStateException(String.format("Unknown version in DDB %s", migrationState));
|
||||
}
|
||||
return nextClientVersion;
|
||||
}
|
||||
|
||||
/**
|
||||
* Update the migration state's client version in dynamo conditional on the current client version
|
||||
* in dynamo. So that if another worker updates the value first, the update fails. If the update fails,
|
||||
* the method will read the latest value and return so that initialization can be retried.
|
||||
* If the value does not exist in dynamo, it will creat it.
|
||||
*/
|
||||
private MigrationState updateMigrationStateInDynamo(
|
||||
final MigrationState migrationState, final ClientVersion nextClientVersion) throws InvalidStateException {
|
||||
try {
|
||||
if (migrationState.getClientVersion() == ClientVersion.CLIENT_VERSION_INIT) {
|
||||
migrationState.update(nextClientVersion, workerIdentifier);
|
||||
log.info("Creating {}", migrationState);
|
||||
final boolean created = coordinatorStateDAO.createCoordinatorStateIfNotExists(migrationState);
|
||||
if (!created) {
|
||||
log.debug("Create {} did not succeed", migrationState);
|
||||
return getMigrationStateFromDynamo();
|
||||
}
|
||||
} else {
|
||||
log.info("Updating {} with {}", migrationState, nextClientVersion);
|
||||
final Map<String, ExpectedAttributeValue> expectations =
|
||||
migrationState.getDynamoClientVersionExpectation();
|
||||
migrationState.update(nextClientVersion, workerIdentifier);
|
||||
final boolean updated =
|
||||
coordinatorStateDAO.updateCoordinatorStateWithExpectation(migrationState, expectations);
|
||||
if (!updated) {
|
||||
log.debug("Update {} did not succeed", migrationState);
|
||||
return getMigrationStateFromDynamo();
|
||||
}
|
||||
}
|
||||
return migrationState;
|
||||
} catch (final ProvisionedThroughputException | DependencyException e) {
|
||||
log.debug(
|
||||
"Failed to update migration state {} with {}, return previous value to trigger a retry",
|
||||
migrationState,
|
||||
nextClientVersion,
|
||||
e);
|
||||
return migrationState;
|
||||
}
|
||||
}
|
||||
|
||||
private ClientVersion getNextClientVersionBasedOnConfigVersion() {
|
||||
switch (clientVersionConfig) {
|
||||
case CLIENT_VERSION_CONFIG_COMPATIBLE_WITH_2X:
|
||||
return CLIENT_VERSION_UPGRADE_FROM_2X;
|
||||
case CLIENT_VERSION_CONFIG_3X:
|
||||
return CLIENT_VERSION_3X;
|
||||
}
|
||||
throw new IllegalStateException(String.format("Unknown configured Client version %s", clientVersionConfig));
|
||||
}
|
||||
|
||||
/**
|
||||
* Read the current {@link MigrationState} from DDB with retries.
|
||||
* @return current Migration state from DDB, if none exists, an initial Migration State with CLIENT_VERSION_INIT
|
||||
* will be returned
|
||||
* @throws InvalidStateException, this occurs when dynamo table does not exist in which retrying is not useful.
|
||||
*/
|
||||
private MigrationState getMigrationStateFromDynamo() throws InvalidStateException {
|
||||
return executeCallableWithRetryAndJitter(
|
||||
() -> {
|
||||
final CoordinatorState state = coordinatorStateDAO.getCoordinatorState(MIGRATION_HASH_KEY);
|
||||
if (state == null) {
|
||||
log.info("No Migration state available in DDB");
|
||||
return new MigrationState(MIGRATION_HASH_KEY, workerIdentifier);
|
||||
}
|
||||
if (state instanceof MigrationState) {
|
||||
log.info("Current migration state in DDB {}", state);
|
||||
return (MigrationState) state;
|
||||
}
|
||||
throw new InvalidStateException(
|
||||
String.format("Unexpected state found not confirming to MigrationState schema %s", state));
|
||||
},
|
||||
"get MigrationState from DDB");
|
||||
}
|
||||
|
||||
/**
|
||||
* Helper method to retry a given callable upto MAX_INITIALIZATION_RETRY times for all retryable exceptions.
|
||||
* It considers InvalidStateException as non-retryable exception. During retry, it will compute a delay
|
||||
* with jitter before retrying.
|
||||
* @param callable callable to invoke either until it succeeds or max retry attempts exceed.
|
||||
* @param description a meaningful description to log exceptions
|
||||
* @return the value returned by the callable
|
||||
* @param <T> Return type of the callable
|
||||
* @throws InvalidStateException If the callable throws InvalidStateException, it will not be retried and will
|
||||
* be thrown back.
|
||||
*/
|
||||
private <T> T executeCallableWithRetryAndJitter(final Callable<T> callable, final String description)
|
||||
throws InvalidStateException {
|
||||
int retryCount = 0;
|
||||
while (retryCount++ < MAX_INITIALIZATION_RETRY) {
|
||||
try {
|
||||
return callable.call();
|
||||
} catch (final Exception e) {
|
||||
if (e instanceof InvalidStateException) {
|
||||
// throw the non-retryable exception
|
||||
throw (InvalidStateException) e;
|
||||
}
|
||||
final long delay = getInitializationRetryDelay();
|
||||
log.warn("Failed to {}, retry after delay {}", description, delay, e);
|
||||
|
||||
safeSleep(delay);
|
||||
}
|
||||
}
|
||||
throw new RuntimeException(
|
||||
String.format("Failed to %s after %d retries, giving up", description, MAX_INITIALIZATION_RETRY));
|
||||
}
|
||||
|
||||
private void safeSleep(final long delay) {
|
||||
try {
|
||||
Thread.sleep(delay);
|
||||
} catch (final InterruptedException ie) {
|
||||
log.debug("Interrupted sleep during state machine initialization retry");
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Generate a delay with jitter that is factor of the interval.
|
||||
* @return delay with jitter
|
||||
*/
|
||||
private long getInitializationRetryDelay() {
|
||||
final long jitter = (long) (random.nextDouble() * JITTER_FACTOR * INITIALIZATION_RETRY_DELAY_MILLIS);
|
||||
return INITIALIZATION_RETRY_DELAY_MILLIS + jitter;
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,241 @@
|
|||
/*
|
||||
* Copyright 2024 Amazon.com, Inc. or its affiliates.
|
||||
* Licensed under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package software.amazon.kinesis.coordinator.migration;
|
||||
|
||||
import java.util.Random;
|
||||
import java.util.concurrent.Callable;
|
||||
import java.util.concurrent.CompletableFuture;
|
||||
import java.util.concurrent.ScheduledExecutorService;
|
||||
|
||||
import lombok.RequiredArgsConstructor;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import software.amazon.awssdk.annotations.ThreadSafe;
|
||||
import software.amazon.awssdk.services.cloudwatch.model.StandardUnit;
|
||||
import software.amazon.kinesis.annotations.KinesisClientInternalApi;
|
||||
import software.amazon.kinesis.coordinator.CoordinatorStateDAO;
|
||||
import software.amazon.kinesis.coordinator.DynamicMigrationComponentsInitializer;
|
||||
import software.amazon.kinesis.leases.exceptions.DependencyException;
|
||||
import software.amazon.kinesis.leases.exceptions.InvalidStateException;
|
||||
import software.amazon.kinesis.metrics.MetricsLevel;
|
||||
import software.amazon.kinesis.metrics.MetricsScope;
|
||||
import software.amazon.kinesis.metrics.MetricsUtil;
|
||||
|
||||
import static software.amazon.kinesis.coordinator.migration.ClientVersion.CLIENT_VERSION_2X;
|
||||
import static software.amazon.kinesis.coordinator.migration.ClientVersion.CLIENT_VERSION_3X_WITH_ROLLBACK;
|
||||
import static software.amazon.kinesis.coordinator.migration.MigrationStateMachineImpl.FAULT_METRIC;
|
||||
import static software.amazon.kinesis.coordinator.migration.MigrationStateMachineImpl.METRICS_OPERATION;
|
||||
|
||||
/**
|
||||
* State for CLIENT_VERSION_UPGRADE_FROM_2X. When state machine enters this state,
|
||||
* KCL is initialized to operate in dual mode for Lease assignment and Leader decider algorithms
|
||||
* which initially start in 2.x compatible mode and when all the KCL workers are 3.x compliant,
|
||||
* it dynamically switches to the 3.x algorithms. It also monitors for rollback
|
||||
* initiated from customer via the KCL migration tool and instantly switches back to the 2.x
|
||||
* complaint algorithms.
|
||||
* The allowed state transitions are to CLIENT_VERSION_3X_WITH_ROLLBACK when KCL workers are
|
||||
* 3.x complaint, and to CLIENT_VERSION_2X when customer has initiated a rollback.
|
||||
* Only the leader KCL worker performs migration ready monitor and notifies all workers (including
|
||||
* itself) via a MigrationState update. When all worker's monitor notice the MigrationState change
|
||||
* (including itself), it will transition to CLIENT_VERSION_3X_WITH_ROLLBACK.
|
||||
*/
|
||||
@KinesisClientInternalApi
|
||||
@RequiredArgsConstructor
|
||||
@Slf4j
|
||||
@ThreadSafe
|
||||
public class MigrationClientVersionUpgradeFrom2xState implements MigrationClientVersionState {
|
||||
private final MigrationStateMachine stateMachine;
|
||||
private final Callable<Long> timeProvider;
|
||||
private final CoordinatorStateDAO coordinatorStateDAO;
|
||||
private final ScheduledExecutorService stateMachineThreadPool;
|
||||
private final DynamicMigrationComponentsInitializer initializer;
|
||||
private final Random random;
|
||||
private final MigrationState currentMigrationState;
|
||||
private final long flipTo3XStabilizerTimeInSeconds;
|
||||
|
||||
private MigrationReadyMonitor migrationMonitor;
|
||||
private ClientVersionChangeMonitor clientVersionChangeMonitor;
|
||||
private boolean entered = false;
|
||||
private boolean left = false;
|
||||
|
||||
@Override
|
||||
public ClientVersion clientVersion() {
|
||||
return ClientVersion.CLIENT_VERSION_UPGRADE_FROM_2X;
|
||||
}
|
||||
|
||||
@Override
|
||||
public synchronized void enter(final ClientVersion fromClientVersion) throws DependencyException {
|
||||
if (!entered) {
|
||||
log.info("Entering state {} from {}", this, fromClientVersion);
|
||||
initializer.initializeClientVersionForUpgradeFrom2x(fromClientVersion);
|
||||
|
||||
log.info("Starting migration ready monitor to monitor 3.x compliance of the KCL workers");
|
||||
migrationMonitor = new MigrationReadyMonitor(
|
||||
initializer.metricsFactory(),
|
||||
timeProvider,
|
||||
initializer.leaderDecider(),
|
||||
initializer.workerIdentifier(),
|
||||
initializer.workerMetricsDAO(),
|
||||
initializer.workerMetricsExpirySeconds(),
|
||||
initializer.leaseRefresher(),
|
||||
stateMachineThreadPool,
|
||||
this::onMigrationReady,
|
||||
flipTo3XStabilizerTimeInSeconds);
|
||||
migrationMonitor.startMonitor();
|
||||
|
||||
log.info("Starting monitor for rollback and flip to 3.x");
|
||||
clientVersionChangeMonitor = new ClientVersionChangeMonitor(
|
||||
initializer.metricsFactory(),
|
||||
coordinatorStateDAO,
|
||||
stateMachineThreadPool,
|
||||
this::onClientVersionChange,
|
||||
clientVersion(),
|
||||
random);
|
||||
clientVersionChangeMonitor.startMonitor();
|
||||
entered = true;
|
||||
} else {
|
||||
log.info("Not entering {}", left ? "already exited state" : "already entered state");
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public synchronized void leave() {
|
||||
if (entered && !left) {
|
||||
log.info("Leaving {}", this);
|
||||
cancelMigrationReadyMonitor();
|
||||
cancelClientChangeVersionMonitor();
|
||||
entered = false;
|
||||
} else {
|
||||
log.info("Cannot leave {}", entered ? "already exited state" : "because state is not active");
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return getClass().getSimpleName();
|
||||
}
|
||||
|
||||
private synchronized void onMigrationReady() {
|
||||
// this is invoked on the leader worker only
|
||||
if (!entered || left || migrationMonitor == null) {
|
||||
log.info("Ignoring migration ready monitor, state already transitioned");
|
||||
return;
|
||||
}
|
||||
// update dynamo with the state to toggle to 3.x
|
||||
// and let the clientVersionChange kick in to do state transition
|
||||
// this way both leader and non-leader worker all transition when
|
||||
// it discovers the update from ddb.
|
||||
if (updateDynamoStateForTransition()) {
|
||||
// successfully toggled the state, now we can cancel the monitor
|
||||
cancelMigrationReadyMonitor();
|
||||
}
|
||||
// else - either migration ready monitor will retry or
|
||||
// client Version change callback will initiate the next state transition.
|
||||
}
|
||||
|
||||
private void cancelMigrationReadyMonitor() {
|
||||
if (migrationMonitor != null) {
|
||||
final MigrationReadyMonitor localMigrationMonitor = migrationMonitor;
|
||||
CompletableFuture.supplyAsync(() -> {
|
||||
log.info("Cancelling migration ready monitor");
|
||||
localMigrationMonitor.cancel();
|
||||
return null;
|
||||
});
|
||||
migrationMonitor = null;
|
||||
}
|
||||
}
|
||||
|
||||
private void cancelClientChangeVersionMonitor() {
|
||||
if (clientVersionChangeMonitor != null) {
|
||||
final ClientVersionChangeMonitor localClientVersionChangeMonitor = clientVersionChangeMonitor;
|
||||
CompletableFuture.supplyAsync(() -> {
|
||||
log.info("Cancelling client change version monitor");
|
||||
localClientVersionChangeMonitor.cancel();
|
||||
return null;
|
||||
});
|
||||
clientVersionChangeMonitor = null;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Callback handler to handle client version changes in MigrationState in DDB.
|
||||
* @param newState current MigrationState read from DDB where client version is not CLIENT_VERSION_UPGRADE_FROM_2X
|
||||
* @throws InvalidStateException during transition to the next state based on the new ClientVersion
|
||||
* or if the new state in DDB is unexpected.
|
||||
*/
|
||||
private synchronized void onClientVersionChange(final MigrationState newState)
|
||||
throws InvalidStateException, DependencyException {
|
||||
if (!entered || left) {
|
||||
log.warn("Received client version change notification on inactive state {}", this);
|
||||
return;
|
||||
}
|
||||
final MetricsScope scope =
|
||||
MetricsUtil.createMetricsWithOperation(initializer.metricsFactory(), METRICS_OPERATION);
|
||||
try {
|
||||
switch (newState.getClientVersion()) {
|
||||
case CLIENT_VERSION_2X:
|
||||
log.info("A rollback has been initiated for the application. Transition to {}", CLIENT_VERSION_2X);
|
||||
// cancel monitor asynchronously
|
||||
cancelMigrationReadyMonitor();
|
||||
stateMachine.transitionTo(CLIENT_VERSION_2X, newState);
|
||||
break;
|
||||
case CLIENT_VERSION_3X_WITH_ROLLBACK:
|
||||
log.info("KCL workers are v3.x compliant, transition to {}", CLIENT_VERSION_3X_WITH_ROLLBACK);
|
||||
cancelMigrationReadyMonitor();
|
||||
stateMachine.transitionTo(CLIENT_VERSION_3X_WITH_ROLLBACK, newState);
|
||||
break;
|
||||
default:
|
||||
// This should not happen, so throw an exception that allows the monitor to continue monitoring
|
||||
// changes, this allows KCL to operate in the current state and keep monitoring until a valid
|
||||
// state transition is possible.
|
||||
// However, there could be a split brain here, new workers will use DDB value as source of truth,
|
||||
// so we could also write back CLIENT_VERSION_UPGRADE_FROM_2X to DDB to ensure all workers have
|
||||
// consistent behavior.
|
||||
// Ideally we don't expect modifications to DDB table out of the KCL migration tool scope,
|
||||
// so keeping it simple and not writing back to DDB, the error log below would help capture
|
||||
// any strange behavior if this happens.
|
||||
log.error("Migration state has invalid client version {}", newState);
|
||||
throw new InvalidStateException(String.format("Unexpected new state %s", newState));
|
||||
}
|
||||
} catch (final DependencyException | InvalidStateException e) {
|
||||
scope.addData(FAULT_METRIC, 1, StandardUnit.COUNT, MetricsLevel.SUMMARY);
|
||||
throw e;
|
||||
} finally {
|
||||
MetricsUtil.endScope(scope);
|
||||
}
|
||||
}
|
||||
|
||||
private boolean updateDynamoStateForTransition() {
|
||||
final MetricsScope scope =
|
||||
MetricsUtil.createMetricsWithOperation(initializer.metricsFactory(), METRICS_OPERATION);
|
||||
try {
|
||||
final MigrationState newMigrationState = currentMigrationState
|
||||
.copy()
|
||||
.update(CLIENT_VERSION_3X_WITH_ROLLBACK, initializer.workerIdentifier());
|
||||
log.info("Updating Migration State in DDB with {} prev state {}", newMigrationState, currentMigrationState);
|
||||
return coordinatorStateDAO.updateCoordinatorStateWithExpectation(
|
||||
newMigrationState, currentMigrationState.getDynamoClientVersionExpectation());
|
||||
} catch (final Exception e) {
|
||||
log.warn(
|
||||
"Exception occurred when toggling to {}, upgradeReadyMonitor will retry the update"
|
||||
+ " if upgrade condition is still true",
|
||||
CLIENT_VERSION_3X_WITH_ROLLBACK,
|
||||
e);
|
||||
scope.addData(FAULT_METRIC, 1, StandardUnit.COUNT, MetricsLevel.SUMMARY);
|
||||
return false;
|
||||
} finally {
|
||||
MetricsUtil.endScope(scope);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,352 @@
|
|||
/*
|
||||
* Copyright 2024 Amazon.com, Inc. or its affiliates.
|
||||
* Licensed under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package software.amazon.kinesis.coordinator.migration;
|
||||
|
||||
import java.time.Duration;
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
import java.util.Objects;
|
||||
import java.util.Set;
|
||||
import java.util.concurrent.Callable;
|
||||
import java.util.concurrent.CompletableFuture;
|
||||
import java.util.concurrent.CompletionException;
|
||||
import java.util.concurrent.ScheduledExecutorService;
|
||||
import java.util.concurrent.ScheduledFuture;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import lombok.RequiredArgsConstructor;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import software.amazon.awssdk.annotations.ThreadSafe;
|
||||
import software.amazon.awssdk.services.cloudwatch.model.StandardUnit;
|
||||
import software.amazon.kinesis.coordinator.LeaderDecider;
|
||||
import software.amazon.kinesis.leases.Lease;
|
||||
import software.amazon.kinesis.leases.LeaseRefresher;
|
||||
import software.amazon.kinesis.leases.exceptions.DependencyException;
|
||||
import software.amazon.kinesis.metrics.MetricsFactory;
|
||||
import software.amazon.kinesis.metrics.MetricsLevel;
|
||||
import software.amazon.kinesis.metrics.MetricsScope;
|
||||
import software.amazon.kinesis.metrics.MetricsUtil;
|
||||
import software.amazon.kinesis.worker.metricstats.WorkerMetricStats;
|
||||
import software.amazon.kinesis.worker.metricstats.WorkerMetricStatsDAO;
|
||||
|
||||
import static software.amazon.kinesis.coordinator.migration.MigrationStateMachineImpl.METRICS_OPERATION;
|
||||
|
||||
/**
|
||||
* Monitor for KCL workers 3.x readiness. This monitor is started on all workers but only
|
||||
* executed on the leader of the fleet. The leader determines 3.x readiness if GSI of the lease
|
||||
* table is active and all lease owners are emitting WorkerMetricStats. The monitor performs this
|
||||
* check periodically and will invoke callback if the readiness conditions are true. Monitor
|
||||
* needs to be explicitly cancelled after the readiness trigger has successfully been handled.
|
||||
*
|
||||
* Thread safety - Guard for safety against public method invocation and internal runnable method.
|
||||
*/
|
||||
@Slf4j
|
||||
@ThreadSafe
|
||||
public class MigrationReadyMonitor implements Runnable {
|
||||
private static final long MONITOR_INTERVAL_MILLIS = Duration.ofMinutes(1).toMillis();
|
||||
private static final long LOG_INTERVAL_NANOS = Duration.ofMinutes(5).toNanos();
|
||||
|
||||
/**
|
||||
* Default retry attempt for loading leases and workers before giving up.
|
||||
*/
|
||||
private static final int DDB_LOAD_RETRY_ATTEMPT = 1;
|
||||
|
||||
private final MetricsFactory metricsFactory;
|
||||
private final Callable<Long> timeProvider;
|
||||
private final LeaderDecider leaderDecider;
|
||||
private final String currentWorkerId;
|
||||
private final WorkerMetricStatsDAO workerMetricStatsDAO;
|
||||
private final long workerMetricStatsExpirySeconds;
|
||||
private final LeaseRefresher leaseRefresher;
|
||||
private final ScheduledExecutorService stateMachineThreadPool;
|
||||
private final MonitorTriggerStabilizer triggerStabilizer;
|
||||
|
||||
private final LogRateLimiter rateLimitedStatusLogger = new LogRateLimiter(LOG_INTERVAL_NANOS);
|
||||
private ScheduledFuture<?> scheduledFuture;
|
||||
private boolean gsiStatusReady;
|
||||
private boolean workerMetricsReady;
|
||||
private Set<String> lastKnownUniqueLeaseOwners = new HashSet<>();
|
||||
private Set<String> lastKnownWorkersWithActiveWorkerMetrics = new HashSet<>();
|
||||
|
||||
    /**
     * Create the monitor; does not start it — call {@link #startMonitor()}.
     *
     * @param metricsFactory factory used to emit readiness metrics
     * @param timeProvider source of current time, passed to the trigger stabilizer
     * @param leaderDecider used to run the readiness check on the leader only
     * @param currentWorkerId this worker's identifier, checked against the leader
     * @param workerMetricStatsDAO DAO for reading WorkerMetricStats
     * @param workerMetricsExpirySeconds age after which worker metrics are considered inactive
     *        (stored as workerMetricStatsExpirySeconds)
     * @param leaseRefresher used to read leases and the lease-owner GSI status
     * @param stateMachineThreadPool executor on which the periodic check is scheduled
     * @param callback invoked (via the stabilizer) once readiness holds steadily
     * @param callbackStabilizationInSeconds how long readiness must hold before the callback fires
     */
    public MigrationReadyMonitor(
            final MetricsFactory metricsFactory,
            final Callable<Long> timeProvider,
            final LeaderDecider leaderDecider,
            final String currentWorkerId,
            final WorkerMetricStatsDAO workerMetricStatsDAO,
            final long workerMetricsExpirySeconds,
            final LeaseRefresher leaseRefresher,
            final ScheduledExecutorService stateMachineThreadPool,
            final Runnable callback,
            final long callbackStabilizationInSeconds) {
        this.metricsFactory = metricsFactory;
        this.timeProvider = timeProvider;
        this.leaderDecider = leaderDecider;
        this.currentWorkerId = currentWorkerId;
        this.workerMetricStatsDAO = workerMetricStatsDAO;
        this.workerMetricStatsExpirySeconds = workerMetricsExpirySeconds;
        this.leaseRefresher = leaseRefresher;
        this.stateMachineThreadPool = stateMachineThreadPool;
        // Stabilizer wraps the callback so it only fires after readiness holds for the stabilization window.
        this.triggerStabilizer =
                new MonitorTriggerStabilizer(timeProvider, callbackStabilizationInSeconds, callback, currentWorkerId);
    }
|
||||
|
||||
public synchronized void startMonitor() {
|
||||
if (Objects.isNull(scheduledFuture)) {
|
||||
|
||||
log.info("Starting migration ready monitor");
|
||||
scheduledFuture = stateMachineThreadPool.scheduleWithFixedDelay(
|
||||
this, MONITOR_INTERVAL_MILLIS, MONITOR_INTERVAL_MILLIS, TimeUnit.MILLISECONDS);
|
||||
} else {
|
||||
log.info("Ignoring monitor request, since it is already started");
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Cancel the monitor. Once the method returns callback will not be invoked,
|
||||
* but callback can be invoked reentrantly before this method returns.
|
||||
*/
|
||||
public synchronized void cancel() {
|
||||
if (Objects.nonNull(scheduledFuture)) {
|
||||
log.info("Cancelled migration ready monitor");
|
||||
scheduledFuture.cancel(true);
|
||||
scheduledFuture = null;
|
||||
} else {
|
||||
log.info("{} is currently not active", this);
|
||||
}
|
||||
}
|
||||
|
||||
    /**
     * Periodic readiness check. Only the leader evaluates readiness; non-leaders reset
     * the stabilizer and cached owner/metrics sets so a later leadership change starts fresh.
     * All throwables are caught so the scheduled task keeps running on the next interval.
     */
    @Override
    public synchronized void run() {
        try {
            if (Thread.currentThread().isInterrupted()) {
                // cancel() interrupts the task; exit quietly.
                log.info("{} cancelled, exiting...", this);
                return;
            }
            if (!leaderDecider.isLeader(currentWorkerId)) {
                log.debug("Not the leader, not performing migration ready check {}", this);
                // Discard any partial readiness observed while previously leader.
                triggerStabilizer.reset();
                lastKnownUniqueLeaseOwners.clear();
                lastKnownWorkersWithActiveWorkerMetrics.clear();
                return;
            }

            // Feed the current readiness into the stabilizer; it invokes the callback
            // only after readiness holds for the configured stabilization window.
            triggerStabilizer.call(isReadyForUpgradeTo3x());
            rateLimitedStatusLogger.log(() -> log.info("Monitor ran successfully {}", this));
        } catch (final Throwable t) {
            log.warn("{} failed, will retry after {}", this, MONITOR_INTERVAL_MILLIS, t);
        }
    }
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return new StringBuilder("UpgradeReadyMonitor[")
|
||||
.append("G=")
|
||||
.append(gsiStatusReady)
|
||||
.append(",W=")
|
||||
.append(workerMetricsReady)
|
||||
.append("]")
|
||||
.toString();
|
||||
}
|
||||
|
||||
    /**
     * Check both readiness conditions: the lease-owner GSI is active AND every lease owner
     * is emitting active WorkerMetricStats. Emits both statuses as metrics on every call.
     * Transitions of either flag are logged at INFO; steady state at DEBUG.
     *
     * @return true when the fleet is ready to flip to 3.x functionality
     * @throws DependencyException if the GSI status check fails
     */
    private boolean isReadyForUpgradeTo3x() throws DependencyException {
        final MetricsScope scope = MetricsUtil.createMetricsWithOperation(metricsFactory, METRICS_OPERATION);
        try {
            // If GSI is not ready, optimize to not check if worker metrics are being emitted
            final boolean localGsiReadyStatus = leaseRefresher.isLeaseOwnerToLeaseKeyIndexActive();
            if (localGsiReadyStatus != gsiStatusReady) {
                gsiStatusReady = localGsiReadyStatus;
                log.info("Gsi ready status changed to {}", gsiStatusReady);
            } else {
                log.debug("GsiReady status {}", gsiStatusReady);
            }
            // Short-circuit: worker metrics are only loaded once the GSI is active.
            return gsiStatusReady && areLeaseOwnersEmittingWorkerMetrics();
        } finally {
            scope.addData("GsiReadyStatus", gsiStatusReady ? 1 : 0, StandardUnit.COUNT, MetricsLevel.SUMMARY);
            scope.addData(
                    "WorkerMetricsReadyStatus", workerMetricsReady ? 1 : 0, StandardUnit.COUNT, MetricsLevel.SUMMARY);
            MetricsUtil.endScope(scope);
        }
    }
|
||||
|
||||
/**
 * Checks whether the set of distinct lease owners (from the lease table) exactly equals the set of
 * workers with active (non-expired) worker metric stats. Loads both tables concurrently, updates the
 * {@code workerMetricsReady} flag, and logs set contents whenever either set changes since the last run.
 *
 * @return the updated {@code workerMetricsReady} flag
 */
private boolean areLeaseOwnersEmittingWorkerMetrics() {
    // Kick off both DDB scans in parallel before joining either.
    final CompletableFuture<List<Lease>> leaseListFuture = loadLeaseListAsync();
    final CompletableFuture<List<WorkerMetricStats>> workerMetricsFuture = loadWorkerMetricStats();

    final List<Lease> leaseList = leaseListFuture.join();
    final Set<String> leaseOwners = getUniqueLeaseOwnersFromLeaseTable(leaseList);
    final List<WorkerMetricStats> workerMetricStatsList = workerMetricsFuture.join();
    final Set<String> workersWithActiveWorkerMetrics = getWorkersWithActiveWorkerMetricStats(workerMetricStatsList);

    // Leases are not checked for expired condition because:
    // If some worker has gone down and is not active, but has lease assigned to it, those leases
    // maybe expired. Since the worker is down, it may not have worker-metrics, or worker-metrics may not be active,
    // In that case, the migration condition is not considered to be met.
    // However, those leases should be assigned to another worker and so the check in the next
    // iteration could succeed. This is intentional to make sure all leases owners are accounted for
    // and the old owner does not come back up without worker metrics and reacquires the lease.
    final boolean localWorkerMetricsReady = leaseOwners.equals(workersWithActiveWorkerMetrics);
    if (localWorkerMetricsReady != workerMetricsReady) {
        // Dump full table contents only on a readiness flip, for debuggability.
        workerMetricsReady = localWorkerMetricsReady;
        log.info("WorkerMetricStats status changed to {}", workerMetricsReady);
        log.info("Lease List {}", leaseList);
        log.info("WorkerMetricStats {}", workerMetricStatsList);
    } else {
        log.debug("WorkerMetricStats ready status {}", workerMetricsReady);
    }

    // null lastKnown* means this is the first check since becoming leader (or after reset).
    if (lastKnownUniqueLeaseOwners == null) {
        log.info("Unique lease owners {}", leaseOwners);
    } else if (!lastKnownUniqueLeaseOwners.equals(leaseOwners)) {
        log.info("Unique lease owners changed to {}", leaseOwners);
    }
    lastKnownUniqueLeaseOwners = leaseOwners;

    if (lastKnownWorkersWithActiveWorkerMetrics == null) {
        log.info("Workers with active worker metric stats {}", workersWithActiveWorkerMetrics);
    } else if (!lastKnownWorkersWithActiveWorkerMetrics.equals(workersWithActiveWorkerMetrics)) {
        log.info("Workers with active worker metric stats changed {}", workersWithActiveWorkerMetrics);
    }
    lastKnownWorkersWithActiveWorkerMetrics = workersWithActiveWorkerMetrics;

    return workerMetricsReady;
}
|
||||
|
||||
private Set<String> getUniqueLeaseOwnersFromLeaseTable(final List<Lease> leaseList) {
|
||||
return leaseList.stream().map(Lease::leaseOwner).collect(Collectors.toSet());
|
||||
}
|
||||
|
||||
private Set<String> getWorkersWithActiveWorkerMetricStats(final List<WorkerMetricStats> workerMetricStats) {
|
||||
final long nowInSeconds = Duration.ofMillis(now(timeProvider)).getSeconds();
|
||||
return workerMetricStats.stream()
|
||||
.filter(metricStats -> isWorkerMetricStatsActive(metricStats, nowInSeconds))
|
||||
.map(WorkerMetricStats::getWorkerId)
|
||||
.collect(Collectors.toSet());
|
||||
}
|
||||
|
||||
/**
 * A worker's metric stats entry is active while its last-update time (seconds) plus the
 * configured expiry window is still in the future.
 *
 * @param metricStats entry to evaluate
 * @param nowInSeconds current time in epoch seconds
 * @return true when the entry has not yet expired
 */
private boolean isWorkerMetricStatsActive(final WorkerMetricStats metricStats, final long nowInSeconds) {
    return (metricStats.getLastUpdateTime() + workerMetricStatsExpirySeconds) > nowInSeconds;
}
|
||||
|
||||
/**
 * Asynchronously loads all worker metric stats entries, with bounded retry on failure.
 * Runs on the ForkJoinPool common pool (default executor of supplyAsync).
 *
 * @return future completing with the full worker metric stats list, or exceptionally
 *         (CompletionException) after retries are exhausted
 */
private CompletableFuture<List<WorkerMetricStats>> loadWorkerMetricStats() {
    return CompletableFuture.supplyAsync(() -> loadWithRetry(workerMetricStatsDAO::getAllWorkerMetricStats));
}
|
||||
|
||||
/**
 * Asynchronously loads the full lease list, with bounded retry on failure.
 * Runs on the ForkJoinPool common pool (default executor of supplyAsync).
 *
 * @return future completing with all leases, or exceptionally (CompletionException)
 *         after retries are exhausted
 */
private CompletableFuture<List<Lease>> loadLeaseListAsync() {
    return CompletableFuture.supplyAsync(() -> loadWithRetry(leaseRefresher::listLeases));
}
|
||||
|
||||
private <T> T loadWithRetry(final Callable<T> loadFunction) {
|
||||
int retryAttempt = 0;
|
||||
while (true) {
|
||||
try {
|
||||
return loadFunction.call();
|
||||
} catch (final Exception e) {
|
||||
if (retryAttempt < DDB_LOAD_RETRY_ATTEMPT) {
|
||||
log.warn(
|
||||
"Failed to load : {}, retrying",
|
||||
loadFunction.getClass().getName(),
|
||||
e);
|
||||
retryAttempt++;
|
||||
} else {
|
||||
throw new CompletionException(e);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Reads the current time in millis from the injected provider, falling back to
 * System.currentTimeMillis() if the provider throws. The fallback keeps monitoring
 * alive even with a misbehaving time source.
 *
 * @param timeProvider supplier of epoch milliseconds
 * @return epoch milliseconds from the provider, or wall clock on failure
 */
private static long now(final Callable<Long> timeProvider) {
    try {
        return timeProvider.call();
    } catch (final Exception e) {
        log.debug("Time provider threw exception, using System.currentTimeMillis", e);
        return System.currentTimeMillis();
    }
}
|
||||
|
||||
/**
 * Stabilize the monitor trigger before invoking the callback
 * to ensure we are consistently seeing the trigger for a configured
 * stabilization duration. The trigger must remain continuously true for
 * the full duration; any flip back to false restarts the clock.
 */
private static class MonitorTriggerStabilizer {
    // Supplier of current epoch millis (see the enclosing class's now() fallback handling).
    private final Callable<Long> timeProvider;
    // How long the trigger must stay continuously true before the callback fires.
    private final long stabilizationDurationInSeconds;
    // Invoked once stabilization is reached; note it fires on EVERY subsequent call()
    // while the trigger remains true, not just once.
    private final Runnable callback;
    private final String currentWorkerId;
    // Rate-limits the "still waiting for stabilization" INFO log.
    private final LogRateLimiter rateLimitedTriggerStatusLogger;

    // Time (millis) when currentTriggerStatus last changed value.
    private long lastToggleTimeInMillis;
    // Most recently observed trigger value.
    private boolean currentTriggerStatus;

    public MonitorTriggerStabilizer(
            final Callable<Long> timeProvider,
            final long stabilizationDurationInSeconds,
            final Runnable callback,
            final String currentWorkerId) {
        this.timeProvider = timeProvider;
        this.stabilizationDurationInSeconds = stabilizationDurationInSeconds;
        this.callback = callback;
        this.currentWorkerId = currentWorkerId;
        this.rateLimitedTriggerStatusLogger = new LogRateLimiter(LOG_INTERVAL_NANOS);
    }

    /**
     * Records the latest trigger observation; invokes the callback only after the
     * trigger has been continuously true for the stabilization duration.
     *
     * @param isMonitorTriggered current trigger observation
     */
    public void call(final boolean isMonitorTriggered) {
        final long now = now(timeProvider);
        if (currentTriggerStatus != isMonitorTriggered) {
            // Any flip (in either direction) restarts the stabilization clock.
            log.info("Trigger status has changed to {}", isMonitorTriggered);
            currentTriggerStatus = isMonitorTriggered;
            lastToggleTimeInMillis = now;
        }

        if (currentTriggerStatus) {
            final long deltaSeconds =
                    Duration.ofMillis(now - lastToggleTimeInMillis).getSeconds();
            if (deltaSeconds >= stabilizationDurationInSeconds) {
                log.info("Trigger has been consistently true for {}s, invoking callback", deltaSeconds);
                callback.run();
            } else {
                rateLimitedTriggerStatusLogger.log(() -> log.info(
                        "Trigger has been true for {}s, waiting for stabilization time of {}s",
                        deltaSeconds,
                        stabilizationDurationInSeconds));
            }
        }
    }

    /**
     * Clears the trigger, e.g. when this worker loses leadership, so a later
     * re-election starts stabilization from scratch.
     */
    public void reset() {
        if (currentTriggerStatus) {
            log.info("This worker {} is no longer the leader, reset current status", currentWorkerId);
        }
        currentTriggerStatus = false;
    }
}
|
||||
|
||||
/**
 * Runs a log action at most once per configured interval, dropping invocations in between.
 * Uses the monotonic System.nanoTime() clock. Not thread-safe; intended for use from the
 * single monitor thread.
 */
@RequiredArgsConstructor
private static class LogRateLimiter {
    // Minimum spacing between executed log actions, in nanoseconds.
    private final long logIntervalInNanos;

    // Earliest nanoTime at which the next log action may run.
    private long nextLogTime = System.nanoTime();

    public void log(final Runnable logger) {
        final long now = System.nanoTime();
        if (now >= nextLogTime) {
            logger.run();
            nextLogTime = now + logIntervalInNanos;
        }
    }
}
|
||||
}
|
||||
|
|
@ -0,0 +1,231 @@
|
|||
package software.amazon.kinesis.coordinator.migration;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import lombok.Getter;
|
||||
import lombok.RequiredArgsConstructor;
|
||||
import lombok.ToString;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import software.amazon.awssdk.services.dynamodb.model.AttributeAction;
|
||||
import software.amazon.awssdk.services.dynamodb.model.AttributeValue;
|
||||
import software.amazon.awssdk.services.dynamodb.model.AttributeValueUpdate;
|
||||
import software.amazon.awssdk.services.dynamodb.model.ExpectedAttributeValue;
|
||||
import software.amazon.kinesis.common.StackTraceUtils;
|
||||
import software.amazon.kinesis.coordinator.CoordinatorState;
|
||||
|
||||
/**
|
||||
* Data model of the Migration state. This is used to track the state related to migration
|
||||
* from KCLv2.x to KCLv3.x.
|
||||
*/
|
||||
@Getter
|
||||
@ToString(callSuper = true)
|
||||
@Slf4j
|
||||
public class MigrationState extends CoordinatorState {
|
||||
/**
|
||||
* Key value for the item in the CoordinatorState table
|
||||
*/
|
||||
public static final String MIGRATION_HASH_KEY = "Migration3.0";
|
||||
/**
|
||||
* Attribute name in migration state item, whose value is used during
|
||||
* the KCL v3.x migration process to know whether the workers need to
|
||||
* perform KCL v2.x compatible operations or can perform native KCL v3.x
|
||||
* operations.
|
||||
*/
|
||||
public static final String CLIENT_VERSION_ATTRIBUTE_NAME = "cv";
|
||||
|
||||
public static final String MODIFIED_BY_ATTRIBUTE_NAME = "mb";
|
||||
public static final String MODIFIED_TIMESTAMP_ATTRIBUTE_NAME = "mts";
|
||||
public static final String HISTORY_ATTRIBUTE_NAME = "h";
|
||||
private static final int MAX_HISTORY_ENTRIES = 10;
|
||||
|
||||
private ClientVersion clientVersion;
|
||||
private String modifiedBy;
|
||||
private long modifiedTimestamp;
|
||||
private final List<HistoryEntry> history;
|
||||
|
||||
private MigrationState(
|
||||
final String key,
|
||||
final ClientVersion clientVersion,
|
||||
final String modifiedBy,
|
||||
final long modifiedTimestamp,
|
||||
final List<HistoryEntry> historyEntries,
|
||||
final Map<String, AttributeValue> others) {
|
||||
setKey(key);
|
||||
setAttributes(others);
|
||||
this.clientVersion = clientVersion;
|
||||
this.modifiedBy = modifiedBy;
|
||||
this.modifiedTimestamp = modifiedTimestamp;
|
||||
this.history = historyEntries;
|
||||
}
|
||||
|
||||
public MigrationState(final String key, final String modifiedBy) {
|
||||
this(
|
||||
key,
|
||||
ClientVersion.CLIENT_VERSION_INIT,
|
||||
modifiedBy,
|
||||
System.currentTimeMillis(),
|
||||
new ArrayList<>(),
|
||||
new HashMap<>());
|
||||
}
|
||||
|
||||
public HashMap<String, AttributeValue> serialize() {
|
||||
final HashMap<String, AttributeValue> result = new HashMap<>();
|
||||
result.put(CLIENT_VERSION_ATTRIBUTE_NAME, AttributeValue.fromS(clientVersion.name()));
|
||||
result.put(MODIFIED_BY_ATTRIBUTE_NAME, AttributeValue.fromS(modifiedBy));
|
||||
result.put(MODIFIED_TIMESTAMP_ATTRIBUTE_NAME, AttributeValue.fromN(String.valueOf(modifiedTimestamp)));
|
||||
|
||||
if (!history.isEmpty()) {
|
||||
final List<AttributeValue> historyList = new ArrayList<>();
|
||||
for (final HistoryEntry entry : history) {
|
||||
historyList.add(AttributeValue.builder().m(entry.serialize()).build());
|
||||
}
|
||||
result.put(
|
||||
HISTORY_ATTRIBUTE_NAME,
|
||||
AttributeValue.builder().l(historyList).build());
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
public static MigrationState deserialize(final String key, final HashMap<String, AttributeValue> attributes) {
|
||||
if (!MIGRATION_HASH_KEY.equals(key)) {
|
||||
return null;
|
||||
}
|
||||
|
||||
try {
|
||||
final HashMap<String, AttributeValue> mutableAttributes = new HashMap<>(attributes);
|
||||
final ClientVersion clientVersion = ClientVersion.valueOf(
|
||||
mutableAttributes.remove(CLIENT_VERSION_ATTRIBUTE_NAME).s());
|
||||
final String modifiedBy =
|
||||
mutableAttributes.remove(MODIFIED_BY_ATTRIBUTE_NAME).s();
|
||||
final long modifiedTimestamp = Long.parseLong(
|
||||
mutableAttributes.remove(MODIFIED_TIMESTAMP_ATTRIBUTE_NAME).n());
|
||||
|
||||
final List<HistoryEntry> historyList = new ArrayList<>();
|
||||
if (attributes.containsKey(HISTORY_ATTRIBUTE_NAME)) {
|
||||
mutableAttributes.remove(HISTORY_ATTRIBUTE_NAME).l().stream()
|
||||
.map(historyEntry -> HistoryEntry.deserialize(historyEntry.m()))
|
||||
.forEach(historyList::add);
|
||||
}
|
||||
final MigrationState migrationState = new MigrationState(
|
||||
MIGRATION_HASH_KEY, clientVersion, modifiedBy, modifiedTimestamp, historyList, mutableAttributes);
|
||||
|
||||
if (!mutableAttributes.isEmpty()) {
|
||||
log.info("Unknown attributes {} for state {}", mutableAttributes, migrationState);
|
||||
}
|
||||
return migrationState;
|
||||
|
||||
} catch (final Exception e) {
|
||||
log.warn("Unable to deserialize state with key {} and attributes {}", key, attributes, e);
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
public Map<String, ExpectedAttributeValue> getDynamoClientVersionExpectation() {
|
||||
return new HashMap<String, ExpectedAttributeValue>() {
|
||||
{
|
||||
put(
|
||||
CLIENT_VERSION_ATTRIBUTE_NAME,
|
||||
ExpectedAttributeValue.builder()
|
||||
.value(AttributeValue.fromS(clientVersion.name()))
|
||||
.build());
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
public MigrationState copy() {
|
||||
return new MigrationState(
|
||||
getKey(),
|
||||
getClientVersion(),
|
||||
getModifiedBy(),
|
||||
getModifiedTimestamp(),
|
||||
new ArrayList<>(getHistory()),
|
||||
new HashMap<>(getAttributes()));
|
||||
}
|
||||
|
||||
public MigrationState update(final ClientVersion clientVersion, final String modifiedBy) {
|
||||
log.info(
|
||||
"Migration state is being updated to {} current state {} caller {}",
|
||||
clientVersion,
|
||||
this,
|
||||
StackTraceUtils.getPrintableStackTrace(Thread.currentThread().getStackTrace()));
|
||||
addHistoryEntry(this.clientVersion, this.modifiedBy, this.modifiedTimestamp);
|
||||
this.clientVersion = clientVersion;
|
||||
this.modifiedBy = modifiedBy;
|
||||
this.modifiedTimestamp = System.currentTimeMillis();
|
||||
return this;
|
||||
}
|
||||
|
||||
public void addHistoryEntry(
|
||||
final ClientVersion lastClientVersion, final String lastModifiedBy, final long lastModifiedTimestamp) {
|
||||
history.add(0, new HistoryEntry(lastClientVersion, lastModifiedBy, lastModifiedTimestamp));
|
||||
if (history.size() > MAX_HISTORY_ENTRIES) {
|
||||
log.info("Limit {} reached, dropping history {}", MAX_HISTORY_ENTRIES, history.remove(history.size() - 1));
|
||||
}
|
||||
}
|
||||
|
||||
public Map<String, AttributeValueUpdate> getDynamoUpdate() {
|
||||
final HashMap<String, AttributeValueUpdate> updates = new HashMap<>();
|
||||
updates.put(
|
||||
CLIENT_VERSION_ATTRIBUTE_NAME,
|
||||
AttributeValueUpdate.builder()
|
||||
.value(AttributeValue.fromS(clientVersion.name()))
|
||||
.action(AttributeAction.PUT)
|
||||
.build());
|
||||
updates.put(
|
||||
MODIFIED_BY_ATTRIBUTE_NAME,
|
||||
AttributeValueUpdate.builder()
|
||||
.value(AttributeValue.fromS(modifiedBy))
|
||||
.action(AttributeAction.PUT)
|
||||
.build());
|
||||
updates.put(
|
||||
MODIFIED_TIMESTAMP_ATTRIBUTE_NAME,
|
||||
AttributeValueUpdate.builder()
|
||||
.value(AttributeValue.fromN(String.valueOf(modifiedTimestamp)))
|
||||
.action(AttributeAction.PUT)
|
||||
.build());
|
||||
if (!history.isEmpty()) {
|
||||
updates.put(
|
||||
HISTORY_ATTRIBUTE_NAME,
|
||||
AttributeValueUpdate.builder()
|
||||
.value(AttributeValue.fromL(
|
||||
history.stream().map(HistoryEntry::toAv).collect(Collectors.toList())))
|
||||
.action(AttributeAction.PUT)
|
||||
.build());
|
||||
}
|
||||
return updates;
|
||||
}
|
||||
|
||||
@RequiredArgsConstructor
|
||||
@ToString
|
||||
public static class HistoryEntry {
|
||||
private final ClientVersion lastClientVersion;
|
||||
private final String lastModifiedBy;
|
||||
private final long lastModifiedTimestamp;
|
||||
|
||||
public AttributeValue toAv() {
|
||||
return AttributeValue.fromM(serialize());
|
||||
}
|
||||
|
||||
public Map<String, AttributeValue> serialize() {
|
||||
return new HashMap<String, AttributeValue>() {
|
||||
{
|
||||
put(CLIENT_VERSION_ATTRIBUTE_NAME, AttributeValue.fromS(lastClientVersion.name()));
|
||||
put(MODIFIED_BY_ATTRIBUTE_NAME, AttributeValue.fromS(lastModifiedBy));
|
||||
put(MODIFIED_TIMESTAMP_ATTRIBUTE_NAME, AttributeValue.fromN(String.valueOf(lastModifiedTimestamp)));
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
public static HistoryEntry deserialize(final Map<String, AttributeValue> map) {
|
||||
return new HistoryEntry(
|
||||
ClientVersion.valueOf(map.get(CLIENT_VERSION_ATTRIBUTE_NAME).s()),
|
||||
map.get(MODIFIED_BY_ATTRIBUTE_NAME).s(),
|
||||
Long.parseLong(map.get(MODIFIED_TIMESTAMP_ATTRIBUTE_NAME).n()));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,66 @@
|
|||
/*
|
||||
* Copyright 2024 Amazon.com, Inc. or its affiliates.
|
||||
* Licensed under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package software.amazon.kinesis.coordinator.migration;
|
||||
|
||||
import software.amazon.kinesis.leases.exceptions.DependencyException;
|
||||
import software.amazon.kinesis.leases.exceptions.InvalidStateException;
|
||||
|
||||
/**
 * State machine that provides:
 * 1. Seamless upgrade from 2.x to 3.x - 3.x has introduced new algorithms that are not compatible with 2.x
 *    workers, so the state machine allows to seamlessly run the 2.x functionality to be compliant with any
 *    2.x worker in the fleet, and also seamlessly switch to 3.x functionality when all KCL workers are
 *    3.x compliant.
 * 2. Instant rollbacks - Rollbacks are supported using the KCL Migration tool to revert back to 2.x functionality
 *    if customer finds regressions in 3.x functionality.
 * 3. Instant roll-forwards - Once any issue has been mitigated, roll-forwards are supported instantly
 *    with KCL Migration tool.
 */
public interface MigrationStateMachine {
    /**
     * Initialize the state machine by identifying the initial state when the KCL worker comes up for the first time.
     * @throws DependencyException When unable to identify the initial state.
     */
    void initialize() throws DependencyException;

    /**
     * Shutdown state machine and perform necessary cleanup for the worker to gracefully shutdown
     */
    void shutdown();

    /**
     * Terminate the state machine when it reaches a terminal state, which is a successful upgrade
     * to v3.x.
     */
    void terminate();

    /**
     * Perform transition from current state to the given new ClientVersion
     * @param nextClientVersion clientVersion of the new state the state machine must transition to
     * @param state the current MigrationState in dynamo
     * @throws InvalidStateException when transition fails, this allows the state machine to stay
     *          in the current state until a valid transition is possible
     * @throws DependencyException when transition fails due to dependency on DDB failing in
     *          unexpected ways.
     */
    void transitionTo(final ClientVersion nextClientVersion, final MigrationState state)
            throws InvalidStateException, DependencyException;

    /**
     * Get the ClientVersion of current state machine state.
     * @return ClientVersion of current state machine state
     */
    ClientVersion getCurrentClientVersion();
}
|
||||
|
|
@ -0,0 +1,254 @@
|
|||
/*
|
||||
* Copyright 2024 Amazon.com, Inc. or its affiliates.
|
||||
* Licensed under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package software.amazon.kinesis.coordinator.migration;
|
||||
|
||||
import java.util.AbstractMap.SimpleEntry;
|
||||
import java.util.Random;
|
||||
import java.util.concurrent.Callable;
|
||||
import java.util.concurrent.ScheduledExecutorService;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
|
||||
import lombok.Getter;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import software.amazon.awssdk.annotations.ThreadSafe;
|
||||
import software.amazon.awssdk.services.cloudwatch.model.StandardUnit;
|
||||
import software.amazon.kinesis.annotations.KinesisClientInternalApi;
|
||||
import software.amazon.kinesis.coordinator.CoordinatorConfig.ClientVersionConfig;
|
||||
import software.amazon.kinesis.coordinator.CoordinatorStateDAO;
|
||||
import software.amazon.kinesis.coordinator.DynamicMigrationComponentsInitializer;
|
||||
import software.amazon.kinesis.leases.exceptions.DependencyException;
|
||||
import software.amazon.kinesis.metrics.MetricsFactory;
|
||||
import software.amazon.kinesis.metrics.MetricsLevel;
|
||||
import software.amazon.kinesis.metrics.MetricsScope;
|
||||
import software.amazon.kinesis.metrics.MetricsUtil;
|
||||
|
||||
/**
|
||||
* Implementation of {@link MigrationStateMachine}
|
||||
*/
|
||||
@KinesisClientInternalApi
|
||||
@Getter
|
||||
@Slf4j
|
||||
@ThreadSafe
|
||||
public class MigrationStateMachineImpl implements MigrationStateMachine {
|
||||
public static final String FAULT_METRIC = "Fault";
|
||||
public static final String METRICS_OPERATION = "Migration";
|
||||
|
||||
private static final long THREAD_POOL_SHUTDOWN_TIMEOUT_SECONDS = 5L;
|
||||
|
||||
private final MetricsFactory metricsFactory;
|
||||
private final Callable<Long> timeProvider;
|
||||
private final CoordinatorStateDAO coordinatorStateDAO;
|
||||
private final ScheduledExecutorService stateMachineThreadPool;
|
||||
private DynamicMigrationComponentsInitializer initializer;
|
||||
private final ClientVersionConfig clientVersionConfig;
|
||||
private final Random random;
|
||||
private final String workerId;
|
||||
private final long flipTo3XStabilizerTimeInSeconds;
|
||||
private MigrationState startingMigrationState;
|
||||
|
||||
@Getter
|
||||
private ClientVersion startingClientVersion;
|
||||
|
||||
private MigrationClientVersionState currentMigrationClientVersionState = new MigrationClientVersionState() {
|
||||
@Override
|
||||
public ClientVersion clientVersion() {
|
||||
return ClientVersion.CLIENT_VERSION_INIT;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void enter(final ClientVersion fromClientVersion) {
|
||||
log.info("Entered {}...", clientVersion());
|
||||
}
|
||||
|
||||
@Override
|
||||
public void leave() {
|
||||
log.info("Left {}...", clientVersion());
|
||||
}
|
||||
};
|
||||
private boolean terminated = false;
|
||||
|
||||
public MigrationStateMachineImpl(
|
||||
final MetricsFactory metricsFactory,
|
||||
final Callable<Long> timeProvider,
|
||||
final CoordinatorStateDAO coordinatorStateDAO,
|
||||
final ScheduledExecutorService stateMachineThreadPool,
|
||||
final ClientVersionConfig clientVersionConfig,
|
||||
final Random random,
|
||||
final DynamicMigrationComponentsInitializer initializer,
|
||||
final String workerId,
|
||||
final long flipTo3XStabilizerTimeInSeconds) {
|
||||
this.metricsFactory = metricsFactory;
|
||||
this.timeProvider = timeProvider;
|
||||
this.coordinatorStateDAO = coordinatorStateDAO;
|
||||
this.stateMachineThreadPool = stateMachineThreadPool;
|
||||
this.clientVersionConfig = clientVersionConfig;
|
||||
this.random = random;
|
||||
this.initializer = initializer;
|
||||
this.workerId = workerId;
|
||||
this.flipTo3XStabilizerTimeInSeconds = flipTo3XStabilizerTimeInSeconds;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void initialize() throws DependencyException {
|
||||
if (startingClientVersion == null) {
|
||||
log.info("Initializing MigrationStateMachine");
|
||||
coordinatorStateDAO.initialize();
|
||||
final MigrationClientVersionStateInitializer startingStateInitializer =
|
||||
new MigrationClientVersionStateInitializer(
|
||||
timeProvider, coordinatorStateDAO, clientVersionConfig, random, workerId);
|
||||
final SimpleEntry<ClientVersion, MigrationState> dataForInitialization =
|
||||
startingStateInitializer.getInitialState();
|
||||
initializer.initialize(dataForInitialization.getKey());
|
||||
transitionTo(dataForInitialization.getKey(), dataForInitialization.getValue());
|
||||
startingClientVersion = dataForInitialization.getKey();
|
||||
startingMigrationState = dataForInitialization.getValue();
|
||||
log.info("MigrationStateMachine initial clientVersion {}", startingClientVersion);
|
||||
} else {
|
||||
log.info("MigrationStateMachine already initialized with clientVersion {}", startingClientVersion);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void shutdown() {
|
||||
terminate();
|
||||
if (!stateMachineThreadPool.isShutdown()) {
|
||||
stateMachineThreadPool.shutdown();
|
||||
try {
|
||||
if (stateMachineThreadPool.awaitTermination(THREAD_POOL_SHUTDOWN_TIMEOUT_SECONDS, TimeUnit.SECONDS)) {
|
||||
log.info(
|
||||
"StateMachineThreadPool did not shutdown within {} seconds, forcefully shutting down",
|
||||
THREAD_POOL_SHUTDOWN_TIMEOUT_SECONDS);
|
||||
stateMachineThreadPool.shutdownNow();
|
||||
}
|
||||
} catch (final InterruptedException e) {
|
||||
log.info("Interrupted when shutting down StateMachineThreadPool, forcefully shutting down");
|
||||
stateMachineThreadPool.shutdownNow();
|
||||
}
|
||||
}
|
||||
log.info("Shutdown successfully");
|
||||
}
|
||||
|
||||
@Override
|
||||
public synchronized void terminate() {
|
||||
if (!terminated && currentMigrationClientVersionState != null) {
|
||||
log.info("State machine is about to terminate");
|
||||
currentMigrationClientVersionState.leave();
|
||||
currentMigrationClientVersionState = null;
|
||||
log.info("State machine reached a terminal state.");
|
||||
terminated = true;
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public synchronized void transitionTo(final ClientVersion nextClientVersion, final MigrationState migrationState)
|
||||
throws DependencyException {
|
||||
if (terminated) {
|
||||
throw new IllegalStateException(String.format(
|
||||
"Cannot transition to %s after state machine is terminated, %s",
|
||||
nextClientVersion.name(), migrationState));
|
||||
}
|
||||
|
||||
final MigrationClientVersionState nextMigrationClientVersionState =
|
||||
createMigrationClientVersionState(nextClientVersion, migrationState);
|
||||
log.info(
|
||||
"Attempting to transition from {} to {}",
|
||||
currentMigrationClientVersionState.clientVersion(),
|
||||
nextClientVersion);
|
||||
currentMigrationClientVersionState.leave();
|
||||
|
||||
enter(nextMigrationClientVersionState);
|
||||
}
|
||||
|
||||
/**
|
||||
* Enter with retry. When entering the state machine for the first time, the caller has retry so exceptions
|
||||
* will be re-thrown. Once the state machine has initialized all transitions will be an indefinite retry.
|
||||
* It is possible the DDB state has changed by the time enter succeeds but that will occur as a new
|
||||
* state transition after entering the state. Usually the failures are due to unexpected issues with
|
||||
* DDB which will be transitional and will recover on a retry.
|
||||
* @param nextMigrationClientVersionState the state to transition to
|
||||
* @throws DependencyException If entering fails during state machine initialization.
|
||||
*/
|
||||
private void enter(final MigrationClientVersionState nextMigrationClientVersionState) throws DependencyException {
|
||||
boolean success = false;
|
||||
while (!success) {
|
||||
try {
|
||||
// Enter should never fail unless it is the starting state and fails to create the GSI,
|
||||
// in which case it is an unrecoverable error that is bubbled up and KCL start up will fail.
|
||||
nextMigrationClientVersionState.enter(currentMigrationClientVersionState.clientVersion());
|
||||
|
||||
currentMigrationClientVersionState = nextMigrationClientVersionState;
|
||||
log.info("Successfully transitioned to {}", nextMigrationClientVersionState.clientVersion());
|
||||
if (currentMigrationClientVersionState.clientVersion() == ClientVersion.CLIENT_VERSION_3X) {
|
||||
terminate();
|
||||
}
|
||||
success = true;
|
||||
} catch (final DependencyException e) {
|
||||
if (currentMigrationClientVersionState.clientVersion() == ClientVersion.CLIENT_VERSION_INIT) {
|
||||
throw e;
|
||||
}
|
||||
log.info(
|
||||
"Transitioning from {} to {} failed, retrying after a minute",
|
||||
currentMigrationClientVersionState.clientVersion(),
|
||||
nextMigrationClientVersionState.clientVersion(),
|
||||
e);
|
||||
|
||||
final MetricsScope scope = MetricsUtil.createMetricsWithOperation(metricsFactory, METRICS_OPERATION);
|
||||
scope.addData(FAULT_METRIC, 1, StandardUnit.COUNT, MetricsLevel.SUMMARY);
|
||||
MetricsUtil.endScope(scope);
|
||||
|
||||
try {
|
||||
Thread.sleep(1000);
|
||||
} catch (final InterruptedException ie) {
|
||||
log.info("Interrupted while sleeping before retrying state machine transition", ie);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private MigrationClientVersionState createMigrationClientVersionState(
|
||||
final ClientVersion clientVersion, final MigrationState migrationState) {
|
||||
switch (clientVersion) {
|
||||
case CLIENT_VERSION_2X:
|
||||
return new MigrationClientVersion2xState(
|
||||
this, coordinatorStateDAO, stateMachineThreadPool, initializer, random);
|
||||
case CLIENT_VERSION_UPGRADE_FROM_2X:
|
||||
return new MigrationClientVersionUpgradeFrom2xState(
|
||||
this,
|
||||
timeProvider,
|
||||
coordinatorStateDAO,
|
||||
stateMachineThreadPool,
|
||||
initializer,
|
||||
random,
|
||||
migrationState,
|
||||
flipTo3XStabilizerTimeInSeconds);
|
||||
case CLIENT_VERSION_3X_WITH_ROLLBACK:
|
||||
return new MigrationClientVersion3xWithRollbackState(
|
||||
this, coordinatorStateDAO, stateMachineThreadPool, initializer, random);
|
||||
case CLIENT_VERSION_3X:
|
||||
return new MigrationClientVersion3xState(this, initializer);
|
||||
}
|
||||
throw new IllegalStateException(String.format("Unknown client version %s", clientVersion));
|
||||
}
|
||||
|
||||
public ClientVersion getCurrentClientVersion() {
|
||||
if (currentMigrationClientVersionState != null) {
|
||||
return currentMigrationClientVersionState.clientVersion();
|
||||
} else if (terminated) {
|
||||
return ClientVersion.CLIENT_VERSION_3X;
|
||||
}
|
||||
throw new UnsupportedOperationException(
|
||||
"No current state when state machine is either not initialized" + " or already terminated");
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,270 @@
|
|||
package software.amazon.kinesis.leader;
|
||||
|
||||
import java.time.Duration;
|
||||
import java.time.Instant;
|
||||
import java.util.AbstractMap;
|
||||
import java.util.Optional;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
import java.util.concurrent.atomic.AtomicBoolean;
|
||||
|
||||
import com.amazonaws.services.dynamodbv2.AcquireLockOptions;
|
||||
import com.amazonaws.services.dynamodbv2.AmazonDynamoDBLockClient;
|
||||
import com.amazonaws.services.dynamodbv2.GetLockOptions;
|
||||
import com.amazonaws.services.dynamodbv2.LockItem;
|
||||
import com.amazonaws.services.dynamodbv2.model.LockCurrentlyUnavailableException;
|
||||
import com.google.common.annotations.VisibleForTesting;
|
||||
import lombok.RequiredArgsConstructor;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import software.amazon.awssdk.services.cloudwatch.model.StandardUnit;
|
||||
import software.amazon.awssdk.services.dynamodb.model.ResourceNotFoundException;
|
||||
import software.amazon.kinesis.coordinator.CoordinatorStateDAO;
|
||||
import software.amazon.kinesis.coordinator.LeaderDecider;
|
||||
import software.amazon.kinesis.metrics.MetricsFactory;
|
||||
import software.amazon.kinesis.metrics.MetricsLevel;
|
||||
import software.amazon.kinesis.metrics.MetricsScope;
|
||||
import software.amazon.kinesis.metrics.MetricsUtil;
|
||||
|
||||
import static java.util.Objects.isNull;
|
||||
import static software.amazon.kinesis.coordinator.CoordinatorState.LEADER_HASH_KEY;
|
||||
|
||||
/**
 * Implementation of {@link LeaderDecider} that elects a leader by holding a lock on a DynamoDB
 * table, using the AmazonDynamoDBLockClient library to perform the leader election.
 *
 * <p>Thread-safety: the mutating entry points ({@code isLeader}, {@code isAnyLeaderElected}) are
 * synchronized; cached results are reused within {@code heartbeatPeriodMillis} to limit DynamoDB
 * read traffic.
 */
@RequiredArgsConstructor
@Slf4j
public class DynamoDBLockBasedLeaderDecider implements LeaderDecider {
    // Default duration a held lock remains valid without a heartbeat.
    private static final Long DEFAULT_LEASE_DURATION_MILLIS =
            Duration.ofMinutes(2).toMillis();
    // Per the LockClient documentation, the heartbeat period must be at least 3x smaller than the
    // lease duration (here: 30s heartbeat vs 2min lease).
    private static final Long DEFAULT_HEARTBEAT_PERIOD_MILLIS =
            Duration.ofSeconds(30).toMillis();

    private final CoordinatorStateDAO coordinatorStateDao;
    private final AmazonDynamoDBLockClient dynamoDBLockClient;
    private final Long heartbeatPeriodMillis;
    private final String workerId;
    private final MetricsFactory metricsFactory;

    // Timestamp of the last isLeader() check against DynamoDB; guards the read-throttling below.
    private long lastCheckTimeInMillis = 0L;
    // Cached result of the last isLeader() check (only negative results are served from cache).
    private boolean lastIsLeaderResult = false;
    private final AtomicBoolean isShutdown = new AtomicBoolean(false);

    // Timestamp of the last isAnyLeaderElected() read from DynamoDB.
    private long lastIsAnyLeaderElectedDDBReadTimeMillis = 0L;
    // Cached result of the last isAnyLeaderElected() evaluation.
    private boolean lastIsAnyLeaderElectedResult = false;
    /**
     * Key value pair of LockItem to the time when it was first discovered.
     * If a new LockItem fetched from ddb has a different recordVersionNumber than the one in-memory,
     * it's considered a new LockItem, and the time when it was fetched is stored in memory to identify
     * lockItem expiry. This is used only in the context of the isAnyLeaderElected method.
     */
    private AbstractMap.SimpleEntry<LockItem, Long> lastIsAnyLeaderCheckLockItemToFirstEncounterTime = null;

    /**
     * Factory with explicit lease/heartbeat tuning; exposed for tests.
     * The lock client created here runs its own background heartbeat thread.
     */
    @VisibleForTesting
    static DynamoDBLockBasedLeaderDecider create(
            final CoordinatorStateDAO coordinatorStateDao,
            final String workerId,
            final Long leaseDuration,
            final Long heartbeatPeriod,
            final MetricsFactory metricsFactory) {
        final AmazonDynamoDBLockClient dynamoDBLockClient = new AmazonDynamoDBLockClient(coordinatorStateDao
                .getDDBLockClientOptionsBuilder()
                .withTimeUnit(TimeUnit.MILLISECONDS)
                .withLeaseDuration(leaseDuration)
                .withHeartbeatPeriod(heartbeatPeriod)
                .withCreateHeartbeatBackgroundThread(true)
                .withOwnerName(workerId)
                .build());

        return new DynamoDBLockBasedLeaderDecider(
                coordinatorStateDao, dynamoDBLockClient, heartbeatPeriod, workerId, metricsFactory);
    }

    /**
     * Factory using the default lease duration (2 min) and heartbeat period (30 s).
     */
    public static DynamoDBLockBasedLeaderDecider create(
            final CoordinatorStateDAO coordinatorStateDao, final String workerId, final MetricsFactory metricsFactory) {
        return create(
                coordinatorStateDao,
                workerId,
                DEFAULT_LEASE_DURATION_MILLIS,
                DEFAULT_HEARTBEAT_PERIOD_MILLIS,
                metricsFactory);
    }

    @Override
    public void initialize() {
        // No eager work; lock acquisition happens lazily on the first isLeader() call.
        log.info("Initializing DDB Lock based leader decider");
    }

    /**
     * Check the lockItem in storage and if the current worker is not leader worker, then tries to acquire lock and
     * returns true if it was able to acquire lock else false.
     * @param workerId ID of the worker
     * @return true if current worker is leader else false.
     */
    @Override
    public synchronized Boolean isLeader(final String workerId) {
        // if the decider has shutdown, then return false and don't try acquireLock anymore.
        if (isShutdown.get()) {
            publishIsLeaderMetrics(false);
            return false;
        }
        // If the last time we tried to take lock and didn't get lock, don't try to take again for heartbeatPeriodMillis
        // this is to avoid unnecessary calls to dynamoDB.
        // Different modules in KCL can request for isLeader check within heartbeatPeriodMillis, and this optimization
        // will help in those cases.
        // In case the last call returned true, we want to check the source always to ensure the correctness of leader.
        if (!lastIsLeaderResult && lastCheckTimeInMillis + heartbeatPeriodMillis > System.currentTimeMillis()) {
            publishIsLeaderMetrics(lastIsLeaderResult);
            return lastIsLeaderResult;
        }
        boolean response;
        // Get the lockItem from storage (if present).
        final Optional<LockItem> lockItem = dynamoDBLockClient.getLock(LEADER_HASH_KEY, Optional.empty());
        lockItem.ifPresent(item -> log.info("Worker : {} is the current leader.", item.getOwnerName()));

        // If no lock item exists, or the existing one has expired, nobody currently holds
        // leadership — attempt to acquire it for this worker.
        if (!lockItem.isPresent() || lockItem.get().isExpired()) {
            try {
                // Current worker does not hold the lock, try to acquireOne.
                final Optional<LockItem> leaderLockItem =
                        dynamoDBLockClient.tryAcquireLock(AcquireLockOptions.builder(LEADER_HASH_KEY)
                                .withRefreshPeriod(heartbeatPeriodMillis)
                                .withTimeUnit(TimeUnit.MILLISECONDS)
                                .withShouldSkipBlockingWait(true)
                                .build());
                leaderLockItem.ifPresent(item -> log.info("Worker : {} is new leader", item.getOwnerName()));
                // if leaderLockItem optional is empty, that means the lock is not acquired by this worker.
                response = leaderLockItem.isPresent();
            } catch (final InterruptedException e) {
                // Something bad happened, don't assume leadership and also release lock just in case the
                // lock was granted and still interrupt happened.
                // NOTE(review): the interrupt status is swallowed here; consider
                // Thread.currentThread().interrupt() to preserve it for the caller.
                releaseLeadershipIfHeld();
                log.error("Acquiring lock was interrupted in between", e);
                response = false;

            } catch (final LockCurrentlyUnavailableException e) {
                // Another worker holds an unexpired lock; expected during contention, not an error.
                response = false;
            }

        } else {
            // Lock exists and is live: leadership is decided by ownership of the lock item.
            response = lockItem.get().getOwnerName().equals(workerId);
        }

        lastCheckTimeInMillis = System.currentTimeMillis();
        lastIsLeaderResult = response;
        publishIsLeaderMetrics(response);
        return response;
    }

    // Emits a 0/1 datum recording the outcome of an isLeader() evaluation.
    private void publishIsLeaderMetrics(final boolean response) {
        final MetricsScope metricsScope =
                MetricsUtil.createMetricsWithOperation(metricsFactory, METRIC_OPERATION_LEADER_DECIDER);
        metricsScope.addData(
                METRIC_OPERATION_LEADER_DECIDER_IS_LEADER, response ? 1 : 0, StandardUnit.COUNT, MetricsLevel.DETAILED);
        MetricsUtil.endScope(metricsScope);
    }

    /**
     * Releases the lock if held by current worker when this method is invoked.
     * Idempotent: only the first call performs the release.
     */
    @Override
    public void shutdown() {
        if (!isShutdown.getAndSet(true)) {
            releaseLeadershipIfHeld();
        }
    }

    @Override
    public void releaseLeadershipIfHeld() {
        try {
            final Optional<LockItem> lockItem = dynamoDBLockClient.getLock(LEADER_HASH_KEY, Optional.empty());
            if (lockItem.isPresent()
                    && !lockItem.get().isExpired()
                    && lockItem.get().getOwnerName().equals(workerId)) {

                log.info(
                        "Current worker : {} holds the lock, releasing it.",
                        lockItem.get().getOwnerName());
                // LockItem.close() will release the lock if current worker owns it else this call is no op.
                lockItem.get().close();
            }
        } catch (final Exception e) {
            // Best-effort release: failure here must not propagate out of shutdown paths.
            log.error("Failed to complete releaseLeadershipIfHeld call.", e);
        }
    }

    /**
     * Returns if any ACTIVE leader exists that is elected by the current implementation which can be outside the
     * scope of this worker. That is leader elected by this implementation in any worker in fleet.
     * DynamoDBLockClient does not provide an interface which can tell if an active lock exists or not, thus
     * we need to put custom implementation.
     * The implementation performs DDB get every heartbeatPeriodMillis to have low RCU consumption, which means that
     * the leader could have been elected from the last time the check happened and before check happens again.
     * The information returned from this method has eventual consistency (up to heartbeatPeriodMillis interval).
     *
     * @return true, if any leader is elected else false.
     */
    @Override
    public synchronized boolean isAnyLeaderElected() {
        // Avoid going to ddb for every call and do it once every heartbeatPeriod to have low RCU usage.
        if (Duration.between(
                        Instant.ofEpochMilli(lastIsAnyLeaderElectedDDBReadTimeMillis),
                        Instant.ofEpochMilli(System.currentTimeMillis()))
                .toMillis()
                > heartbeatPeriodMillis) {
            final MetricsScope metricsScope = MetricsUtil.createMetricsWithOperation(
                    metricsFactory, this.getClass().getSimpleName() + ":isAnyLeaderElected");
            final long startTime = System.currentTimeMillis();
            try {
                lastIsAnyLeaderElectedDDBReadTimeMillis = System.currentTimeMillis();
                final Optional<LockItem> lockItem = dynamoDBLockClient.getLockFromDynamoDB(
                        GetLockOptions.builder(LEADER_HASH_KEY).build());

                if (!lockItem.isPresent()) {
                    // There is no LockItem in the ddb table, that means no one is holding lock.
                    lastIsAnyLeaderElectedResult = false;
                    log.info("LockItem present : {}", false);
                } else {
                    final LockItem ddbLockItem = lockItem.get();
                    if (isNull(lastIsAnyLeaderCheckLockItemToFirstEncounterTime)
                            || !ddbLockItem
                                    .getRecordVersionNumber()
                                    .equals(lastIsAnyLeaderCheckLockItemToFirstEncounterTime
                                            .getKey()
                                            .getRecordVersionNumber())) {
                        // This is the first isAnyLeaderElected call, so we can't evaluate if the LockItem has expired
                        // or not yet so consider LOCK as ACTIVE.
                        // OR LockItem in ddb and in-memory LockItem have different RecordVersionNumber
                        // and thus the LOCK is still ACTIVE
                        lastIsAnyLeaderElectedResult = true;
                        lastIsAnyLeaderCheckLockItemToFirstEncounterTime =
                                new AbstractMap.SimpleEntry<>(ddbLockItem, lastIsAnyLeaderElectedDDBReadTimeMillis);
                        log.info(
                                "LockItem present : {}, and this is either first call OR lockItem has had "
                                        + "a heartbeat",
                                true);
                    } else {
                        // There is no change in the ddb lock item, so if the last update time is more than
                        // lease duration, the lock is expired else it is still ACTIVE,
                        lastIsAnyLeaderElectedResult = lastIsAnyLeaderCheckLockItemToFirstEncounterTime.getValue()
                                        + ddbLockItem.getLeaseDuration()
                                > lastIsAnyLeaderElectedDDBReadTimeMillis;
                        log.info("LockItem present : {}, and lease expiry: {}", true, lastIsAnyLeaderElectedResult);
                    }
                }
            } catch (final ResourceNotFoundException exception) {
                log.info("Lock table does not exists...");
                // If the table itself doesn't exist, there is no elected leader.
                lastIsAnyLeaderElectedResult = false;
            } finally {
                metricsScope.addData(
                        "Latency",
                        System.currentTimeMillis() - startTime,
                        StandardUnit.MILLISECONDS,
                        MetricsLevel.DETAILED);
                MetricsUtil.endScope(metricsScope);
            }
        }
        return lastIsAnyLeaderElectedResult;
    }
}
|
||||
|
|
@ -0,0 +1,79 @@
|
|||
package software.amazon.kinesis.leader;
|
||||
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import software.amazon.awssdk.annotations.ThreadSafe;
|
||||
import software.amazon.awssdk.services.cloudwatch.model.StandardUnit;
|
||||
import software.amazon.kinesis.annotations.KinesisClientInternalApi;
|
||||
import software.amazon.kinesis.coordinator.LeaderDecider;
|
||||
import software.amazon.kinesis.metrics.MetricsFactory;
|
||||
import software.amazon.kinesis.metrics.MetricsLevel;
|
||||
import software.amazon.kinesis.metrics.MetricsScope;
|
||||
import software.amazon.kinesis.metrics.MetricsUtil;
|
||||
|
||||
import static java.util.Objects.nonNull;
|
||||
|
||||
/**
|
||||
* MigrationAdaptiveLeaderDecider that wraps around the actual LeaderDecider which can dynamically
|
||||
* change based on the MigrationStateMachine.
|
||||
*/
|
||||
@Slf4j
|
||||
@KinesisClientInternalApi
|
||||
@ThreadSafe
|
||||
public class MigrationAdaptiveLeaderDecider implements LeaderDecider {
|
||||
|
||||
private final MetricsFactory metricsFactory;
|
||||
private LeaderDecider currentLeaderDecider;
|
||||
|
||||
public MigrationAdaptiveLeaderDecider(final MetricsFactory metricsFactory) {
|
||||
this.metricsFactory = metricsFactory;
|
||||
}
|
||||
|
||||
@Override
|
||||
public synchronized Boolean isLeader(final String workerId) {
|
||||
if (currentLeaderDecider == null) {
|
||||
throw new IllegalStateException("LeaderDecider uninitialized");
|
||||
}
|
||||
|
||||
final MetricsScope scope =
|
||||
MetricsUtil.createMetricsWithOperation(metricsFactory, METRIC_OPERATION_LEADER_DECIDER);
|
||||
try {
|
||||
publishSelectedLeaderDeciderMetrics(scope, currentLeaderDecider);
|
||||
return currentLeaderDecider.isLeader(workerId);
|
||||
} finally {
|
||||
MetricsUtil.endScope(scope);
|
||||
}
|
||||
}
|
||||
|
||||
private static void publishSelectedLeaderDeciderMetrics(
|
||||
final MetricsScope scope, final LeaderDecider leaderDecider) {
|
||||
scope.addData(
|
||||
String.format(leaderDecider.getClass().getSimpleName()), 1D, StandardUnit.COUNT, MetricsLevel.DETAILED);
|
||||
}
|
||||
|
||||
public synchronized void updateLeaderDecider(final LeaderDecider leaderDecider) {
|
||||
if (currentLeaderDecider != null) {
|
||||
currentLeaderDecider.shutdown();
|
||||
log.info(
|
||||
"Updating leader decider dynamically from {} to {}",
|
||||
this.currentLeaderDecider.getClass().getSimpleName(),
|
||||
leaderDecider.getClass().getSimpleName());
|
||||
} else {
|
||||
log.info(
|
||||
"Initializing dynamic leader decider with {}",
|
||||
leaderDecider.getClass().getSimpleName());
|
||||
}
|
||||
currentLeaderDecider = leaderDecider;
|
||||
currentLeaderDecider.initialize();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void shutdown() {
|
||||
if (nonNull(currentLeaderDecider)) {
|
||||
log.info("Shutting down current {}", currentLeaderDecider.getClass().getSimpleName());
|
||||
currentLeaderDecider.shutdown();
|
||||
currentLeaderDecider = null;
|
||||
} else {
|
||||
log.info("LeaderDecider has already been shutdown");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -81,8 +81,20 @@ public class DynamoUtils {
|
|||
}
|
||||
}
|
||||
|
||||
public static AttributeValue createAttributeValue(Double doubleValue) {
|
||||
if (doubleValue == null) {
|
||||
throw new IllegalArgumentException("Double attributeValues cannot be null.");
|
||||
}
|
||||
|
||||
return AttributeValue.builder().n(doubleValue.toString()).build();
|
||||
}
|
||||
|
||||
public static String safeGetString(Map<String, AttributeValue> dynamoRecord, String key) {
|
||||
AttributeValue av = dynamoRecord.get(key);
|
||||
return safeGetString(av);
|
||||
}
|
||||
|
||||
public static String safeGetString(AttributeValue av) {
|
||||
if (av == null) {
|
||||
return null;
|
||||
} else {
|
||||
|
|
@ -99,4 +111,13 @@ public class DynamoUtils {
|
|||
return av.ss();
|
||||
}
|
||||
}
|
||||
|
||||
public static Double safeGetDouble(Map<String, AttributeValue> dynamoRecord, String key) {
|
||||
AttributeValue av = dynamoRecord.get(key);
|
||||
if (av == null) {
|
||||
return null;
|
||||
} else {
|
||||
return new Double(av.n());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -103,26 +103,6 @@ public class KinesisShardDetector implements ShardDetector {
|
|||
|
||||
private static final Boolean THROW_RESOURCE_NOT_FOUND_EXCEPTION = true;
|
||||
|
||||
    /**
     * Single-stream convenience constructor.
     *
     * @deprecated use the {@code StreamIdentifier}-based constructor instead; this overload only
     *             supports single-stream mode and applies the library default request timeout.
     */
    @Deprecated
    public KinesisShardDetector(
            KinesisAsyncClient kinesisClient,
            String streamName,
            long listShardsBackoffTimeInMillis,
            int maxListShardsRetryAttempts,
            long listShardsCacheAllowedAgeInSeconds,
            int maxCacheMissesBeforeReload,
            int cacheMissWarningModulus) {
        // Wrap the bare stream name in a single-stream StreamIdentifier and delegate.
        this(
                kinesisClient,
                StreamIdentifier.singleStreamInstance(streamName),
                listShardsBackoffTimeInMillis,
                maxListShardsRetryAttempts,
                listShardsCacheAllowedAgeInSeconds,
                maxCacheMissesBeforeReload,
                cacheMissWarningModulus,
                LeaseManagementConfig.DEFAULT_REQUEST_TIMEOUT);
    }
|
||||
|
||||
public KinesisShardDetector(
|
||||
KinesisAsyncClient kinesisClient,
|
||||
StreamIdentifier streamIdentifier,
|
||||
|
|
|
|||
|
|
@ -46,7 +46,11 @@ import software.amazon.kinesis.retrieval.kpl.ExtendedSequenceNumber;
|
|||
"lastCounterIncrementNanos",
|
||||
"childShardIds",
|
||||
"pendingCheckpointState",
|
||||
"isMarkedForLeaseSteal"
|
||||
"isMarkedForLeaseSteal",
|
||||
"throughputKBps",
|
||||
"checkpointOwner",
|
||||
"checkpointOwnerTimeoutTimestampMillis",
|
||||
"isExpiredOrUnassigned"
|
||||
})
|
||||
@ToString
|
||||
public class Lease {
|
||||
|
|
@ -104,6 +108,33 @@ public class Lease {
|
|||
@Setter
|
||||
private boolean isMarkedForLeaseSteal;
|
||||
|
||||
/**
|
||||
* If true, this indicates that lease is ready to be immediately reassigned.
|
||||
*/
|
||||
@Setter
|
||||
private boolean isExpiredOrUnassigned;
|
||||
|
||||
/**
|
||||
* Throughput in Kbps for the lease.
|
||||
*/
|
||||
private Double throughputKBps;
|
||||
|
||||
/**
|
||||
* Owner of the checkpoint. The attribute is used for graceful shutdowns to indicate the owner that
|
||||
* is allowed to write the checkpoint.
|
||||
*/
|
||||
@Setter
|
||||
private String checkpointOwner;
|
||||
|
||||
/**
|
||||
* This field is used for tracking when the shutdown was requested on the lease so we can expire it. This is
|
||||
* deliberately not persisted in DynamoDB because leaseOwner are expected to transfer lease from itself to the
|
||||
* next owner during shutdown. If the worker dies before shutdown the lease will just become expired then we can
|
||||
* pick it up. If for some reason worker is not able to shut down and continues holding onto the lease
|
||||
* this timeout will kick in and force a lease transfer.
|
||||
*/
|
||||
@Setter
|
||||
private Long checkpointOwnerTimeoutTimestampMillis;
|
||||
/**
|
||||
* Count of distinct lease holders between checkpoints.
|
||||
*/
|
||||
|
|
@ -242,6 +273,54 @@ public class Lease {
|
|||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* @return true if checkpoint owner is set. Indicating a requested shutdown.
|
||||
*/
|
||||
public boolean shutdownRequested() {
|
||||
return checkpointOwner != null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Check whether lease should be blocked on pending checkpoint. We DON'T block if
|
||||
* - lease is expired (Expired lease should be assigned right away) OR
|
||||
* ----- at this point we know lease is assigned -----
|
||||
* - lease is shardEnd (No more processing possible) OR
|
||||
* - lease is NOT requested for shutdown OR
|
||||
* - lease shutdown expired
|
||||
*
|
||||
* @param currentTimeMillis current time in milliseconds
|
||||
* @return true if lease is blocked on pending checkpoint
|
||||
*/
|
||||
public boolean blockedOnPendingCheckpoint(long currentTimeMillis) {
|
||||
// using ORs and negate
|
||||
return !(isExpiredOrUnassigned
|
||||
|| ExtendedSequenceNumber.SHARD_END.equals(checkpoint)
|
||||
|| !shutdownRequested()
|
||||
// if shutdown requested then checkpointOwnerTimeoutTimestampMillis should present
|
||||
|| currentTimeMillis - checkpointOwnerTimeoutTimestampMillis >= 0);
|
||||
}
|
||||
|
||||
/**
|
||||
* Check whether lease is eligible for graceful shutdown. It's eligible if
|
||||
* - lease is still assigned (not expired) AND
|
||||
* - lease is NOT shardEnd (No more processing possible AND
|
||||
* - lease is NOT requested for shutdown
|
||||
*
|
||||
* @return true if lease is eligible for graceful shutdown
|
||||
*/
|
||||
public boolean isEligibleForGracefulShutdown() {
|
||||
return !isExpiredOrUnassigned && !ExtendedSequenceNumber.SHARD_END.equals(checkpoint) && !shutdownRequested();
|
||||
}
|
||||
|
||||
/**
|
||||
* Need to handle the case during graceful shutdown where leaseOwner isn't the current owner
|
||||
*
|
||||
* @return the actual owner
|
||||
*/
|
||||
public String actualOwner() {
|
||||
return checkpointOwner == null ? leaseOwner : checkpointOwner;
|
||||
}
|
||||
|
||||
/**
|
||||
* @return true if lease is not currently owned
|
||||
*/
|
||||
|
|
@ -343,6 +422,15 @@ public class Lease {
|
|||
this.childShardIds.addAll(childShardIds);
|
||||
}
|
||||
|
||||
    /**
     * Sets throughputKBps.
     *
     * @param throughputKBps observed throughput for this lease, in KB per second
     */
    public void throughputKBps(double throughputKBps) {
        // Autoboxes into the nullable Double field; a value set this way is never null.
        this.throughputKBps = throughputKBps;
    }
|
||||
|
||||
/**
|
||||
* Set the hash range key for this shard.
|
||||
* @param hashKeyRangeForLease
|
||||
|
|
@ -370,6 +458,8 @@ public class Lease {
|
|||
* @return A deep copy of this object.
|
||||
*/
|
||||
public Lease copy() {
|
||||
return new Lease(this);
|
||||
final Lease lease = new Lease(this);
|
||||
lease.checkpointOwner(this.checkpointOwner);
|
||||
return lease;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -20,6 +20,7 @@ import java.util.Collections;
|
|||
import java.util.List;
|
||||
import java.util.UUID;
|
||||
|
||||
import software.amazon.kinesis.coordinator.MigrationAdaptiveLeaseAssignmentModeProvider;
|
||||
import software.amazon.kinesis.leases.dynamodb.DynamoDBLeaseCoordinator;
|
||||
import software.amazon.kinesis.leases.exceptions.DependencyException;
|
||||
import software.amazon.kinesis.leases.exceptions.InvalidStateException;
|
||||
|
|
@ -38,11 +39,14 @@ public interface LeaseCoordinator {
|
|||
|
||||
/**
|
||||
* Start background LeaseHolder and LeaseTaker threads.
|
||||
* @param leaseAssignmentModeProvider provider of Lease Assignment mode to determine whether to start components
|
||||
* for both V2 and V3 functionality or only V3 functionality
|
||||
* @throws ProvisionedThroughputException If we can't talk to DynamoDB due to insufficient capacity.
|
||||
* @throws InvalidStateException If the lease table doesn't exist
|
||||
* @throws DependencyException If we encountered exception taking to DynamoDB
|
||||
*/
|
||||
void start() throws DependencyException, InvalidStateException, ProvisionedThroughputException;
|
||||
void start(final MigrationAdaptiveLeaseAssignmentModeProvider leaseAssignmentModeProvider)
|
||||
throws DependencyException, InvalidStateException, ProvisionedThroughputException;
|
||||
|
||||
/**
|
||||
* Runs a single iteration of the lease taker - used by integration tests.
|
||||
|
|
@ -152,4 +156,9 @@ public interface LeaseCoordinator {
|
|||
* @return LeaseCoordinator
|
||||
*/
|
||||
DynamoDBLeaseCoordinator initialLeaseTableReadCapacity(long readCapacity);
|
||||
|
||||
/**
|
||||
* @return instance of {@link LeaseStatsRecorder}
|
||||
*/
|
||||
LeaseStatsRecorder leaseStatsRecorder();
|
||||
}
|
||||
|
|
|
|||
|
|
@ -0,0 +1,20 @@
|
|||
package software.amazon.kinesis.leases;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
import software.amazon.kinesis.leases.exceptions.DependencyException;
|
||||
import software.amazon.kinesis.leases.exceptions.InvalidStateException;
|
||||
import software.amazon.kinesis.leases.exceptions.ProvisionedThroughputException;
|
||||
|
||||
public interface LeaseDiscoverer {
    /**
     * Identifies the leases that are assigned to the current worker but are not yet being tracked
     * and processed by the current worker.
     *
     * @return list of leases assigned to this worker that the worker is not currently holding
     * @throws DependencyException if DynamoDB scan fails in an unexpected way
     * @throws InvalidStateException if lease table does not exist
     * @throws ProvisionedThroughputException if DynamoDB scan fails due to lack of capacity
     */
    List<Lease> discoverNewLeases() throws ProvisionedThroughputException, InvalidStateException, DependencyException;
}
|
||||
|
|
@ -16,7 +16,9 @@
|
|||
package software.amazon.kinesis.leases;
|
||||
|
||||
import java.time.Duration;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collection;
|
||||
import java.util.List;
|
||||
import java.util.concurrent.ExecutorService;
|
||||
import java.util.concurrent.SynchronousQueue;
|
||||
import java.util.concurrent.ThreadFactory;
|
||||
|
|
@ -25,6 +27,7 @@ import java.util.concurrent.TimeUnit;
|
|||
import java.util.function.Function;
|
||||
|
||||
import com.google.common.util.concurrent.ThreadFactoryBuilder;
|
||||
import lombok.Builder;
|
||||
import lombok.Data;
|
||||
import lombok.NonNull;
|
||||
import lombok.experimental.Accessors;
|
||||
|
|
@ -34,14 +37,17 @@ import software.amazon.awssdk.services.dynamodb.DynamoDbAsyncClient;
|
|||
import software.amazon.awssdk.services.dynamodb.model.BillingMode;
|
||||
import software.amazon.awssdk.services.dynamodb.model.Tag;
|
||||
import software.amazon.awssdk.services.kinesis.KinesisAsyncClient;
|
||||
import software.amazon.kinesis.common.DdbTableConfig;
|
||||
import software.amazon.kinesis.common.InitialPositionInStream;
|
||||
import software.amazon.kinesis.common.InitialPositionInStreamExtended;
|
||||
import software.amazon.kinesis.common.LeaseCleanupConfig;
|
||||
import software.amazon.kinesis.common.StreamConfig;
|
||||
import software.amazon.kinesis.leases.dynamodb.DynamoDBLeaseManagementFactory;
|
||||
import software.amazon.kinesis.leases.dynamodb.DynamoDBLeaseSerializer;
|
||||
import software.amazon.kinesis.leases.dynamodb.TableCreatorCallback;
|
||||
import software.amazon.kinesis.metrics.MetricsFactory;
|
||||
import software.amazon.kinesis.metrics.NullMetricsFactory;
|
||||
import software.amazon.kinesis.worker.metric.WorkerMetric;
|
||||
|
||||
/**
|
||||
* Used by the KCL to configure lease management.
|
||||
|
|
@ -209,6 +215,9 @@ public class LeaseManagementConfig {
|
|||
|
||||
private BillingMode billingMode = BillingMode.PAY_PER_REQUEST;
|
||||
|
||||
private WorkerUtilizationAwareAssignmentConfig workerUtilizationAwareAssignmentConfig =
|
||||
new WorkerUtilizationAwareAssignmentConfig();
|
||||
|
||||
/**
|
||||
* Whether to enable deletion protection on the DynamoDB lease table created by KCL. This does not update
|
||||
* already existing tables.
|
||||
|
|
@ -276,14 +285,17 @@ public class LeaseManagementConfig {
|
|||
}
|
||||
|
||||
public LeaseManagementConfig(
|
||||
String tableName,
|
||||
DynamoDbAsyncClient dynamoDBClient,
|
||||
KinesisAsyncClient kinesisClient,
|
||||
String workerIdentifier) {
|
||||
final String tableName,
|
||||
final String applicationName,
|
||||
final DynamoDbAsyncClient dynamoDBClient,
|
||||
final KinesisAsyncClient kinesisClient,
|
||||
final String workerIdentifier) {
|
||||
this.tableName = tableName;
|
||||
this.dynamoDBClient = dynamoDBClient;
|
||||
this.kinesisClient = kinesisClient;
|
||||
this.workerIdentifier = workerIdentifier;
|
||||
this.workerUtilizationAwareAssignmentConfig.workerMetricsTableConfig =
|
||||
new WorkerMetricsTableConfig(applicationName);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -350,10 +362,18 @@ public class LeaseManagementConfig {
|
|||
*/
|
||||
private TableCreatorCallback tableCreatorCallback = TableCreatorCallback.NOOP_TABLE_CREATOR_CALLBACK;
|
||||
|
||||
/**
|
||||
* @deprecated never used and will be removed in future releases
|
||||
*/
|
||||
@Deprecated
|
||||
private HierarchicalShardSyncer hierarchicalShardSyncer;
|
||||
|
||||
private LeaseManagementFactory leaseManagementFactory;
|
||||
|
||||
/**
|
||||
* @deprecated never used and will be removed in future releases
|
||||
*/
|
||||
@Deprecated
|
||||
public HierarchicalShardSyncer hierarchicalShardSyncer() {
|
||||
if (hierarchicalShardSyncer == null) {
|
||||
hierarchicalShardSyncer = new HierarchicalShardSyncer();
|
||||
|
|
@ -361,39 +381,63 @@ public class LeaseManagementConfig {
|
|||
return hierarchicalShardSyncer;
|
||||
}
|
||||
|
||||
    /**
     * Configuration class for controlling the graceful handoff of leases.
     * This configuration allows tuning of the shutdown behavior during lease transfers.
     * <p>
     * It provides settings to control the timeout period for waiting on the record processor
     * to shut down and an option to enable or disable graceful lease handoff.
     * </p>
     */
    @Data
    @Builder
    @Accessors(fluent = true)
    public static class GracefulLeaseHandoffConfig {
        /**
         * The minimum amount of time (in milliseconds) to wait for the current shard's RecordProcessor
         * to gracefully shut down before forcefully transferring the lease to the next owner.
         * <p>
         * If each call to {@code processRecords} is expected to run longer than the default value,
         * it makes sense to set this to a higher value to ensure the RecordProcessor has enough
         * time to complete its processing.
         * </p>
         * <p>
         * Default value is 30,000 milliseconds (30 seconds).
         * </p>
         */
        @Builder.Default
        private long gracefulLeaseHandoffTimeoutMillis = 30_000L;

        /**
         * Flag to enable or disable the graceful lease handoff mechanism.
         * <p>
         * When set to {@code true}, the KCL will attempt to gracefully transfer leases by
         * allowing the shard's RecordProcessor sufficient time to complete processing before
         * handing off the lease to another worker. When {@code false}, the lease will be
         * handed off without waiting for the RecordProcessor to shut down gracefully. Note
         * that checkpointing is expected to be implemented inside {@code shutdownRequested}
         * for this feature to work end to end.
         * </p>
         * <p>
         * Default value is {@code true}.
         * </p>
         */
        @Builder.Default
        private boolean isGracefulLeaseHandoffEnabled = true;
    }
|
||||
|
||||
private GracefulLeaseHandoffConfig gracefulLeaseHandoffConfig =
|
||||
GracefulLeaseHandoffConfig.builder().build();
|
||||
|
||||
/**
|
||||
* @deprecated This is no longer invoked, but {@code leaseManagementFactory(LeaseSerializer, boolean)}
|
||||
* is invoked instead. Please remove implementation for this method as future
|
||||
* releases will remove this API.
|
||||
*/
|
||||
@Deprecated
|
||||
public LeaseManagementFactory leaseManagementFactory() {
|
||||
if (leaseManagementFactory == null) {
|
||||
Validate.notEmpty(streamName(), "Stream name is empty");
|
||||
leaseManagementFactory = new DynamoDBLeaseManagementFactory(
|
||||
kinesisClient(),
|
||||
streamName(),
|
||||
dynamoDBClient(),
|
||||
tableName(),
|
||||
workerIdentifier(),
|
||||
executorService(),
|
||||
initialPositionInStream(),
|
||||
failoverTimeMillis(),
|
||||
epsilonMillis(),
|
||||
maxLeasesForWorker(),
|
||||
maxLeasesToStealAtOneTime(),
|
||||
maxLeaseRenewalThreads(),
|
||||
cleanupLeasesUponShardCompletion(),
|
||||
ignoreUnexpectedChildShards(),
|
||||
shardSyncIntervalMillis(),
|
||||
consistentReads(),
|
||||
listShardsBackoffTimeInMillis(),
|
||||
maxListShardsRetryAttempts(),
|
||||
maxCacheMissesBeforeReload(),
|
||||
listShardsCacheAllowedAgeInSeconds(),
|
||||
cacheMissWarningModulus(),
|
||||
initialLeaseTableReadCapacity(),
|
||||
initialLeaseTableWriteCapacity(),
|
||||
hierarchicalShardSyncer(),
|
||||
tableCreatorCallback(),
|
||||
dynamoDbRequestTimeout(),
|
||||
billingMode(),
|
||||
tags());
|
||||
leaseManagementFactory(new DynamoDBLeaseSerializer(), false);
|
||||
}
|
||||
return leaseManagementFactory;
|
||||
}
|
||||
|
|
@ -430,7 +474,6 @@ public class LeaseManagementConfig {
|
|||
cacheMissWarningModulus(),
|
||||
initialLeaseTableReadCapacity(),
|
||||
initialLeaseTableWriteCapacity(),
|
||||
hierarchicalShardSyncer(),
|
||||
tableCreatorCallback(),
|
||||
dynamoDbRequestTimeout(),
|
||||
billingMode(),
|
||||
|
|
@ -440,7 +483,9 @@ public class LeaseManagementConfig {
|
|||
leaseSerializer,
|
||||
customShardDetectorProvider(),
|
||||
isMultiStreamingMode,
|
||||
leaseCleanupConfig());
|
||||
leaseCleanupConfig(),
|
||||
workerUtilizationAwareAssignmentConfig(),
|
||||
gracefulLeaseHandoffConfig);
|
||||
}
|
||||
return leaseManagementFactory;
|
||||
}
|
||||
|
|
@ -454,4 +499,90 @@ public class LeaseManagementConfig {
|
|||
this.leaseManagementFactory = leaseManagementFactory;
|
||||
return this;
|
||||
}
|
||||
|
||||
@Data
|
||||
@Accessors(fluent = true)
|
||||
public static class WorkerUtilizationAwareAssignmentConfig {
|
||||
/**
|
||||
* This defines the frequency of capturing worker metric stats in memory. Default is 1s
|
||||
*/
|
||||
private long inMemoryWorkerMetricsCaptureFrequencyMillis =
|
||||
Duration.ofSeconds(1L).toMillis();
|
||||
/**
|
||||
* This defines the frequency of reporting worker metric stats to storage. Default is 30s
|
||||
*/
|
||||
private long workerMetricsReporterFreqInMillis = Duration.ofSeconds(30).toMillis();
|
||||
/**
|
||||
* These are the no. of metrics that are persisted in storage in WorkerMetricStats ddb table.
|
||||
*/
|
||||
private int noOfPersistedMetricsPerWorkerMetrics = 10;
|
||||
/**
|
||||
* Option to disable workerMetrics to use in lease balancing.
|
||||
*/
|
||||
private boolean disableWorkerMetrics = false;
|
||||
/**
|
||||
* List of workerMetrics for the application.
|
||||
*/
|
||||
private List<WorkerMetric> workerMetricList = new ArrayList<>();
|
||||
/**
|
||||
* Max throughput per host KBps, default is unlimited.
|
||||
*/
|
||||
private double maxThroughputPerHostKBps = Double.MAX_VALUE;
|
||||
/**
|
||||
* Percentage of value to achieve critical dampening during this case
|
||||
*/
|
||||
private int dampeningPercentage = 60;
|
||||
/**
|
||||
* Percentage value used to trigger reBalance. If fleet has workers which are have metrics value more or less
|
||||
* than 10% of fleet level average then reBalance is triggered.
|
||||
* Leases are taken from workers with metrics value more than fleet level average. The load to take from these
|
||||
* workers is determined by evaluating how far they are with respect to fleet level average.
|
||||
*/
|
||||
private int reBalanceThresholdPercentage = 10;
|
||||
|
||||
/**
|
||||
* The allowThroughputOvershoot flag determines whether leases should still be taken even if
|
||||
* it causes the total assigned throughput to exceed the desired throughput to take for re-balance.
|
||||
* Enabling this flag provides more flexibility for the LeaseAssignmentManager to explore additional
|
||||
* assignment possibilities, which can lead to faster throughput convergence.
|
||||
*/
|
||||
private boolean allowThroughputOvershoot = true;
|
||||
|
||||
/**
|
||||
* Duration after which workerMetricStats entry from WorkerMetricStats table will be cleaned up. When an entry's
|
||||
* lastUpdateTime is older than staleWorkerMetricsEntryCleanupDuration from current time, entry will be removed
|
||||
* from the table.
|
||||
*/
|
||||
private Duration staleWorkerMetricsEntryCleanupDuration = Duration.ofDays(1);
|
||||
|
||||
/**
|
||||
* configuration to configure how to create the WorkerMetricStats table, such as table name,
|
||||
* billing mode, provisioned capacity. If no table name is specified, the table name will
|
||||
* default to applicationName-WorkerMetricStats. If no billing more is chosen, default is
|
||||
* On-Demand.
|
||||
*/
|
||||
private WorkerMetricsTableConfig workerMetricsTableConfig;
|
||||
|
||||
/**
|
||||
* Frequency to perform worker variance balancing. This value is used with respect to the LAM frequency,
|
||||
* that is every third (as default) iteration of LAM the worker variance balancing will be performed.
|
||||
* Setting it to 1 will make varianceBalancing run on every iteration of LAM and 2 on every 2nd iteration
|
||||
* and so on.
|
||||
* NOTE: LAM frequency = failoverTimeMillis
|
||||
*/
|
||||
private int varianceBalancingFrequency = 3;
|
||||
|
||||
/**
|
||||
* Alpha value used for calculating exponential moving average of worker's metricStats. Selecting
|
||||
* higher alpha value gives more weightage to recent value and thus low smoothing effect on computed average
|
||||
* and selecting smaller alpha values gives more weightage to past value and high smoothing effect.
|
||||
*/
|
||||
private double workerMetricsEMAAlpha = 0.5;
|
||||
}
|
||||
|
||||
public static class WorkerMetricsTableConfig extends DdbTableConfig {
|
||||
public WorkerMetricsTableConfig(final String applicationName) {
|
||||
super(applicationName, "WorkerMetricStats");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -15,9 +15,12 @@
|
|||
|
||||
package software.amazon.kinesis.leases;
|
||||
|
||||
import java.util.concurrent.ConcurrentMap;
|
||||
|
||||
import software.amazon.kinesis.common.StreamConfig;
|
||||
import software.amazon.kinesis.coordinator.DeletedStreamListProvider;
|
||||
import software.amazon.kinesis.leases.dynamodb.DynamoDBLeaseRefresher;
|
||||
import software.amazon.kinesis.lifecycle.ShardConsumer;
|
||||
import software.amazon.kinesis.metrics.MetricsFactory;
|
||||
|
||||
/**
|
||||
|
|
@ -26,10 +29,27 @@ import software.amazon.kinesis.metrics.MetricsFactory;
|
|||
public interface LeaseManagementFactory {
|
||||
LeaseCoordinator createLeaseCoordinator(MetricsFactory metricsFactory);
|
||||
|
||||
ShardSyncTaskManager createShardSyncTaskManager(MetricsFactory metricsFactory);
|
||||
default LeaseCoordinator createLeaseCoordinator(
|
||||
MetricsFactory metricsFactory, ConcurrentMap<ShardInfo, ShardConsumer> shardInfoShardConsumerMap) {
|
||||
throw new UnsupportedOperationException("Not implemented");
|
||||
}
|
||||
|
||||
/**
|
||||
* @deprecated This method is never invoked, please remove implementation of this method
|
||||
* as it will be removed in future releases.
|
||||
*/
|
||||
@Deprecated
|
||||
default ShardSyncTaskManager createShardSyncTaskManager(MetricsFactory metricsFactory) {
|
||||
throw new UnsupportedOperationException("Deprecated");
|
||||
}
|
||||
|
||||
/**
|
||||
* @deprecated This method is never invoked, please remove implementation of this method
|
||||
* as it will be removed in future releases.
|
||||
*/
|
||||
@Deprecated
|
||||
default ShardSyncTaskManager createShardSyncTaskManager(MetricsFactory metricsFactory, StreamConfig streamConfig) {
|
||||
throw new UnsupportedOperationException();
|
||||
throw new UnsupportedOperationException("Deprecated");
|
||||
}
|
||||
|
||||
default ShardSyncTaskManager createShardSyncTaskManager(
|
||||
|
|
@ -41,10 +61,17 @@ public interface LeaseManagementFactory {
|
|||
|
||||
DynamoDBLeaseRefresher createLeaseRefresher();
|
||||
|
||||
ShardDetector createShardDetector();
|
||||
/**
|
||||
* @deprecated This method is never invoked, please remove implementation of this method
|
||||
* as it will be removed in future releases.
|
||||
*/
|
||||
@Deprecated
|
||||
default ShardDetector createShardDetector() {
|
||||
throw new UnsupportedOperationException("Deprecated");
|
||||
}
|
||||
|
||||
default ShardDetector createShardDetector(StreamConfig streamConfig) {
|
||||
throw new UnsupportedOperationException();
|
||||
throw new UnsupportedOperationException("Not implemented");
|
||||
}
|
||||
|
||||
LeaseCleanupManager createLeaseCleanupManager(MetricsFactory metricsFactory);
|
||||
|
|
|
|||
|
|
@ -15,6 +15,9 @@
|
|||
package software.amazon.kinesis.leases;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.concurrent.ExecutorService;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import software.amazon.kinesis.common.StreamIdentifier;
|
||||
import software.amazon.kinesis.leases.exceptions.DependencyException;
|
||||
|
|
@ -75,6 +78,37 @@ public interface LeaseRefresher {
|
|||
*/
|
||||
boolean waitUntilLeaseTableExists(long secondsBetweenPolls, long timeoutSeconds) throws DependencyException;
|
||||
|
||||
/**
|
||||
* Creates the LeaseOwnerToLeaseKey index on the lease table if it doesn't exist and returns the status of index.
|
||||
*
|
||||
* @return indexStatus status of the index.
|
||||
* @throws DependencyException if storage's describe API fails in an unexpected way
|
||||
*/
|
||||
default String createLeaseOwnerToLeaseKeyIndexIfNotExists() throws DependencyException {
|
||||
return null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Blocks until the index exists by polling storage till either the index is ACTIVE or else timeout has
|
||||
* happened.
|
||||
*
|
||||
* @param secondsBetweenPolls time to wait between polls in seconds
|
||||
* @param timeoutSeconds total time to wait in seconds
|
||||
*
|
||||
* @return true if index on the table exists and is ACTIVE, false if timeout was reached
|
||||
*/
|
||||
default boolean waitUntilLeaseOwnerToLeaseKeyIndexExists(
|
||||
final long secondsBetweenPolls, final long timeoutSeconds) {
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
|
||||
* Check if leaseOwner GSI is ACTIVE
|
||||
* @return true if index is active, false otherwise
|
||||
* @throws DependencyException if storage's describe API fails in an unexpected way
|
||||
*/
|
||||
boolean isLeaseOwnerToLeaseKeyIndexActive() throws DependencyException;
|
||||
|
||||
/**
|
||||
* List all leases for a given stream synchronously.
|
||||
*
|
||||
|
|
@ -87,6 +121,24 @@ public interface LeaseRefresher {
|
|||
List<Lease> listLeasesForStream(StreamIdentifier streamIdentifier)
|
||||
throws DependencyException, InvalidStateException, ProvisionedThroughputException;
|
||||
|
||||
/**
|
||||
* List all leases for a given workerIdentifier synchronously.
|
||||
* Default implementation calls listLeases() and filters the results.
|
||||
*
|
||||
* @throws DependencyException if DynamoDB scan fails in an unexpected way
|
||||
* @throws InvalidStateException if lease table does not exist
|
||||
* @throws ProvisionedThroughputException if DynamoDB scan fails due to lack of capacity
|
||||
*
|
||||
* @return list of leases
|
||||
*/
|
||||
default List<String> listLeaseKeysForWorker(final String workerIdentifier)
|
||||
throws DependencyException, InvalidStateException, ProvisionedThroughputException {
|
||||
return listLeases().stream()
|
||||
.filter(lease -> lease.leaseOwner().equals(workerIdentifier))
|
||||
.map(Lease::leaseKey)
|
||||
.collect(Collectors.toList());
|
||||
}
|
||||
|
||||
/**
|
||||
* List all objects in table synchronously.
|
||||
*
|
||||
|
|
@ -98,6 +150,23 @@ public interface LeaseRefresher {
|
|||
*/
|
||||
List<Lease> listLeases() throws DependencyException, InvalidStateException, ProvisionedThroughputException;
|
||||
|
||||
/**
|
||||
* List all leases from the storage parallely and deserialize into Lease objects. Returns the list of leaseKey
|
||||
* that failed deserialize separately.
|
||||
*
|
||||
* @param threadPool threadpool to use for parallel scan
|
||||
* @param parallelismFactor no. of parallel scans
|
||||
* @return Pair of List of leases from the storage and List of items failed to deserialize
|
||||
* @throws DependencyException if DynamoDB scan fails in an unexpected way
|
||||
* @throws InvalidStateException if lease table does not exist
|
||||
* @throws ProvisionedThroughputException if DynamoDB scan fails due to lack of capacity
|
||||
*/
|
||||
default Map.Entry<List<Lease>, List<String>> listLeasesParallely(
|
||||
final ExecutorService threadPool, final int parallelismFactor)
|
||||
throws DependencyException, InvalidStateException, ProvisionedThroughputException {
|
||||
throw new UnsupportedOperationException("listLeasesParallely is not implemented");
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a new lease. Conditional on a lease not already existing with this shardId.
|
||||
*
|
||||
|
|
@ -154,6 +223,47 @@ public interface LeaseRefresher {
|
|||
boolean takeLease(Lease lease, String owner)
|
||||
throws DependencyException, InvalidStateException, ProvisionedThroughputException;
|
||||
|
||||
/**
|
||||
* Assigns given lease to newOwner owner by incrementing its leaseCounter and setting its owner field. Conditional
|
||||
* on the leaseOwner in DynamoDB matching the leaseOwner of the input lease. Mutates the leaseCounter and owner of
|
||||
* the passed-in lease object after updating DynamoDB.
|
||||
*
|
||||
* @param lease the lease to be assigned
|
||||
* @param newOwner the new owner
|
||||
*
|
||||
* @return true if lease was successfully assigned, false otherwise
|
||||
*
|
||||
* @throws InvalidStateException if lease table does not exist
|
||||
* @throws ProvisionedThroughputException if DynamoDB update fails due to lack of capacity
|
||||
* @throws DependencyException if DynamoDB update fails in an unexpected way
|
||||
*/
|
||||
default boolean assignLease(final Lease lease, final String newOwner)
|
||||
throws DependencyException, InvalidStateException, ProvisionedThroughputException {
|
||||
|
||||
throw new UnsupportedOperationException("assignLease is not implemented");
|
||||
}
|
||||
|
||||
/**
|
||||
* Initiates a graceful handoff of the given lease to the specified new owner, allowing the current owner
|
||||
* to complete its processing before transferring ownership.
|
||||
* <p>
|
||||
* This method updates the lease with the new owner information but ensures that the current owner
|
||||
* is given time to gracefully finish its work (e.g., processing records) before the lease is reassigned.
|
||||
* </p>
|
||||
*
|
||||
* @param lease the lease to be assigned
|
||||
* @param newOwner the new owner
|
||||
* @return true if a graceful handoff was successfully initiated
|
||||
* @throws InvalidStateException if lease table does not exist
|
||||
* @throws ProvisionedThroughputException if DynamoDB update fails due to lack of capacity
|
||||
* @throws DependencyException if DynamoDB update fails in an unexpected way
|
||||
*/
|
||||
default boolean initiateGracefulLeaseHandoff(final Lease lease, final String newOwner)
|
||||
throws DependencyException, InvalidStateException, ProvisionedThroughputException {
|
||||
|
||||
throw new UnsupportedOperationException("assignLeaseWithWait is not implemented");
|
||||
}
|
||||
|
||||
/**
|
||||
* Evict the current owner of lease by setting owner to null. Conditional on the owner in DynamoDB matching the owner of
|
||||
* the input. Mutates the lease counter and owner of the passed-in lease object after updating the record in DynamoDB.
|
||||
|
|
|
|||
|
|
@ -15,6 +15,7 @@
|
|||
package software.amazon.kinesis.leases;
|
||||
|
||||
import java.util.Collection;
|
||||
import java.util.Collections;
|
||||
import java.util.Map;
|
||||
|
||||
import software.amazon.awssdk.services.dynamodb.model.AttributeDefinition;
|
||||
|
|
@ -100,6 +101,15 @@ public interface LeaseSerializer {
|
|||
*/
|
||||
Map<String, AttributeValueUpdate> getDynamoTakeLeaseUpdate(Lease lease, String newOwner);
|
||||
|
||||
/**
|
||||
* @param lease lease that needs to be assigned
|
||||
* @param newOwner newLeaseOwner
|
||||
* @return the attribute value map that takes a lease for a new owner
|
||||
*/
|
||||
default Map<String, AttributeValueUpdate> getDynamoAssignLeaseUpdate(Lease lease, String newOwner) {
|
||||
throw new UnsupportedOperationException("getDynamoAssignLeaseUpdate is not implemented");
|
||||
}
|
||||
|
||||
/**
|
||||
* @param lease
|
||||
* @return the attribute value map that voids a lease
|
||||
|
|
@ -127,8 +137,22 @@ public interface LeaseSerializer {
|
|||
*/
|
||||
Collection<KeySchemaElement> getKeySchema();
|
||||
|
||||
default Collection<KeySchemaElement> getWorkerIdToLeaseKeyIndexKeySchema() {
|
||||
return Collections.EMPTY_LIST;
|
||||
}
|
||||
|
||||
default Collection<AttributeDefinition> getWorkerIdToLeaseKeyIndexAttributeDefinitions() {
|
||||
return Collections.EMPTY_LIST;
|
||||
}
|
||||
|
||||
/**
|
||||
* @return attribute definitions for creating a DynamoDB table to store leases
|
||||
*/
|
||||
Collection<AttributeDefinition> getAttributeDefinitions();
|
||||
|
||||
/**
|
||||
* @param lease
|
||||
* @return the attribute value map that includes lease throughput
|
||||
*/
|
||||
Map<String, AttributeValueUpdate> getDynamoLeaseThroughputKbpsUpdate(Lease lease);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -0,0 +1,158 @@
|
|||
package software.amazon.kinesis.leases;
|
||||
|
||||
import java.util.LinkedList;
|
||||
import java.util.Map;
|
||||
import java.util.Queue;
|
||||
import java.util.concurrent.Callable;
|
||||
import java.util.concurrent.ConcurrentHashMap;
|
||||
import java.util.concurrent.ConcurrentLinkedQueue;
|
||||
|
||||
import lombok.Builder;
|
||||
import lombok.Getter;
|
||||
import lombok.NonNull;
|
||||
import lombok.RequiredArgsConstructor;
|
||||
import lombok.ToString;
|
||||
import software.amazon.awssdk.annotations.ThreadSafe;
|
||||
import software.amazon.kinesis.annotations.KinesisClientInternalApi;
|
||||
import software.amazon.kinesis.utils.ExponentialMovingAverage;
|
||||
|
||||
import static java.util.Objects.isNull;
|
||||
|
||||
/**
|
||||
* This class records the stats for the leases.
|
||||
* The stats are recorded in a thread safe queue, and the throughput is calculated by summing up the bytes and dividing
|
||||
* by interval in seconds.
|
||||
* This class is thread safe and backed by thread safe data structures.
|
||||
*/
|
||||
@RequiredArgsConstructor
|
||||
@KinesisClientInternalApi
|
||||
@ThreadSafe
|
||||
public class LeaseStatsRecorder {
|
||||
|
||||
/**
|
||||
* This default alpha is chosen based on the testing so far between simple average and moving average with 0.5.
|
||||
* In the future, if one value does not fit all use cases, inject this via config.
|
||||
*/
|
||||
private static final double DEFAULT_ALPHA = 0.5;
|
||||
|
||||
public static final int BYTES_PER_KB = 1024;
|
||||
|
||||
private final Long renewerFrequencyInMillis;
|
||||
private final Map<String, Queue<LeaseStats>> leaseStatsMap = new ConcurrentHashMap<>();
|
||||
private final Map<String, ExponentialMovingAverage> leaseKeyToExponentialMovingAverageMap =
|
||||
new ConcurrentHashMap<>();
|
||||
private final Callable<Long> timeProviderInMillis;
|
||||
|
||||
/**
|
||||
* This method provides happens-before semantics (i.e., the action (access or removal) from a thread happens
|
||||
* before the action from subsequent thread) for the stats recording in multithreaded environment.
|
||||
*/
|
||||
public void recordStats(@NonNull final LeaseStats leaseStats) {
|
||||
final Queue<LeaseStats> leaseStatsQueue =
|
||||
leaseStatsMap.computeIfAbsent(leaseStats.getLeaseKey(), lease -> new ConcurrentLinkedQueue<>());
|
||||
leaseStatsQueue.add(leaseStats);
|
||||
}
|
||||
|
||||
/**
|
||||
* Calculates the throughput in KBps for the given leaseKey.
|
||||
* Method first clears the items that are older than {@link #renewerFrequencyInMillis} from the queue and then
|
||||
* calculates the throughput per second during {@link #renewerFrequencyInMillis} interval and then returns the
|
||||
* ExponentialMovingAverage of the throughput. If method is called in quick succession with or without new stats
|
||||
* the result can be different as ExponentialMovingAverage decays old values on every new call.
|
||||
* This method is thread safe.
|
||||
* @param leaseKey leaseKey for which stats are required
|
||||
* @return throughput in Kbps, returns null if there is no stats available for the leaseKey.
|
||||
*/
|
||||
public Double getThroughputKBps(final String leaseKey) {
|
||||
final Queue<LeaseStats> leaseStatsQueue = leaseStatsMap.get(leaseKey);
|
||||
|
||||
if (isNull(leaseStatsQueue)) {
|
||||
// This means there is no entry for this leaseKey yet
|
||||
return null;
|
||||
}
|
||||
|
||||
filterExpiredEntries(leaseStatsQueue);
|
||||
|
||||
// Convert bytes into KB and divide by interval in second to get throughput per second.
|
||||
final ExponentialMovingAverage exponentialMovingAverage = leaseKeyToExponentialMovingAverageMap.computeIfAbsent(
|
||||
leaseKey, leaseId -> new ExponentialMovingAverage(DEFAULT_ALPHA));
|
||||
|
||||
// Specifically dividing by 1000.0 rather than using Duration class to get seconds, because Duration class
|
||||
// implementation rounds off to seconds and precision is lost.
|
||||
final double frequency = renewerFrequencyInMillis / 1000.0;
|
||||
final double throughput = readQueue(leaseStatsQueue).stream()
|
||||
.mapToDouble(LeaseStats::getBytes)
|
||||
.sum()
|
||||
/ BYTES_PER_KB
|
||||
/ frequency;
|
||||
exponentialMovingAverage.add(throughput);
|
||||
return exponentialMovingAverage.getValue();
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the currentTimeMillis and then iterates over the queue to get the stats with creation time less than
|
||||
* currentTimeMillis.
|
||||
* This is specifically done to avoid potential race between with high-frequency put thread blocking get thread.
|
||||
*/
|
||||
private Queue<LeaseStats> readQueue(final Queue<LeaseStats> leaseStatsQueue) {
|
||||
final long currentTimeMillis = getCurrenTimeInMillis();
|
||||
final Queue<LeaseStats> response = new LinkedList<>();
|
||||
for (LeaseStats leaseStats : leaseStatsQueue) {
|
||||
if (leaseStats.creationTimeMillis > currentTimeMillis) {
|
||||
break;
|
||||
}
|
||||
response.add(leaseStats);
|
||||
}
|
||||
return response;
|
||||
}
|
||||
|
||||
private long getCurrenTimeInMillis() {
|
||||
try {
|
||||
return timeProviderInMillis.call();
|
||||
} catch (final Exception e) {
|
||||
// Fallback to using the System.currentTimeMillis if failed.
|
||||
return System.currentTimeMillis();
|
||||
}
|
||||
}
|
||||
|
||||
private void filterExpiredEntries(final Queue<LeaseStats> leaseStatsQueue) {
|
||||
final long currentTime = getCurrenTimeInMillis();
|
||||
while (!leaseStatsQueue.isEmpty()) {
|
||||
final LeaseStats leaseStats = leaseStatsQueue.peek();
|
||||
if (isNull(leaseStats) || currentTime - leaseStats.getCreationTimeMillis() < renewerFrequencyInMillis) {
|
||||
break;
|
||||
}
|
||||
leaseStatsQueue.poll();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Clear the in-memory stats for the lease when a lease is reassigned (due to shut down or lease stealing)
|
||||
* @param leaseKey leaseKey, for which stats are supposed to be clear.
|
||||
*/
|
||||
public void dropLeaseStats(final String leaseKey) {
|
||||
leaseStatsMap.remove(leaseKey);
|
||||
leaseKeyToExponentialMovingAverageMap.remove(leaseKey);
|
||||
}
|
||||
|
||||
@Builder
|
||||
@Getter
|
||||
@ToString
|
||||
@KinesisClientInternalApi
|
||||
public static final class LeaseStats {
|
||||
/**
|
||||
* Lease key for which this leaseStats object is created.
|
||||
*/
|
||||
private final String leaseKey;
|
||||
/**
|
||||
* Bytes that are processed for a lease
|
||||
*/
|
||||
private final long bytes;
|
||||
/**
|
||||
* Wall time in epoch millis at which this leaseStats object was created. This time is used to determine the
|
||||
* expiry of the lease stats.
|
||||
*/
|
||||
@Builder.Default
|
||||
private final long creationTimeMillis = System.currentTimeMillis();
|
||||
}
|
||||
}
|
||||
|
|
@ -71,7 +71,7 @@ public class ShardSyncTaskManager {
|
|||
/**
|
||||
* Constructor.
|
||||
*
|
||||
* <p>NOTE: This constructor is deprecated and will be removed in a future release.</p>
|
||||
* @deprecated This constructor is deprecated and will be removed in a future release.
|
||||
*
|
||||
* @param shardDetector
|
||||
* @param leaseRefresher
|
||||
|
|
@ -92,18 +92,16 @@ public class ShardSyncTaskManager {
|
|||
long shardSyncIdleTimeMillis,
|
||||
ExecutorService executorService,
|
||||
MetricsFactory metricsFactory) {
|
||||
this.shardDetector = shardDetector;
|
||||
this.leaseRefresher = leaseRefresher;
|
||||
this.initialPositionInStream = initialPositionInStream;
|
||||
this.cleanupLeasesUponShardCompletion = cleanupLeasesUponShardCompletion;
|
||||
this.garbageCollectLeases = true;
|
||||
this.ignoreUnexpectedChildShards = ignoreUnexpectedChildShards;
|
||||
this.shardSyncIdleTimeMillis = shardSyncIdleTimeMillis;
|
||||
this.executorService = executorService;
|
||||
this.hierarchicalShardSyncer = new HierarchicalShardSyncer();
|
||||
this.metricsFactory = metricsFactory;
|
||||
this.shardSyncRequestPending = new AtomicBoolean(false);
|
||||
this.lock = new ReentrantLock();
|
||||
this(
|
||||
shardDetector,
|
||||
leaseRefresher,
|
||||
initialPositionInStream,
|
||||
cleanupLeasesUponShardCompletion,
|
||||
ignoreUnexpectedChildShards,
|
||||
shardSyncIdleTimeMillis,
|
||||
executorService,
|
||||
new HierarchicalShardSyncer(),
|
||||
metricsFactory);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -19,6 +19,7 @@ import java.util.Collections;
|
|||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.UUID;
|
||||
import java.util.concurrent.ConcurrentMap;
|
||||
import java.util.concurrent.ExecutorService;
|
||||
import java.util.concurrent.Executors;
|
||||
import java.util.concurrent.LinkedTransferQueue;
|
||||
|
|
@ -30,13 +31,17 @@ import java.util.concurrent.TimeUnit;
|
|||
import java.util.stream.Collectors;
|
||||
|
||||
import com.google.common.util.concurrent.ThreadFactoryBuilder;
|
||||
import lombok.RequiredArgsConstructor;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import software.amazon.kinesis.annotations.KinesisClientInternalApi;
|
||||
import software.amazon.kinesis.coordinator.MigrationAdaptiveLeaseAssignmentModeProvider;
|
||||
import software.amazon.kinesis.leases.Lease;
|
||||
import software.amazon.kinesis.leases.LeaseCoordinator;
|
||||
import software.amazon.kinesis.leases.LeaseDiscoverer;
|
||||
import software.amazon.kinesis.leases.LeaseManagementConfig;
|
||||
import software.amazon.kinesis.leases.LeaseRefresher;
|
||||
import software.amazon.kinesis.leases.LeaseRenewer;
|
||||
import software.amazon.kinesis.leases.LeaseStatsRecorder;
|
||||
import software.amazon.kinesis.leases.LeaseTaker;
|
||||
import software.amazon.kinesis.leases.MultiStreamLease;
|
||||
import software.amazon.kinesis.leases.ShardInfo;
|
||||
|
|
@ -44,6 +49,8 @@ import software.amazon.kinesis.leases.exceptions.DependencyException;
|
|||
import software.amazon.kinesis.leases.exceptions.InvalidStateException;
|
||||
import software.amazon.kinesis.leases.exceptions.LeasingException;
|
||||
import software.amazon.kinesis.leases.exceptions.ProvisionedThroughputException;
|
||||
import software.amazon.kinesis.lifecycle.LeaseGracefulShutdownHandler;
|
||||
import software.amazon.kinesis.lifecycle.ShardConsumer;
|
||||
import software.amazon.kinesis.metrics.MetricsFactory;
|
||||
import software.amazon.kinesis.metrics.MetricsLevel;
|
||||
import software.amazon.kinesis.metrics.MetricsScope;
|
||||
|
|
@ -70,115 +77,34 @@ public class DynamoDBLeaseCoordinator implements LeaseCoordinator {
|
|||
.setNameFormat("LeaseRenewer-%04d")
|
||||
.setDaemon(true)
|
||||
.build();
|
||||
private static final ThreadFactory LEASE_DISCOVERY_THREAD_FACTORY = new ThreadFactoryBuilder()
|
||||
.setNameFormat("LeaseDiscovery-%04d")
|
||||
.setDaemon(true)
|
||||
.build();
|
||||
|
||||
private final LeaseRenewer leaseRenewer;
|
||||
private final LeaseTaker leaseTaker;
|
||||
private final LeaseDiscoverer leaseDiscoverer;
|
||||
private final long renewerIntervalMillis;
|
||||
private final long takerIntervalMillis;
|
||||
private final long leaseDiscovererIntervalMillis;
|
||||
private final ExecutorService leaseRenewalThreadpool;
|
||||
private final ExecutorService leaseDiscoveryThreadPool;
|
||||
private final LeaseRefresher leaseRefresher;
|
||||
private final LeaseStatsRecorder leaseStatsRecorder;
|
||||
private final LeaseGracefulShutdownHandler leaseGracefulShutdownHandler;
|
||||
private long initialLeaseTableReadCapacity;
|
||||
private long initialLeaseTableWriteCapacity;
|
||||
protected final MetricsFactory metricsFactory;
|
||||
|
||||
private final Object shutdownLock = new Object();
|
||||
|
||||
private final LeaseManagementConfig.WorkerUtilizationAwareAssignmentConfig workerUtilizationAwareAssignmentConfig;
|
||||
private ScheduledExecutorService leaseCoordinatorThreadPool;
|
||||
private ScheduledFuture<?> leaseDiscoveryFuture;
|
||||
private ScheduledFuture<?> takerFuture;
|
||||
|
||||
private volatile boolean running = false;
|
||||
|
||||
/**
|
||||
* Constructor.
|
||||
*
|
||||
* <p>NOTE: This constructor is deprecated and will be removed in a future release.</p>
|
||||
*
|
||||
* @param leaseRefresher
|
||||
* LeaseRefresher instance to use
|
||||
* @param workerIdentifier
|
||||
* Identifies the worker (e.g. useful to track lease ownership)
|
||||
* @param leaseDurationMillis
|
||||
* Duration of a lease
|
||||
* @param epsilonMillis
|
||||
* Allow for some variance when calculating lease expirations
|
||||
* @param maxLeasesForWorker
|
||||
* Max leases this Worker can handle at a time
|
||||
* @param maxLeasesToStealAtOneTime
|
||||
* Steal up to these many leases at a time (for load balancing)
|
||||
* @param metricsFactory
|
||||
* Used to publish metrics about lease operations
|
||||
*/
|
||||
@Deprecated
|
||||
public DynamoDBLeaseCoordinator(
|
||||
final LeaseRefresher leaseRefresher,
|
||||
final String workerIdentifier,
|
||||
final long leaseDurationMillis,
|
||||
final long epsilonMillis,
|
||||
final int maxLeasesForWorker,
|
||||
final int maxLeasesToStealAtOneTime,
|
||||
final int maxLeaseRenewerThreadCount,
|
||||
final MetricsFactory metricsFactory) {
|
||||
this(
|
||||
leaseRefresher,
|
||||
workerIdentifier,
|
||||
leaseDurationMillis,
|
||||
epsilonMillis,
|
||||
maxLeasesForWorker,
|
||||
maxLeasesToStealAtOneTime,
|
||||
maxLeaseRenewerThreadCount,
|
||||
TableConstants.DEFAULT_INITIAL_LEASE_TABLE_READ_CAPACITY,
|
||||
TableConstants.DEFAULT_INITIAL_LEASE_TABLE_WRITE_CAPACITY,
|
||||
metricsFactory);
|
||||
}
|
||||
|
||||
/**
|
||||
* Constructor.
|
||||
*
|
||||
* @param leaseRefresher
|
||||
* LeaseRefresher instance to use
|
||||
* @param workerIdentifier
|
||||
* Identifies the worker (e.g. useful to track lease ownership)
|
||||
* @param leaseDurationMillis
|
||||
* Duration of a lease
|
||||
* @param epsilonMillis
|
||||
* Allow for some variance when calculating lease expirations
|
||||
* @param maxLeasesForWorker
|
||||
* Max leases this Worker can handle at a time
|
||||
* @param maxLeasesToStealAtOneTime
|
||||
* Steal up to these many leases at a time (for load balancing)
|
||||
* @param initialLeaseTableReadCapacity
|
||||
* Initial dynamodb lease table read iops if creating the lease table
|
||||
* @param initialLeaseTableWriteCapacity
|
||||
* Initial dynamodb lease table write iops if creating the lease table
|
||||
* @param metricsFactory
|
||||
* Used to publish metrics about lease operations
|
||||
*/
|
||||
@Deprecated
|
||||
public DynamoDBLeaseCoordinator(
|
||||
final LeaseRefresher leaseRefresher,
|
||||
final String workerIdentifier,
|
||||
final long leaseDurationMillis,
|
||||
final long epsilonMillis,
|
||||
final int maxLeasesForWorker,
|
||||
final int maxLeasesToStealAtOneTime,
|
||||
final int maxLeaseRenewerThreadCount,
|
||||
final long initialLeaseTableReadCapacity,
|
||||
final long initialLeaseTableWriteCapacity,
|
||||
final MetricsFactory metricsFactory) {
|
||||
this(
|
||||
leaseRefresher,
|
||||
workerIdentifier,
|
||||
leaseDurationMillis,
|
||||
LeaseManagementConfig.DEFAULT_ENABLE_PRIORITY_LEASE_ASSIGNMENT,
|
||||
epsilonMillis,
|
||||
maxLeasesForWorker,
|
||||
maxLeasesToStealAtOneTime,
|
||||
maxLeaseRenewerThreadCount,
|
||||
TableConstants.DEFAULT_INITIAL_LEASE_TABLE_READ_CAPACITY,
|
||||
TableConstants.DEFAULT_INITIAL_LEASE_TABLE_WRITE_CAPACITY,
|
||||
metricsFactory);
|
||||
}
|
||||
|
||||
/**
|
||||
* Constructor.
|
||||
*
|
||||
|
|
@ -214,17 +140,35 @@ public class DynamoDBLeaseCoordinator implements LeaseCoordinator {
|
|||
final int maxLeaseRenewerThreadCount,
|
||||
final long initialLeaseTableReadCapacity,
|
||||
final long initialLeaseTableWriteCapacity,
|
||||
final MetricsFactory metricsFactory) {
|
||||
final MetricsFactory metricsFactory,
|
||||
final LeaseManagementConfig.WorkerUtilizationAwareAssignmentConfig workerUtilizationAwareAssignmentConfig,
|
||||
final LeaseManagementConfig.GracefulLeaseHandoffConfig gracefulLeaseHandoffConfig,
|
||||
final ConcurrentMap<ShardInfo, ShardConsumer> shardInfoShardConsumerMap) {
|
||||
this.leaseRefresher = leaseRefresher;
|
||||
this.leaseRenewalThreadpool = getLeaseRenewalExecutorService(maxLeaseRenewerThreadCount);
|
||||
this.leaseRenewalThreadpool = createExecutorService(maxLeaseRenewerThreadCount, LEASE_RENEWAL_THREAD_FACTORY);
|
||||
this.leaseTaker = new DynamoDBLeaseTaker(leaseRefresher, workerIdentifier, leaseDurationMillis, metricsFactory)
|
||||
.withMaxLeasesForWorker(maxLeasesForWorker)
|
||||
.withMaxLeasesToStealAtOneTime(maxLeasesToStealAtOneTime)
|
||||
.withEnablePriorityLeaseAssignment(enablePriorityLeaseAssignment);
|
||||
this.leaseRenewer = new DynamoDBLeaseRenewer(
|
||||
leaseRefresher, workerIdentifier, leaseDurationMillis, leaseRenewalThreadpool, metricsFactory);
|
||||
this.renewerIntervalMillis = getRenewerTakerIntervalMillis(leaseDurationMillis, epsilonMillis);
|
||||
this.takerIntervalMillis = (leaseDurationMillis + epsilonMillis) * 2;
|
||||
// Should run once every leaseDurationMillis to identify new leases before expiry.
|
||||
this.leaseDiscovererIntervalMillis = leaseDurationMillis - epsilonMillis;
|
||||
this.leaseStatsRecorder = new LeaseStatsRecorder(renewerIntervalMillis, System::currentTimeMillis);
|
||||
this.leaseGracefulShutdownHandler = LeaseGracefulShutdownHandler.create(
|
||||
gracefulLeaseHandoffConfig.gracefulLeaseHandoffTimeoutMillis(), shardInfoShardConsumerMap, this);
|
||||
this.leaseRenewer = new DynamoDBLeaseRenewer(
|
||||
leaseRefresher,
|
||||
workerIdentifier,
|
||||
leaseDurationMillis,
|
||||
leaseRenewalThreadpool,
|
||||
metricsFactory,
|
||||
leaseStatsRecorder,
|
||||
leaseGracefulShutdownHandler::enqueueShutdown);
|
||||
this.leaseDiscoveryThreadPool =
|
||||
createExecutorService(maxLeaseRenewerThreadCount, LEASE_DISCOVERY_THREAD_FACTORY);
|
||||
this.leaseDiscoverer = new DynamoDBLeaseDiscoverer(
|
||||
this.leaseRefresher, this.leaseRenewer, metricsFactory, workerIdentifier, leaseDiscoveryThreadPool);
|
||||
if (initialLeaseTableReadCapacity <= 0) {
|
||||
throw new IllegalArgumentException("readCapacity should be >= 1");
|
||||
}
|
||||
|
|
@ -234,6 +178,7 @@ public class DynamoDBLeaseCoordinator implements LeaseCoordinator {
|
|||
}
|
||||
this.initialLeaseTableWriteCapacity = initialLeaseTableWriteCapacity;
|
||||
this.metricsFactory = metricsFactory;
|
||||
this.workerUtilizationAwareAssignmentConfig = workerUtilizationAwareAssignmentConfig;
|
||||
|
||||
log.info(
|
||||
"With failover time {} ms and epsilon {} ms, LeaseCoordinator will renew leases every {} ms, take"
|
||||
|
|
@ -246,11 +191,49 @@ public class DynamoDBLeaseCoordinator implements LeaseCoordinator {
|
|||
maxLeasesToStealAtOneTime);
|
||||
}
|
||||
|
||||
private class TakerRunnable implements Runnable {
|
||||
@RequiredArgsConstructor
|
||||
private class LeaseDiscoveryRunnable implements Runnable {
|
||||
private final MigrationAdaptiveLeaseAssignmentModeProvider leaseAssignmentModeProvider;
|
||||
|
||||
@Override
|
||||
public void run() {
|
||||
try {
|
||||
// LeaseDiscoverer is run in WORKER_UTILIZATION_AWARE_ASSIGNMENT mode only
|
||||
synchronized (shutdownLock) {
|
||||
if (!leaseAssignmentModeProvider
|
||||
.getLeaseAssignmentMode()
|
||||
.equals(
|
||||
MigrationAdaptiveLeaseAssignmentModeProvider.LeaseAssignmentMode
|
||||
.WORKER_UTILIZATION_AWARE_ASSIGNMENT)) {
|
||||
return;
|
||||
}
|
||||
if (running) {
|
||||
leaseRenewer.addLeasesToRenew(leaseDiscoverer.discoverNewLeases());
|
||||
}
|
||||
}
|
||||
} catch (Exception e) {
|
||||
log.error("Failed to execute lease discovery", e);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@RequiredArgsConstructor
|
||||
private class TakerRunnable implements Runnable {
|
||||
private final MigrationAdaptiveLeaseAssignmentModeProvider leaseAssignmentModeProvider;
|
||||
|
||||
@Override
|
||||
public void run() {
|
||||
try {
|
||||
// LeaseTaker is run in DEFAULT_LEASE_COUNT_BASED_ASSIGNMENT mode only
|
||||
synchronized (shutdownLock) {
|
||||
if (!leaseAssignmentModeProvider
|
||||
.getLeaseAssignmentMode()
|
||||
.equals(
|
||||
MigrationAdaptiveLeaseAssignmentModeProvider.LeaseAssignmentMode
|
||||
.DEFAULT_LEASE_COUNT_BASED_ASSIGNMENT)) {
|
||||
return;
|
||||
}
|
||||
}
|
||||
runLeaseTaker();
|
||||
} catch (LeasingException e) {
|
||||
log.error("LeasingException encountered in lease taking thread", e);
|
||||
|
|
@ -290,18 +273,35 @@ public class DynamoDBLeaseCoordinator implements LeaseCoordinator {
|
|||
}
|
||||
|
||||
@Override
|
||||
public void start() throws DependencyException, InvalidStateException, ProvisionedThroughputException {
|
||||
public void start(final MigrationAdaptiveLeaseAssignmentModeProvider leaseAssignmentModeProvider)
|
||||
throws DependencyException, InvalidStateException, ProvisionedThroughputException {
|
||||
leaseRenewer.initialize();
|
||||
// At max, we need 3 threads - lease renewer, lease taker, lease discoverer - to run without contention.
|
||||
leaseCoordinatorThreadPool = Executors.newScheduledThreadPool(3, LEASE_COORDINATOR_THREAD_FACTORY);
|
||||
|
||||
// 2 because we know we'll have at most 2 concurrent tasks at a time.
|
||||
leaseCoordinatorThreadPool = Executors.newScheduledThreadPool(2, LEASE_COORDINATOR_THREAD_FACTORY);
|
||||
// During migration to KCLv3.x from KCLv2.x, lease assignment mode can change dynamically, so
|
||||
// both lease assignment algorithms will be started but only one will execute based on
|
||||
// leaseAssignmentModeProvider.getLeaseAssignmentMode(). However for new applications starting in
|
||||
// KCLv3.x or applications successfully migrated to KCLv3.x, lease assignment mode will not
|
||||
// change dynamically and will always be WORKER_UTILIZATION_AWARE_ASSIGNMENT, therefore
|
||||
// don't initialize KCLv2.x lease assignment algorithm components that are not needed.
|
||||
if (leaseAssignmentModeProvider.dynamicModeChangeSupportNeeded()) {
|
||||
// Taker runs with fixed DELAY because we want it to run slower in the event of performance degradation.
|
||||
takerFuture = leaseCoordinatorThreadPool.scheduleWithFixedDelay(
|
||||
new TakerRunnable(leaseAssignmentModeProvider), 0L, takerIntervalMillis, TimeUnit.MILLISECONDS);
|
||||
}
|
||||
|
||||
// Taker runs with fixed DELAY because we want it to run slower in the event of performance degredation.
|
||||
takerFuture = leaseCoordinatorThreadPool.scheduleWithFixedDelay(
|
||||
new TakerRunnable(), 0L, takerIntervalMillis, TimeUnit.MILLISECONDS);
|
||||
// Renewer runs at fixed INTERVAL because we want it to run at the same rate in the event of degredation.
|
||||
leaseDiscoveryFuture = leaseCoordinatorThreadPool.scheduleAtFixedRate(
|
||||
new LeaseDiscoveryRunnable(leaseAssignmentModeProvider),
|
||||
0L,
|
||||
leaseDiscovererIntervalMillis,
|
||||
TimeUnit.MILLISECONDS);
|
||||
|
||||
// Renewer runs at fixed INTERVAL because we want it to run at the same rate in the event of degradation.
|
||||
leaseCoordinatorThreadPool.scheduleAtFixedRate(
|
||||
new RenewerRunnable(), 0L, renewerIntervalMillis, TimeUnit.MILLISECONDS);
|
||||
|
||||
leaseGracefulShutdownHandler.start();
|
||||
running = true;
|
||||
}
|
||||
|
||||
|
|
@ -383,6 +383,8 @@ public class DynamoDBLeaseCoordinator implements LeaseCoordinator {
|
|||
}
|
||||
|
||||
leaseRenewalThreadpool.shutdownNow();
|
||||
leaseCoordinatorThreadPool.shutdownNow();
|
||||
leaseGracefulShutdownHandler.stop();
|
||||
synchronized (shutdownLock) {
|
||||
leaseRenewer.clearCurrentlyHeldLeases();
|
||||
running = false;
|
||||
|
|
@ -393,6 +395,10 @@ public class DynamoDBLeaseCoordinator implements LeaseCoordinator {
|
|||
public void stopLeaseTaker() {
|
||||
if (takerFuture != null) {
|
||||
takerFuture.cancel(false);
|
||||
leaseDiscoveryFuture.cancel(false);
|
||||
// the method is called in worker graceful shutdown. We want to stop any further lease shutdown
|
||||
// so we don't interrupt worker shutdown.
|
||||
leaseGracefulShutdownHandler.stop();
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -418,20 +424,15 @@ public class DynamoDBLeaseCoordinator implements LeaseCoordinator {
|
|||
}
|
||||
|
||||
/**
|
||||
* Returns executor service that should be used for lease renewal.
|
||||
* Returns executor service for given ThreadFactory.
|
||||
* @param maximumPoolSize Maximum allowed thread pool size
|
||||
* @return Executor service that should be used for lease renewal.
|
||||
* @return Executor service
|
||||
*/
|
||||
private static ExecutorService getLeaseRenewalExecutorService(int maximumPoolSize) {
|
||||
private static ExecutorService createExecutorService(final int maximumPoolSize, final ThreadFactory threadFactory) {
|
||||
int coreLeaseCount = Math.max(maximumPoolSize / 4, 2);
|
||||
|
||||
return new ThreadPoolExecutor(
|
||||
coreLeaseCount,
|
||||
maximumPoolSize,
|
||||
60,
|
||||
TimeUnit.SECONDS,
|
||||
new LinkedTransferQueue<>(),
|
||||
LEASE_RENEWAL_THREAD_FACTORY);
|
||||
coreLeaseCount, maximumPoolSize, 60, TimeUnit.SECONDS, new LinkedTransferQueue<>(), threadFactory);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
@ -472,6 +473,8 @@ public class DynamoDBLeaseCoordinator implements LeaseCoordinator {
|
|||
* {@inheritDoc}
|
||||
*
|
||||
* <p>NOTE: This method is deprecated. Please set the initial capacity through the constructor.</p>
|
||||
*
|
||||
* This is a method of the public lease coordinator interface.
|
||||
*/
|
||||
@Override
|
||||
@Deprecated
|
||||
|
|
@ -487,6 +490,8 @@ public class DynamoDBLeaseCoordinator implements LeaseCoordinator {
|
|||
* {@inheritDoc}
|
||||
*
|
||||
* <p>NOTE: This method is deprecated. Please set the initial capacity through the constructor.</p>
|
||||
*
|
||||
* This is a method of the public lease coordinator interface.
|
||||
*/
|
||||
@Override
|
||||
@Deprecated
|
||||
|
|
@ -497,4 +502,9 @@ public class DynamoDBLeaseCoordinator implements LeaseCoordinator {
|
|||
initialLeaseTableWriteCapacity = writeCapacity;
|
||||
return this;
|
||||
}
|
||||
|
||||
@Override
|
||||
public LeaseStatsRecorder leaseStatsRecorder() {
|
||||
return leaseStatsRecorder;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -0,0 +1,120 @@
|
|||
package software.amazon.kinesis.leases.dynamodb;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.Objects;
|
||||
import java.util.Set;
|
||||
import java.util.concurrent.CompletableFuture;
|
||||
import java.util.concurrent.ExecutorService;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import lombok.RequiredArgsConstructor;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import software.amazon.kinesis.leases.Lease;
|
||||
import software.amazon.kinesis.leases.LeaseDiscoverer;
|
||||
import software.amazon.kinesis.leases.LeaseRefresher;
|
||||
import software.amazon.kinesis.leases.LeaseRenewer;
|
||||
import software.amazon.kinesis.leases.exceptions.DependencyException;
|
||||
import software.amazon.kinesis.leases.exceptions.InvalidStateException;
|
||||
import software.amazon.kinesis.leases.exceptions.ProvisionedThroughputException;
|
||||
import software.amazon.kinesis.metrics.MetricsFactory;
|
||||
import software.amazon.kinesis.metrics.MetricsLevel;
|
||||
import software.amazon.kinesis.metrics.MetricsScope;
|
||||
import software.amazon.kinesis.metrics.MetricsUtil;
|
||||
|
||||
import static java.util.Objects.isNull;
|
||||
|
||||
/**
|
||||
* An implementation of {@link LeaseDiscoverer}, it uses {@link LeaseRefresher} to query
|
||||
* {@link DynamoDBLeaseRefresher#LEASE_OWNER_TO_LEASE_KEY_INDEX_NAME } and find the leases assigned
|
||||
* to current worker and then filter and returns the leases that have not started processing (looks at
|
||||
* {@link LeaseRenewer#getCurrentlyHeldLeases()} to find out which leases are currently held leases).
|
||||
*/
|
||||
@Slf4j
|
||||
@RequiredArgsConstructor
|
||||
public class DynamoDBLeaseDiscoverer implements LeaseDiscoverer {
|
||||
|
||||
private final LeaseRefresher leaseRefresher;
|
||||
private final LeaseRenewer leaseRenewer;
|
||||
private final MetricsFactory metricsFactory;
|
||||
private final String workerIdentifier;
|
||||
private final ExecutorService executorService;
|
||||
|
||||
@Override
|
||||
public List<Lease> discoverNewLeases()
|
||||
throws ProvisionedThroughputException, InvalidStateException, DependencyException {
|
||||
final MetricsScope metricsScope = MetricsUtil.createMetricsWithOperation(metricsFactory, "LeaseDiscovery");
|
||||
long startTime = System.currentTimeMillis();
|
||||
boolean success = false;
|
||||
try {
|
||||
final Set<String> currentHeldLeaseKeys =
|
||||
leaseRenewer.getCurrentlyHeldLeases().keySet();
|
||||
|
||||
final long listLeaseKeysForWorkerStartTime = System.currentTimeMillis();
|
||||
final List<String> leaseKeys = leaseRefresher.listLeaseKeysForWorker(workerIdentifier);
|
||||
MetricsUtil.addLatency(
|
||||
metricsScope, "ListLeaseKeysForWorker", listLeaseKeysForWorkerStartTime, MetricsLevel.DETAILED);
|
||||
|
||||
final List<String> newLeaseKeys = leaseKeys.stream()
|
||||
.filter(leaseKey -> !currentHeldLeaseKeys.contains(leaseKey))
|
||||
.collect(Collectors.toList());
|
||||
|
||||
final long fetchNewLeasesStartTime = System.currentTimeMillis();
|
||||
final List<CompletableFuture<Lease>> completableFutures = newLeaseKeys.stream()
|
||||
.map(leaseKey ->
|
||||
CompletableFuture.supplyAsync(() -> fetchLease(leaseKey, metricsScope), executorService))
|
||||
.collect(Collectors.toList());
|
||||
|
||||
final List<Lease> newLeases = completableFutures.stream()
|
||||
.map(CompletableFuture::join)
|
||||
.filter(Objects::nonNull)
|
||||
.collect(Collectors.toList());
|
||||
|
||||
log.info(
|
||||
"New leases assigned to worker : {}, count : {}, leases : {}",
|
||||
workerIdentifier,
|
||||
newLeases.size(),
|
||||
newLeases.stream().map(Lease::leaseKey).collect(Collectors.toList()));
|
||||
|
||||
MetricsUtil.addLatency(metricsScope, "FetchNewLeases", fetchNewLeasesStartTime, MetricsLevel.DETAILED);
|
||||
|
||||
success = true;
|
||||
MetricsUtil.addCount(metricsScope, "NewLeasesDiscovered", newLeases.size(), MetricsLevel.DETAILED);
|
||||
return newLeases;
|
||||
} finally {
|
||||
MetricsUtil.addWorkerIdentifier(metricsScope, workerIdentifier);
|
||||
MetricsUtil.addSuccessAndLatency(metricsScope, success, startTime, MetricsLevel.SUMMARY);
|
||||
MetricsUtil.endScope(metricsScope);
|
||||
}
|
||||
}
|
||||
|
||||
private Lease fetchLease(final String leaseKey, final MetricsScope metricsScope) {
|
||||
try {
|
||||
final Lease lease = leaseRefresher.getLease(leaseKey);
|
||||
if (isNull(lease)) {
|
||||
return null;
|
||||
}
|
||||
// GSI is eventually consistent thus, validate that the fetched lease is indeed assigned to this
|
||||
// worker, if not just pass in this run.
|
||||
if (!lease.leaseOwner().equals(workerIdentifier)) {
|
||||
MetricsUtil.addCount(metricsScope, "OwnerMismatch", 1, MetricsLevel.DETAILED);
|
||||
return null;
|
||||
}
|
||||
// if checkpointOwner is not null, it means that the lease is still pending shutdown for the last owner.
|
||||
// Don't add the lease to the in-memory map yet.
|
||||
if (lease.checkpointOwner() != null) {
|
||||
return null;
|
||||
}
|
||||
// when a new lease is discovered, set the lastCounterIncrementNanos to current time as the time
|
||||
// when it has become visible, on next renewer interval this will be updated by LeaseRenewer to
|
||||
// correct time.
|
||||
lease.lastCounterIncrementNanos(System.nanoTime());
|
||||
return lease;
|
||||
} catch (final Exception e) {
|
||||
// if getLease on some lease key fail, continue and fetch other leases, the one failed will
|
||||
// be fetched in the next iteration or will be reassigned if stayed idle for long.
|
||||
MetricsUtil.addCount(metricsScope, "GetLease:Error", 1, MetricsLevel.SUMMARY);
|
||||
log.error("GetLease failed for leaseKey : {}", leaseKey, e);
|
||||
return null;
|
||||
}
|
||||
}
|
||||
}
|
||||
File diff suppressed because it is too large
Load diff
File diff suppressed because it is too large
Load diff
|
|
@ -14,6 +14,8 @@
|
|||
*/
|
||||
package software.amazon.kinesis.leases.dynamodb;
|
||||
|
||||
import java.math.BigDecimal;
|
||||
import java.math.RoundingMode;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collection;
|
||||
import java.util.HashMap;
|
||||
|
|
@ -26,8 +28,10 @@ import java.util.concurrent.ConcurrentNavigableMap;
|
|||
import java.util.concurrent.ConcurrentSkipListMap;
|
||||
import java.util.concurrent.ExecutionException;
|
||||
import java.util.concurrent.ExecutorService;
|
||||
import java.util.concurrent.Executors;
|
||||
import java.util.concurrent.Future;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
import java.util.function.Consumer;
|
||||
|
||||
import lombok.NonNull;
|
||||
import lombok.RequiredArgsConstructor;
|
||||
|
|
@ -39,6 +43,7 @@ import software.amazon.kinesis.common.StreamIdentifier;
|
|||
import software.amazon.kinesis.leases.Lease;
|
||||
import software.amazon.kinesis.leases.LeaseRefresher;
|
||||
import software.amazon.kinesis.leases.LeaseRenewer;
|
||||
import software.amazon.kinesis.leases.LeaseStatsRecorder;
|
||||
import software.amazon.kinesis.leases.MultiStreamLease;
|
||||
import software.amazon.kinesis.leases.exceptions.DependencyException;
|
||||
import software.amazon.kinesis.leases.exceptions.InvalidStateException;
|
||||
|
|
@ -48,21 +53,32 @@ import software.amazon.kinesis.metrics.MetricsLevel;
|
|||
import software.amazon.kinesis.metrics.MetricsScope;
|
||||
import software.amazon.kinesis.metrics.MetricsUtil;
|
||||
|
||||
import static java.util.Objects.nonNull;
|
||||
import static software.amazon.kinesis.leases.LeaseStatsRecorder.BYTES_PER_KB;
|
||||
|
||||
/**
|
||||
* An implementation of {@link LeaseRenewer} that uses DynamoDB via {@link LeaseRefresher}.
|
||||
*/
|
||||
@Slf4j
|
||||
@KinesisClientInternalApi
|
||||
public class DynamoDBLeaseRenewer implements LeaseRenewer {
|
||||
|
||||
/**
|
||||
* 6 digit after decimal gives the granularity of 0.001 byte per second.
|
||||
*/
|
||||
private static final int DEFAULT_THROUGHPUT_DIGIT_AFTER_DECIMAL = 6;
|
||||
|
||||
private static final int RENEWAL_RETRIES = 2;
|
||||
private static final String RENEW_ALL_LEASES_DIMENSION = "RenewAllLeases";
|
||||
private static final String LEASE_RENEWER_INITIALIZE = "LeaseRenewerInitialize";
|
||||
|
||||
private final LeaseRefresher leaseRefresher;
|
||||
private final String workerIdentifier;
|
||||
private final long leaseDurationNanos;
|
||||
private final ExecutorService executorService;
|
||||
private final MetricsFactory metricsFactory;
|
||||
|
||||
private final LeaseStatsRecorder leaseStatsRecorder;
|
||||
private final Consumer<Lease> leaseGracefulShutdownCallback;
|
||||
private final ConcurrentNavigableMap<String, Lease> ownedLeases = new ConcurrentSkipListMap<>();
|
||||
|
||||
/**
|
||||
|
|
@ -82,12 +98,16 @@ public class DynamoDBLeaseRenewer implements LeaseRenewer {
|
|||
final String workerIdentifier,
|
||||
final long leaseDurationMillis,
|
||||
final ExecutorService executorService,
|
||||
final MetricsFactory metricsFactory) {
|
||||
final MetricsFactory metricsFactory,
|
||||
final LeaseStatsRecorder leaseStatsRecorder,
|
||||
final Consumer<Lease> leaseGracefulShutdownCallback) {
|
||||
this.leaseRefresher = leaseRefresher;
|
||||
this.workerIdentifier = workerIdentifier;
|
||||
this.leaseDurationNanos = TimeUnit.MILLISECONDS.toNanos(leaseDurationMillis);
|
||||
this.executorService = executorService;
|
||||
this.metricsFactory = metricsFactory;
|
||||
this.leaseStatsRecorder = leaseStatsRecorder;
|
||||
this.leaseGracefulShutdownCallback = leaseGracefulShutdownCallback;
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -187,11 +207,21 @@ public class DynamoDBLeaseRenewer implements LeaseRenewer {
|
|||
// ShutdownException).
|
||||
boolean isLeaseExpired = lease.isExpired(leaseDurationNanos, System.nanoTime());
|
||||
if (renewEvenIfExpired || !isLeaseExpired) {
|
||||
final Double throughputPerKBps = this.leaseStatsRecorder.getThroughputKBps(leaseKey);
|
||||
if (nonNull(throughputPerKBps)) {
|
||||
lease.throughputKBps(BigDecimal.valueOf(throughputPerKBps)
|
||||
.setScale(DEFAULT_THROUGHPUT_DIGIT_AFTER_DECIMAL, RoundingMode.HALF_UP)
|
||||
.doubleValue());
|
||||
}
|
||||
renewedLease = leaseRefresher.renewLease(lease);
|
||||
}
|
||||
if (renewedLease) {
|
||||
lease.lastCounterIncrementNanos(System.nanoTime());
|
||||
}
|
||||
if (lease.shutdownRequested()) {
|
||||
// the underlying function will dedup
|
||||
leaseGracefulShutdownCallback.accept(lease.copy());
|
||||
}
|
||||
}
|
||||
|
||||
if (renewedLease) {
|
||||
|
|
@ -391,6 +421,12 @@ public class DynamoDBLeaseRenewer implements LeaseRenewer {
|
|||
* every time we acquire a lease, it gets a new concurrency token.
|
||||
*/
|
||||
authoritativeLease.concurrencyToken(UUID.randomUUID());
|
||||
if (nonNull(lease.throughputKBps())) {
|
||||
leaseStatsRecorder.recordStats(LeaseStatsRecorder.LeaseStats.builder()
|
||||
.leaseKey(lease.leaseKey())
|
||||
.bytes(Math.round(lease.throughputKBps() * BYTES_PER_KB)) // Convert KB to Bytes
|
||||
.build());
|
||||
}
|
||||
ownedLeases.put(authoritativeLease.leaseKey(), authoritativeLease);
|
||||
}
|
||||
}
|
||||
|
|
@ -409,6 +445,7 @@ public class DynamoDBLeaseRenewer implements LeaseRenewer {
|
|||
*/
|
||||
@Override
|
||||
public void dropLease(Lease lease) {
|
||||
leaseStatsRecorder.dropLeaseStats(lease.leaseKey());
|
||||
ownedLeases.remove(lease.leaseKey());
|
||||
}
|
||||
|
||||
|
|
@ -417,26 +454,48 @@ public class DynamoDBLeaseRenewer implements LeaseRenewer {
|
|||
*/
|
||||
@Override
|
||||
public void initialize() throws DependencyException, InvalidStateException, ProvisionedThroughputException {
|
||||
Collection<Lease> leases = leaseRefresher.listLeases();
|
||||
List<Lease> myLeases = new LinkedList<>();
|
||||
boolean renewEvenIfExpired = true;
|
||||
final MetricsScope scope = MetricsUtil.createMetricsWithOperation(metricsFactory, LEASE_RENEWER_INITIALIZE);
|
||||
final ExecutorService singleThreadExecutorService = Executors.newSingleThreadExecutor();
|
||||
boolean success = false;
|
||||
try {
|
||||
final Map.Entry<List<Lease>, List<String>> response =
|
||||
leaseRefresher.listLeasesParallely(singleThreadExecutorService, 1);
|
||||
|
||||
for (Lease lease : leases) {
|
||||
if (workerIdentifier.equals(lease.leaseOwner())) {
|
||||
log.info(" Worker {} found lease {}", workerIdentifier, lease);
|
||||
// Okay to renew even if lease is expired, because we start with an empty list and we add the lease to
|
||||
// our list only after a successful renew. So we don't need to worry about the edge case where we could
|
||||
// continue renewing a lease after signaling a lease loss to the application.
|
||||
|
||||
if (renewLease(lease, renewEvenIfExpired)) {
|
||||
myLeases.add(lease);
|
||||
}
|
||||
} else {
|
||||
log.debug("Worker {} ignoring lease {} ", workerIdentifier, lease);
|
||||
if (!response.getValue().isEmpty()) {
|
||||
log.warn("List of leaseKeys failed to deserialize : {} ", response.getValue());
|
||||
}
|
||||
}
|
||||
|
||||
addLeasesToRenew(myLeases);
|
||||
final List<Lease> myLeases = new LinkedList<>();
|
||||
boolean renewEvenIfExpired = true;
|
||||
|
||||
for (Lease lease : response.getKey()) {
|
||||
if (workerIdentifier.equals(lease.leaseOwner())) {
|
||||
log.info(" Worker {} found lease {}", workerIdentifier, lease);
|
||||
// Okay to renew even if lease is expired, because we start with an empty list and we add the lease
|
||||
// to
|
||||
// our list only after a successful renew. So we don't need to worry about the edge case where we
|
||||
// could
|
||||
// continue renewing a lease after signaling a lease loss to the application.
|
||||
|
||||
if (renewLease(lease, renewEvenIfExpired)) {
|
||||
myLeases.add(lease);
|
||||
}
|
||||
} else {
|
||||
log.debug("Worker {} ignoring lease {} ", workerIdentifier, lease);
|
||||
}
|
||||
}
|
||||
|
||||
addLeasesToRenew(myLeases);
|
||||
success = true;
|
||||
} catch (final Exception e) {
|
||||
// It's ok to swollow exception here fail to discover all leases here, as the assignment logic takes
|
||||
// care of reassignment if some lease is expired.
|
||||
log.warn("LeaseRefresher failed in initialization during renewing of pre assigned leases", e);
|
||||
} finally {
|
||||
singleThreadExecutorService.shutdown();
|
||||
MetricsUtil.addCount(scope, "Fault", success ? 0 : 1, MetricsLevel.DETAILED);
|
||||
MetricsUtil.endScope(scope);
|
||||
}
|
||||
}
|
||||
|
||||
private void verifyNotNull(Object object, String message) {
|
||||
|
|
|
|||
|
|
@ -44,11 +44,8 @@ import software.amazon.kinesis.retrieval.kpl.ExtendedSequenceNumber;
|
|||
*/
|
||||
@KinesisClientInternalApi
|
||||
public class DynamoDBLeaseSerializer implements LeaseSerializer {
|
||||
private static final String LEASE_KEY_KEY = "leaseKey";
|
||||
private static final String LEASE_OWNER_KEY = "leaseOwner";
|
||||
private static final String LEASE_COUNTER_KEY = "leaseCounter";
|
||||
private static final String OWNER_SWITCHES_KEY = "ownerSwitchesSinceCheckpoint";
|
||||
private static final String CHECKPOINT_SEQUENCE_NUMBER_KEY = "checkpoint";
|
||||
private static final String CHECKPOINT_SUBSEQUENCE_NUMBER_KEY = "checkpointSubSequenceNumber";
|
||||
private static final String PENDING_CHECKPOINT_SEQUENCE_KEY = "pendingCheckpoint";
|
||||
private static final String PENDING_CHECKPOINT_SUBSEQUENCE_KEY = "pendingCheckpointSubSequenceNumber";
|
||||
|
|
@ -57,6 +54,11 @@ public class DynamoDBLeaseSerializer implements LeaseSerializer {
|
|||
private static final String CHILD_SHARD_IDS_KEY = "childShardIds";
|
||||
private static final String STARTING_HASH_KEY = "startingHashKey";
|
||||
private static final String ENDING_HASH_KEY = "endingHashKey";
|
||||
private static final String THROUGHOUT_PUT_KBPS = "throughputKBps";
|
||||
private static final String CHECKPOINT_SEQUENCE_NUMBER_KEY = "checkpoint";
|
||||
static final String CHECKPOINT_OWNER = "checkpointOwner";
|
||||
static final String LEASE_OWNER_KEY = "leaseOwner";
|
||||
static final String LEASE_KEY_KEY = "leaseKey";
|
||||
|
||||
@Override
|
||||
public Map<String, AttributeValue> toDynamoRecord(final Lease lease) {
|
||||
|
|
@ -110,6 +112,13 @@ public class DynamoDBLeaseSerializer implements LeaseSerializer {
|
|||
lease.hashKeyRangeForLease().serializedEndingHashKey()));
|
||||
}
|
||||
|
||||
if (lease.throughputKBps() != null) {
|
||||
result.put(THROUGHOUT_PUT_KBPS, DynamoUtils.createAttributeValue(lease.throughputKBps()));
|
||||
}
|
||||
|
||||
if (lease.checkpointOwner() != null) {
|
||||
result.put(CHECKPOINT_OWNER, DynamoUtils.createAttributeValue(lease.checkpointOwner()));
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
|
|
@ -146,6 +155,14 @@ public class DynamoDBLeaseSerializer implements LeaseSerializer {
|
|||
leaseToUpdate.hashKeyRange(HashKeyRangeForLease.deserialize(startingHashKey, endingHashKey));
|
||||
}
|
||||
|
||||
if (DynamoUtils.safeGetDouble(dynamoRecord, THROUGHOUT_PUT_KBPS) != null) {
|
||||
leaseToUpdate.throughputKBps(DynamoUtils.safeGetDouble(dynamoRecord, THROUGHOUT_PUT_KBPS));
|
||||
}
|
||||
|
||||
if (DynamoUtils.safeGetString(dynamoRecord, CHECKPOINT_OWNER) != null) {
|
||||
leaseToUpdate.checkpointOwner(DynamoUtils.safeGetString(dynamoRecord, CHECKPOINT_OWNER));
|
||||
}
|
||||
|
||||
return leaseToUpdate;
|
||||
}
|
||||
|
||||
|
|
@ -181,18 +198,9 @@ public class DynamoDBLeaseSerializer implements LeaseSerializer {
|
|||
|
||||
@Override
|
||||
public Map<String, ExpectedAttributeValue> getDynamoLeaseOwnerExpectation(final Lease lease) {
|
||||
Map<String, ExpectedAttributeValue> result = new HashMap<>();
|
||||
|
||||
ExpectedAttributeValue.Builder eavBuilder = ExpectedAttributeValue.builder();
|
||||
|
||||
if (lease.leaseOwner() == null) {
|
||||
eavBuilder = eavBuilder.exists(false);
|
||||
} else {
|
||||
eavBuilder = eavBuilder.value(DynamoUtils.createAttributeValue(lease.leaseOwner()));
|
||||
}
|
||||
|
||||
result.put(LEASE_OWNER_KEY, eavBuilder.build());
|
||||
|
||||
final Map<String, ExpectedAttributeValue> result = new HashMap<>();
|
||||
result.put(LEASE_OWNER_KEY, buildExpectedAttributeValueIfExistsOrValue(lease.leaseOwner()));
|
||||
result.put(CHECKPOINT_OWNER, buildExpectedAttributeValueIfExistsOrValue(lease.checkpointOwner()));
|
||||
return result;
|
||||
}
|
||||
|
||||
|
|
@ -247,9 +255,17 @@ public class DynamoDBLeaseSerializer implements LeaseSerializer {
|
|||
.value(DynamoUtils.createAttributeValue(owner))
|
||||
.action(AttributeAction.PUT)
|
||||
.build());
|
||||
// this method is currently used by assignLease and takeLease. In both case we want the checkpoint owner to be
|
||||
// deleted as this is a fresh assignment
|
||||
result.put(
|
||||
CHECKPOINT_OWNER,
|
||||
AttributeValueUpdate.builder().action(AttributeAction.DELETE).build());
|
||||
|
||||
String oldOwner = lease.leaseOwner();
|
||||
if (oldOwner != null && !oldOwner.equals(owner)) {
|
||||
String checkpointOwner = lease.checkpointOwner();
|
||||
// if checkpoint owner is not null, this update is supposed to remove the checkpoint owner
|
||||
// and transfer the lease ownership to the leaseOwner so incrementing the owner switch key
|
||||
if (oldOwner != null && !oldOwner.equals(owner) || (checkpointOwner != null && checkpointOwner.equals(owner))) {
|
||||
result.put(
|
||||
OWNER_SWITCHES_KEY,
|
||||
AttributeValueUpdate.builder()
|
||||
|
|
@ -261,18 +277,38 @@ public class DynamoDBLeaseSerializer implements LeaseSerializer {
|
|||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
* AssignLease performs the PUT action on the LeaseOwner and ADD (1) action on the leaseCounter.
|
||||
* @param lease lease that needs to be assigned
|
||||
* @param newOwner newLeaseOwner
|
||||
* @return Map of AttributeName to update operation
|
||||
*/
|
||||
@Override
|
||||
public Map<String, AttributeValueUpdate> getDynamoAssignLeaseUpdate(final Lease lease, final String newOwner) {
|
||||
Map<String, AttributeValueUpdate> result = getDynamoTakeLeaseUpdate(lease, newOwner);
|
||||
|
||||
result.put(LEASE_COUNTER_KEY, getAttributeValueUpdateForAdd());
|
||||
return result;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Map<String, AttributeValueUpdate> getDynamoEvictLeaseUpdate(final Lease lease) {
|
||||
Map<String, AttributeValueUpdate> result = new HashMap<>();
|
||||
AttributeValue value = null;
|
||||
|
||||
final Map<String, AttributeValueUpdate> result = new HashMap<>();
|
||||
// if checkpointOwner is not null, it means lease handoff is initiated. In this case we just remove the
|
||||
// checkpoint owner so the next owner (leaseOwner) can pick up the lease without waiting for assignment.
|
||||
// Otherwise, remove the leaseOwner
|
||||
if (lease.checkpointOwner() == null) {
|
||||
result.put(
|
||||
LEASE_OWNER_KEY,
|
||||
AttributeValueUpdate.builder()
|
||||
.action(AttributeAction.DELETE)
|
||||
.build());
|
||||
}
|
||||
// We always want to remove checkpointOwner, it's ok even if it's null
|
||||
result.put(
|
||||
LEASE_OWNER_KEY,
|
||||
AttributeValueUpdate.builder()
|
||||
.value(value)
|
||||
.action(AttributeAction.DELETE)
|
||||
.build());
|
||||
|
||||
CHECKPOINT_OWNER,
|
||||
AttributeValueUpdate.builder().action(AttributeAction.DELETE).build());
|
||||
result.put(LEASE_COUNTER_KEY, getAttributeValueUpdateForAdd());
|
||||
return result;
|
||||
}
|
||||
|
||||
|
|
@ -394,4 +430,58 @@ public class DynamoDBLeaseSerializer implements LeaseSerializer {
|
|||
|
||||
return definitions;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Collection<KeySchemaElement> getWorkerIdToLeaseKeyIndexKeySchema() {
|
||||
final List<KeySchemaElement> keySchema = new ArrayList<>();
|
||||
keySchema.add(KeySchemaElement.builder()
|
||||
.attributeName(LEASE_OWNER_KEY)
|
||||
.keyType(KeyType.HASH)
|
||||
.build());
|
||||
keySchema.add(KeySchemaElement.builder()
|
||||
.attributeName(LEASE_KEY_KEY)
|
||||
.keyType(KeyType.RANGE)
|
||||
.build());
|
||||
return keySchema;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Collection<AttributeDefinition> getWorkerIdToLeaseKeyIndexAttributeDefinitions() {
|
||||
final List<AttributeDefinition> definitions = new ArrayList<>();
|
||||
definitions.add(AttributeDefinition.builder()
|
||||
.attributeName(LEASE_OWNER_KEY)
|
||||
.attributeType(ScalarAttributeType.S)
|
||||
.build());
|
||||
definitions.add(AttributeDefinition.builder()
|
||||
.attributeName(LEASE_KEY_KEY)
|
||||
.attributeType(ScalarAttributeType.S)
|
||||
.build());
|
||||
return definitions;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Map<String, AttributeValueUpdate> getDynamoLeaseThroughputKbpsUpdate(Lease lease) {
|
||||
final Map<String, AttributeValueUpdate> result = new HashMap<>();
|
||||
final AttributeValueUpdate avu = AttributeValueUpdate.builder()
|
||||
.value(DynamoUtils.createAttributeValue(lease.throughputKBps()))
|
||||
.action(AttributeAction.PUT)
|
||||
.build();
|
||||
result.put(THROUGHOUT_PUT_KBPS, avu);
|
||||
return result;
|
||||
}
|
||||
|
||||
private static ExpectedAttributeValue buildExpectedAttributeValueIfExistsOrValue(String value) {
|
||||
return value == null
|
||||
? ExpectedAttributeValue.builder().exists(false).build()
|
||||
: ExpectedAttributeValue.builder()
|
||||
.value(DynamoUtils.createAttributeValue(value))
|
||||
.build();
|
||||
}
|
||||
|
||||
private static AttributeValueUpdate getAttributeValueUpdateForAdd() {
|
||||
return AttributeValueUpdate.builder()
|
||||
.value(DynamoUtils.createAttributeValue(1L))
|
||||
.action(AttributeAction.ADD)
|
||||
.build();
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -106,15 +106,6 @@ public class DynamoDBLeaseTaker implements LeaseTaker {
|
|||
return this;
|
||||
}
|
||||
|
||||
/**
|
||||
* @deprecated Misspelled method, use {@link DynamoDBLeaseTaker#withVeryOldLeaseDurationNanosMultiplier(int)}
|
||||
*/
|
||||
@Deprecated
|
||||
public DynamoDBLeaseTaker withVeryOldLeaseDurationNanosMultipler(long veryOldLeaseDurationNanosMultipler) {
|
||||
this.veryOldLeaseDurationNanosMultiplier = (int) veryOldLeaseDurationNanosMultipler;
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Overrides the default very old lease duration nanos multiplier to increase the threshold for taking very old leases.
|
||||
* Setting this to a higher value than 3 will increase the threshold for very old lease taking.
|
||||
|
|
|
|||
|
|
@ -266,7 +266,8 @@ class ConsumerStates {
|
|||
argument.idleTimeInMilliseconds(),
|
||||
argument.aggregatorUtil(),
|
||||
argument.metricsFactory(),
|
||||
argument.schemaRegistryDecoder());
|
||||
argument.schemaRegistryDecoder(),
|
||||
argument.leaseCoordinator().leaseStatsRecorder());
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
@ -336,7 +337,8 @@ class ConsumerStates {
|
|||
argument.shardRecordProcessor(),
|
||||
argument.recordProcessorCheckpointer(),
|
||||
consumer.shutdownNotification(),
|
||||
argument.shardInfo());
|
||||
argument.shardInfo(),
|
||||
consumer.shardConsumerArgument().leaseCoordinator());
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
|||
|
|
@ -0,0 +1,213 @@
|
|||
package software.amazon.kinesis.lifecycle;
|
||||
|
||||
import java.util.concurrent.ConcurrentHashMap;
|
||||
import java.util.concurrent.ConcurrentMap;
|
||||
import java.util.concurrent.Executors;
|
||||
import java.util.concurrent.ScheduledExecutorService;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
import java.util.function.Supplier;
|
||||
|
||||
import com.google.common.util.concurrent.ThreadFactoryBuilder;
|
||||
import lombok.Data;
|
||||
import lombok.RequiredArgsConstructor;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import software.amazon.kinesis.annotations.KinesisClientInternalApi;
|
||||
import software.amazon.kinesis.leases.Lease;
|
||||
import software.amazon.kinesis.leases.LeaseCoordinator;
|
||||
import software.amazon.kinesis.leases.ShardInfo;
|
||||
import software.amazon.kinesis.leases.dynamodb.DynamoDBLeaseCoordinator;
|
||||
import software.amazon.kinesis.leases.exceptions.DependencyException;
|
||||
import software.amazon.kinesis.leases.exceptions.InvalidStateException;
|
||||
import software.amazon.kinesis.leases.exceptions.ProvisionedThroughputException;
|
||||
|
||||
/**
|
||||
* This class handles the graceful shutdown of shard consumers. When a lease is requested for shutdown, it will be
|
||||
* enqueued from the lease renewal thread which will call the shard consumer of the lease to enqueue a shutdown request.
|
||||
* The class monitors those leases and check if the shutdown is properly completed.
|
||||
* If the shard consumer doesn't shut down within the given timeout, it will trigger a lease transfer.
|
||||
*/
|
||||
@Slf4j
|
||||
@RequiredArgsConstructor
|
||||
@KinesisClientInternalApi
|
||||
public class LeaseGracefulShutdownHandler {
|
||||
|
||||
// Arbitrary number to run a similar frequency as the scheduler based on shardConsumerDispatchPollIntervalMillis
|
||||
// which is how fast scheduler triggers state change. It's ok to add few extra second delay to call shutdown since
|
||||
// the leases should still be processing by the current owner so there should not be processing delay due to this.
|
||||
private static final long SHUTDOWN_CHECK_INTERVAL_MILLIS = 2000;
|
||||
|
||||
private final long shutdownTimeoutMillis;
|
||||
private final ConcurrentMap<ShardInfo, ShardConsumer> shardInfoShardConsumerMap;
|
||||
private final LeaseCoordinator leaseCoordinator;
|
||||
private final Supplier<Long> currentTimeSupplier;
|
||||
private final ConcurrentMap<ShardInfo, LeasePendingShutdown> shardInfoLeasePendingShutdownMap =
|
||||
new ConcurrentHashMap<>();
|
||||
private final ScheduledExecutorService executorService;
|
||||
|
||||
private volatile boolean isRunning = false;
|
||||
|
||||
/**
|
||||
* Factory method to create a new instance of LeaseGracefulShutdownHandler.
|
||||
*
|
||||
* @param shutdownTimeoutMillis Timeout for graceful shutdown of shard consumers.
|
||||
* @param shardInfoShardConsumerMap Map of shard info to shard consumer instances.
|
||||
* @param leaseCoordinator Lease coordinator instance to access lease information.
|
||||
* @return A new instance of LeaseGracefulShutdownHandler.
|
||||
*/
|
||||
public static LeaseGracefulShutdownHandler create(
|
||||
long shutdownTimeoutMillis,
|
||||
ConcurrentMap<ShardInfo, ShardConsumer> shardInfoShardConsumerMap,
|
||||
LeaseCoordinator leaseCoordinator) {
|
||||
return new LeaseGracefulShutdownHandler(
|
||||
shutdownTimeoutMillis,
|
||||
shardInfoShardConsumerMap,
|
||||
leaseCoordinator,
|
||||
System::currentTimeMillis,
|
||||
Executors.newSingleThreadScheduledExecutor(new ThreadFactoryBuilder()
|
||||
.setNameFormat("LeaseGracefulShutdown-%04d")
|
||||
.setDaemon(true)
|
||||
.build()));
|
||||
}
|
||||
|
||||
/**
|
||||
* Starts the shard consumer shutdown handler thread.
|
||||
*/
|
||||
public void start() {
|
||||
if (!isRunning) {
|
||||
log.info("Starting graceful lease handoff thread.");
|
||||
executorService.scheduleAtFixedRate(
|
||||
this::monitorGracefulShutdownLeases, 0, SHUTDOWN_CHECK_INTERVAL_MILLIS, TimeUnit.MILLISECONDS);
|
||||
isRunning = true;
|
||||
} else {
|
||||
log.info("Graceful lease handoff thread already running, no need to start.");
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Stops the shard consumer shutdown handler thread.
|
||||
*/
|
||||
public void stop() {
|
||||
if (isRunning) {
|
||||
log.info("Stopping graceful lease handoff thread.");
|
||||
executorService.shutdown();
|
||||
isRunning = false;
|
||||
} else {
|
||||
log.info("Graceful lease handoff thread already stopped.");
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Enqueue a shutdown request for the given lease if the lease has requested shutdown and the shard consumer
|
||||
* is not already shutdown.
|
||||
*
|
||||
* @param lease The lease to enqueue a shutdown request for.
|
||||
*/
|
||||
public void enqueueShutdown(Lease lease) {
|
||||
if (lease == null || !lease.shutdownRequested() || !isRunning) {
|
||||
return;
|
||||
}
|
||||
final ShardInfo shardInfo = DynamoDBLeaseCoordinator.convertLeaseToAssignment(lease);
|
||||
final ShardConsumer consumer = shardInfoShardConsumerMap.get(shardInfo);
|
||||
if (consumer == null || consumer.isShutdown()) {
|
||||
shardInfoLeasePendingShutdownMap.remove(shardInfo);
|
||||
} else {
|
||||
// there could be change shard get enqueued after getting removed. This should be okay because
|
||||
// this enqueue will be no-op and will be removed again because the shardConsumer associated with the
|
||||
// shardInfo is shutdown by then.
|
||||
shardInfoLeasePendingShutdownMap.computeIfAbsent(shardInfo, key -> {
|
||||
log.info("Calling graceful shutdown for lease {}", lease.leaseKey());
|
||||
LeasePendingShutdown leasePendingShutdown = new LeasePendingShutdown(lease, consumer);
|
||||
initiateShutdown(leasePendingShutdown);
|
||||
return leasePendingShutdown;
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Wait for shutdown to complete or transfer ownership of lease to the next owner if timeout is met.
|
||||
*/
|
||||
private void monitorGracefulShutdownLeases() {
|
||||
String leaseKey = null;
|
||||
try {
|
||||
for (ConcurrentMap.Entry<ShardInfo, LeasePendingShutdown> entry :
|
||||
shardInfoLeasePendingShutdownMap.entrySet()) {
|
||||
final LeasePendingShutdown leasePendingShutdown = entry.getValue();
|
||||
final ShardInfo shardInfo = entry.getKey();
|
||||
leaseKey = leasePendingShutdown.lease.leaseKey();
|
||||
|
||||
if (leasePendingShutdown.shardConsumer.isShutdown()
|
||||
|| shardInfoShardConsumerMap.get(shardInfo) == null
|
||||
|| leaseCoordinator.getCurrentlyHeldLease(leaseKey) == null) {
|
||||
logTimeoutMessage(leasePendingShutdown);
|
||||
shardInfoLeasePendingShutdownMap.remove(shardInfo);
|
||||
} else if (getCurrentTimeMillis() >= leasePendingShutdown.timeoutTimestampMillis
|
||||
&& !leasePendingShutdown.leaseTransferCalled) {
|
||||
try {
|
||||
log.info(
|
||||
"Timeout {} millisecond reached waiting for lease {} to graceful handoff."
|
||||
+ " Attempting to transfer the lease to {}",
|
||||
shutdownTimeoutMillis,
|
||||
leaseKey,
|
||||
leasePendingShutdown.lease.leaseOwner());
|
||||
transferLeaseIfOwner(leasePendingShutdown);
|
||||
} catch (DependencyException | InvalidStateException | ProvisionedThroughputException e) {
|
||||
log.warn("Failed to transfer lease for key {}. Will retry", leaseKey, e);
|
||||
}
|
||||
}
|
||||
}
|
||||
} catch (Exception e) {
|
||||
log.error("Error in graceful shutdown for lease {}", leaseKey, e);
|
||||
}
|
||||
}
|
||||
|
||||
private void initiateShutdown(LeasePendingShutdown tracker) {
|
||||
tracker.shardConsumer.gracefulShutdown(null);
|
||||
tracker.shutdownRequested = true;
|
||||
tracker.timeoutTimestampMillis = getCurrentTimeMillis() + shutdownTimeoutMillis;
|
||||
}
|
||||
|
||||
private void logTimeoutMessage(LeasePendingShutdown leasePendingShutdown) {
|
||||
if (leasePendingShutdown.leaseTransferCalled) {
|
||||
final long timeElapsedSinceShutdownInitiated =
|
||||
getCurrentTimeMillis() - leasePendingShutdown.timeoutTimestampMillis + shutdownTimeoutMillis;
|
||||
log.info(
|
||||
"Lease {} took {} milliseconds to complete the shutdown. "
|
||||
+ "Consider tuning the GracefulLeaseHandoffTimeoutMillis to prevent timeouts, "
|
||||
+ "if necessary.",
|
||||
leasePendingShutdown.lease.leaseKey(),
|
||||
timeElapsedSinceShutdownInitiated);
|
||||
}
|
||||
}
|
||||
|
||||
private void transferLeaseIfOwner(LeasePendingShutdown leasePendingShutdown)
|
||||
throws ProvisionedThroughputException, InvalidStateException, DependencyException {
|
||||
final Lease lease = leasePendingShutdown.lease;
|
||||
if (leaseCoordinator.workerIdentifier().equals(lease.checkpointOwner())) {
|
||||
// assignLease will increment the leaseCounter which will cause the heartbeat to stop on the current owner
|
||||
// for the lease
|
||||
leaseCoordinator.leaseRefresher().assignLease(lease, lease.leaseOwner());
|
||||
} else {
|
||||
// the worker ID check is just for sanity. We don't expect it to be different from the current worker.
|
||||
log.error(
|
||||
"Lease {} checkpoint owner mismatch found {} but it should be {}",
|
||||
lease.leaseKey(),
|
||||
lease.checkpointOwner(),
|
||||
leaseCoordinator.workerIdentifier());
|
||||
}
|
||||
// mark it true because we don't want to enter the method again because update is not possible anymore.
|
||||
leasePendingShutdown.leaseTransferCalled = true;
|
||||
}
|
||||
|
||||
private long getCurrentTimeMillis() {
|
||||
return currentTimeSupplier.get();
|
||||
}
|
||||
|
||||
@Data
|
||||
private static class LeasePendingShutdown {
|
||||
final Lease lease;
|
||||
final ShardConsumer shardConsumer;
|
||||
long timeoutTimestampMillis;
|
||||
boolean shutdownRequested = false;
|
||||
boolean leaseTransferCalled = false;
|
||||
}
|
||||
}
|
||||
|
|
@ -24,6 +24,7 @@ import software.amazon.awssdk.services.kinesis.model.Shard;
|
|||
import software.amazon.kinesis.annotations.KinesisClientInternalApi;
|
||||
import software.amazon.kinesis.checkpoint.ShardRecordProcessorCheckpointer;
|
||||
import software.amazon.kinesis.common.StreamIdentifier;
|
||||
import software.amazon.kinesis.leases.LeaseStatsRecorder;
|
||||
import software.amazon.kinesis.leases.ShardDetector;
|
||||
import software.amazon.kinesis.leases.ShardInfo;
|
||||
import software.amazon.kinesis.lifecycle.events.ProcessRecordsInput;
|
||||
|
|
@ -65,6 +66,7 @@ public class ProcessTask implements ConsumerTask {
|
|||
private final AggregatorUtil aggregatorUtil;
|
||||
private final String shardInfoId;
|
||||
private final SchemaRegistryDecoder schemaRegistryDecoder;
|
||||
private final LeaseStatsRecorder leaseStatsRecorder;
|
||||
|
||||
public ProcessTask(
|
||||
@NonNull ShardInfo shardInfo,
|
||||
|
|
@ -79,7 +81,8 @@ public class ProcessTask implements ConsumerTask {
|
|||
long idleTimeInMilliseconds,
|
||||
@NonNull AggregatorUtil aggregatorUtil,
|
||||
@NonNull MetricsFactory metricsFactory,
|
||||
SchemaRegistryDecoder schemaRegistryDecoder) {
|
||||
SchemaRegistryDecoder schemaRegistryDecoder,
|
||||
@NonNull LeaseStatsRecorder leaseStatsRecorder) {
|
||||
this.shardInfo = shardInfo;
|
||||
this.shardInfoId = ShardInfo.getLeaseKey(shardInfo);
|
||||
this.shardRecordProcessor = shardRecordProcessor;
|
||||
|
|
@ -91,6 +94,7 @@ public class ProcessTask implements ConsumerTask {
|
|||
this.idleTimeInMilliseconds = idleTimeInMilliseconds;
|
||||
this.metricsFactory = metricsFactory;
|
||||
this.schemaRegistryDecoder = schemaRegistryDecoder;
|
||||
this.leaseStatsRecorder = leaseStatsRecorder;
|
||||
|
||||
if (!skipShardSyncAtWorkerInitializationIfLeasesExist) {
|
||||
this.shard = shardDetector.shard(shardInfo.shardId());
|
||||
|
|
@ -173,6 +177,7 @@ public class ProcessTask implements ConsumerTask {
|
|||
recordProcessorCheckpointer.largestPermittedCheckpointValue()));
|
||||
|
||||
if (shouldCallProcessRecords(records)) {
|
||||
publishLeaseStats(records);
|
||||
callProcessRecords(processRecordsInput, records);
|
||||
}
|
||||
success = true;
|
||||
|
|
@ -197,6 +202,15 @@ public class ProcessTask implements ConsumerTask {
|
|||
}
|
||||
}
|
||||
|
||||
private void publishLeaseStats(final List<KinesisClientRecord> records) {
|
||||
leaseStatsRecorder.recordStats(LeaseStatsRecorder.LeaseStats.builder()
|
||||
.bytes(records.stream()
|
||||
.mapToInt(record -> record.data().limit())
|
||||
.sum())
|
||||
.leaseKey(ShardInfo.getLeaseKey(shardInfo))
|
||||
.build());
|
||||
}
|
||||
|
||||
private List<KinesisClientRecord> deaggregateAnyKplRecords(List<KinesisClientRecord> records) {
|
||||
if (shard == null) {
|
||||
return aggregatorUtil.deaggregate(records);
|
||||
|
|
|
|||
|
|
@ -21,7 +21,6 @@ import java.util.concurrent.CompletableFuture;
|
|||
import java.util.concurrent.ExecutionException;
|
||||
import java.util.concurrent.ExecutorService;
|
||||
import java.util.concurrent.RejectedExecutionException;
|
||||
import java.util.function.Function;
|
||||
|
||||
import com.google.common.annotations.VisibleForTesting;
|
||||
import lombok.AccessLevel;
|
||||
|
|
@ -35,8 +34,6 @@ import software.amazon.kinesis.exceptions.internal.BlockedOnParentShardException
|
|||
import software.amazon.kinesis.leases.ShardInfo;
|
||||
import software.amazon.kinesis.lifecycle.events.ProcessRecordsInput;
|
||||
import software.amazon.kinesis.lifecycle.events.TaskExecutionListenerInput;
|
||||
import software.amazon.kinesis.metrics.MetricsCollectingTaskDecorator;
|
||||
import software.amazon.kinesis.metrics.MetricsFactory;
|
||||
import software.amazon.kinesis.retrieval.RecordsPublisher;
|
||||
|
||||
/**
|
||||
|
|
@ -59,12 +56,6 @@ public class ShardConsumer {
|
|||
@NonNull
|
||||
private final Optional<Long> logWarningForTaskAfterMillis;
|
||||
|
||||
/**
|
||||
* @deprecated unused; to be removed in a "major" version bump
|
||||
*/
|
||||
@Deprecated
|
||||
private final Function<ConsumerTask, ConsumerTask> taskMetricsDecorator;
|
||||
|
||||
private final int bufferSize;
|
||||
private final TaskExecutionListener taskExecutionListener;
|
||||
private final String streamIdentifier;
|
||||
|
|
@ -95,27 +86,6 @@ public class ShardConsumer {
|
|||
|
||||
private ProcessRecordsInput shardEndProcessRecordsInput;
|
||||
|
||||
@Deprecated
|
||||
public ShardConsumer(
|
||||
RecordsPublisher recordsPublisher,
|
||||
ExecutorService executorService,
|
||||
ShardInfo shardInfo,
|
||||
Optional<Long> logWarningForTaskAfterMillis,
|
||||
ShardConsumerArgument shardConsumerArgument,
|
||||
TaskExecutionListener taskExecutionListener) {
|
||||
this(
|
||||
recordsPublisher,
|
||||
executorService,
|
||||
shardInfo,
|
||||
logWarningForTaskAfterMillis,
|
||||
shardConsumerArgument,
|
||||
ConsumerStates.INITIAL_STATE,
|
||||
ShardConsumer.metricsWrappingFunction(shardConsumerArgument.metricsFactory()),
|
||||
8,
|
||||
taskExecutionListener,
|
||||
LifecycleConfig.DEFAULT_READ_TIMEOUTS_TO_IGNORE);
|
||||
}
|
||||
|
||||
public ShardConsumer(
|
||||
RecordsPublisher recordsPublisher,
|
||||
ExecutorService executorService,
|
||||
|
|
@ -131,36 +101,11 @@ public class ShardConsumer {
|
|||
logWarningForTaskAfterMillis,
|
||||
shardConsumerArgument,
|
||||
ConsumerStates.INITIAL_STATE,
|
||||
ShardConsumer.metricsWrappingFunction(shardConsumerArgument.metricsFactory()),
|
||||
8,
|
||||
taskExecutionListener,
|
||||
readTimeoutsToIgnoreBeforeWarning);
|
||||
}
|
||||
|
||||
@Deprecated
|
||||
public ShardConsumer(
|
||||
RecordsPublisher recordsPublisher,
|
||||
ExecutorService executorService,
|
||||
ShardInfo shardInfo,
|
||||
Optional<Long> logWarningForTaskAfterMillis,
|
||||
ShardConsumerArgument shardConsumerArgument,
|
||||
ConsumerState initialState,
|
||||
Function<ConsumerTask, ConsumerTask> taskMetricsDecorator,
|
||||
int bufferSize,
|
||||
TaskExecutionListener taskExecutionListener) {
|
||||
this(
|
||||
recordsPublisher,
|
||||
executorService,
|
||||
shardInfo,
|
||||
logWarningForTaskAfterMillis,
|
||||
shardConsumerArgument,
|
||||
initialState,
|
||||
taskMetricsDecorator,
|
||||
bufferSize,
|
||||
taskExecutionListener,
|
||||
LifecycleConfig.DEFAULT_READ_TIMEOUTS_TO_IGNORE);
|
||||
}
|
||||
|
||||
//
|
||||
// TODO: Make bufferSize configurable
|
||||
//
|
||||
|
|
@ -171,7 +116,6 @@ public class ShardConsumer {
|
|||
Optional<Long> logWarningForTaskAfterMillis,
|
||||
ShardConsumerArgument shardConsumerArgument,
|
||||
ConsumerState initialState,
|
||||
Function<ConsumerTask, ConsumerTask> taskMetricsDecorator,
|
||||
int bufferSize,
|
||||
TaskExecutionListener taskExecutionListener,
|
||||
int readTimeoutsToIgnoreBeforeWarning) {
|
||||
|
|
@ -183,7 +127,6 @@ public class ShardConsumer {
|
|||
this.logWarningForTaskAfterMillis = logWarningForTaskAfterMillis;
|
||||
this.taskExecutionListener = taskExecutionListener;
|
||||
this.currentState = initialState;
|
||||
this.taskMetricsDecorator = taskMetricsDecorator;
|
||||
subscriber = new ShardConsumerSubscriber(
|
||||
recordsPublisher, executorService, bufferSize, this, readTimeoutsToIgnoreBeforeWarning);
|
||||
this.bufferSize = bufferSize;
|
||||
|
|
@ -484,17 +427,18 @@ public class ShardConsumer {
|
|||
}
|
||||
|
||||
/**
|
||||
* Requests the shutdown of the this ShardConsumer. This should give the record processor a chance to checkpoint
|
||||
* Requests the shutdown of the ShardConsumer. This should give the record processor a chance to checkpoint
|
||||
* before being shutdown.
|
||||
*
|
||||
* @param shutdownNotification
|
||||
* used to signal that the record processor has been given the chance to shutdown.
|
||||
* @param shutdownNotification used to signal that the record processor has been given the chance to shut down.
|
||||
*/
|
||||
public void gracefulShutdown(ShutdownNotification shutdownNotification) {
|
||||
if (subscriber != null) {
|
||||
subscriber.cancel();
|
||||
}
|
||||
this.shutdownNotification = shutdownNotification;
|
||||
if (shutdownNotification != null) {
|
||||
this.shutdownNotification = shutdownNotification;
|
||||
}
|
||||
markForShutdown(ShutdownReason.REQUESTED);
|
||||
}
|
||||
|
||||
|
|
@ -542,21 +486,4 @@ public class ShardConsumer {
|
|||
return shutdownReason != null;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Default task wrapping function for metrics
|
||||
*
|
||||
* @param metricsFactory
|
||||
* the factory used for reporting metrics
|
||||
* @return a function that will wrap the task with a metrics reporter
|
||||
*/
|
||||
private static Function<ConsumerTask, ConsumerTask> metricsWrappingFunction(MetricsFactory metricsFactory) {
|
||||
return (task) -> {
|
||||
if (task == null) {
|
||||
return null;
|
||||
} else {
|
||||
return new MetricsCollectingTaskDecorator(task, metricsFactory);
|
||||
}
|
||||
};
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -18,7 +18,12 @@ import lombok.AccessLevel;
|
|||
import lombok.RequiredArgsConstructor;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import software.amazon.kinesis.annotations.KinesisClientInternalApi;
|
||||
import software.amazon.kinesis.leases.Lease;
|
||||
import software.amazon.kinesis.leases.LeaseCoordinator;
|
||||
import software.amazon.kinesis.leases.ShardInfo;
|
||||
import software.amazon.kinesis.leases.exceptions.DependencyException;
|
||||
import software.amazon.kinesis.leases.exceptions.InvalidStateException;
|
||||
import software.amazon.kinesis.leases.exceptions.ProvisionedThroughputException;
|
||||
import software.amazon.kinesis.lifecycle.events.ShutdownRequestedInput;
|
||||
import software.amazon.kinesis.processor.RecordProcessorCheckpointer;
|
||||
import software.amazon.kinesis.processor.ShardRecordProcessor;
|
||||
|
|
@ -33,23 +38,41 @@ public class ShutdownNotificationTask implements ConsumerTask {
|
|||
private final ShardRecordProcessor shardRecordProcessor;
|
||||
private final RecordProcessorCheckpointer recordProcessorCheckpointer;
|
||||
private final ShutdownNotification shutdownNotification;
|
||||
// TODO: remove if not used
|
||||
private final ShardInfo shardInfo;
|
||||
private final LeaseCoordinator leaseCoordinator;
|
||||
|
||||
@Override
|
||||
public TaskResult call() {
|
||||
final String leaseKey = ShardInfo.getLeaseKey(shardInfo);
|
||||
final Lease currentShardLease = leaseCoordinator.getCurrentlyHeldLease(leaseKey);
|
||||
try {
|
||||
try {
|
||||
shardRecordProcessor.shutdownRequested(ShutdownRequestedInput.builder()
|
||||
.checkpointer(recordProcessorCheckpointer)
|
||||
.build());
|
||||
attemptLeaseTransfer(currentShardLease);
|
||||
} catch (Exception ex) {
|
||||
return new TaskResult(ex);
|
||||
}
|
||||
|
||||
return new TaskResult(null);
|
||||
} finally {
|
||||
shutdownNotification.shutdownNotificationComplete();
|
||||
if (shutdownNotification != null) {
|
||||
shutdownNotification.shutdownNotificationComplete();
|
||||
} else {
|
||||
// shutdownNotification is null if this is a shard level graceful shutdown instead of a worker level
|
||||
// one. We need to drop lease like what's done in the shutdownNotificationComplete so we can
|
||||
// transition to next state.
|
||||
leaseCoordinator.dropLease(currentShardLease);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private void attemptLeaseTransfer(Lease lease)
|
||||
throws ProvisionedThroughputException, InvalidStateException, DependencyException {
|
||||
if (lease != null && lease.shutdownRequested()) {
|
||||
if (leaseCoordinator.workerIdentifier().equals(lease.checkpointOwner())) {
|
||||
leaseCoordinator.leaseRefresher().assignLease(lease, lease.leaseOwner());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -164,7 +164,6 @@ public class ShutdownTask implements ConsumerTask {
|
|||
} else {
|
||||
throwOnApplicationException(leaseKey, leaseLostAction, scope, startTime);
|
||||
}
|
||||
|
||||
log.debug("Shutting down retrieval strategy for shard {}.", leaseKey);
|
||||
recordsPublisher.shutdown();
|
||||
|
||||
|
|
|
|||
|
|
@ -49,7 +49,7 @@ public class RetrievalConfig {
|
|||
*/
|
||||
public static final String KINESIS_CLIENT_LIB_USER_AGENT = "amazon-kinesis-client-library-java";
|
||||
|
||||
public static final String KINESIS_CLIENT_LIB_USER_AGENT_VERSION = "2.6.1-SNAPSHOT";
|
||||
public static final String KINESIS_CLIENT_LIB_USER_AGENT_VERSION = "3.0.0";
|
||||
|
||||
/**
|
||||
* Client used to make calls to Kinesis for records retrieval
|
||||
|
|
|
|||
|
|
@ -26,6 +26,7 @@ import lombok.NonNull;
|
|||
import lombok.Setter;
|
||||
import lombok.ToString;
|
||||
import lombok.experimental.Accessors;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import software.amazon.awssdk.services.kinesis.KinesisAsyncClient;
|
||||
import software.amazon.awssdk.services.kinesis.model.GetRecordsRequest;
|
||||
import software.amazon.kinesis.retrieval.DataFetcherProviderConfig;
|
||||
|
|
@ -38,12 +39,15 @@ import software.amazon.kinesis.retrieval.RetrievalSpecificConfig;
|
|||
@Setter
|
||||
@ToString
|
||||
@EqualsAndHashCode
|
||||
@Slf4j
|
||||
public class PollingConfig implements RetrievalSpecificConfig {
|
||||
|
||||
public static final Duration DEFAULT_REQUEST_TIMEOUT = Duration.ofSeconds(30);
|
||||
|
||||
public static final int DEFAULT_MAX_RECORDS = 10000;
|
||||
|
||||
public static final long MIN_IDLE_MILLIS_BETWEEN_READS = 200L;
|
||||
|
||||
/**
|
||||
* Configurable functional interface to override the existing DataFetcher.
|
||||
*/
|
||||
|
|
@ -138,9 +142,18 @@ public class PollingConfig implements RetrievalSpecificConfig {
|
|||
/**
|
||||
* Set the value for how long the ShardConsumer should sleep in between calls to
|
||||
* {@link KinesisAsyncClient#getRecords(GetRecordsRequest)}. If this is not specified here the value provided in
|
||||
* {@link RecordsFetcherFactory} will be used.
|
||||
* {@link RecordsFetcherFactory} will be used. Cannot set value below MIN_IDLE_MILLIS_BETWEEN_READS.
|
||||
*/
|
||||
public PollingConfig idleTimeBetweenReadsInMillis(long idleTimeBetweenReadsInMillis) {
|
||||
if (idleTimeBetweenReadsInMillis < MIN_IDLE_MILLIS_BETWEEN_READS) {
|
||||
log.warn(
|
||||
"idleTimeBetweenReadsInMillis must be greater than or equal to {} but current value is {}."
|
||||
+ " Defaulting to minimum {}.",
|
||||
MIN_IDLE_MILLIS_BETWEEN_READS,
|
||||
idleTimeBetweenReadsInMillis,
|
||||
MIN_IDLE_MILLIS_BETWEEN_READS);
|
||||
idleTimeBetweenReadsInMillis = MIN_IDLE_MILLIS_BETWEEN_READS;
|
||||
}
|
||||
usePollingConfigIdleTimeValue = true;
|
||||
this.idleTimeBetweenReadsInMillis = idleTimeBetweenReadsInMillis;
|
||||
return this;
|
||||
|
|
|
|||
|
|
@ -61,6 +61,7 @@ import software.amazon.kinesis.retrieval.RecordsDeliveryAck;
|
|||
import software.amazon.kinesis.retrieval.RecordsPublisher;
|
||||
import software.amazon.kinesis.retrieval.RecordsRetrieved;
|
||||
import software.amazon.kinesis.retrieval.RetryableRetrievalException;
|
||||
import software.amazon.kinesis.retrieval.ThrottlingReporter;
|
||||
import software.amazon.kinesis.retrieval.kpl.ExtendedSequenceNumber;
|
||||
|
||||
import static software.amazon.kinesis.common.DiagnosticUtils.takeDelayedDeliveryActionIfRequired;
|
||||
|
|
@ -109,6 +110,7 @@ public class PrefetchRecordsPublisher implements RecordsPublisher {
|
|||
private boolean wasReset = false;
|
||||
private Instant lastEventDeliveryTime = Instant.EPOCH;
|
||||
private final RequestDetails lastSuccessfulRequestDetails = new RequestDetails();
|
||||
private final ThrottlingReporter throttlingReporter;
|
||||
|
||||
@Data
|
||||
@Accessors(fluent = true)
|
||||
|
|
@ -233,6 +235,7 @@ public class PrefetchRecordsPublisher implements RecordsPublisher {
|
|||
@NonNull final MetricsFactory metricsFactory,
|
||||
@NonNull final String operation,
|
||||
@NonNull final String shardId,
|
||||
final ThrottlingReporter throttlingReporter,
|
||||
final long awaitTerminationTimeoutMillis) {
|
||||
this.getRecordsRetrievalStrategy = getRecordsRetrievalStrategy;
|
||||
this.maxRecordsPerCall = maxRecordsPerCall;
|
||||
|
|
@ -248,6 +251,7 @@ public class PrefetchRecordsPublisher implements RecordsPublisher {
|
|||
this.idleMillisBetweenCalls = idleMillisBetweenCalls;
|
||||
this.defaultGetRecordsCacheDaemon = new DefaultGetRecordsCacheDaemon();
|
||||
Validate.notEmpty(operation, "Operation cannot be empty");
|
||||
this.throttlingReporter = throttlingReporter;
|
||||
this.operation = operation;
|
||||
this.streamId = this.getRecordsRetrievalStrategy.dataFetcher().getStreamIdentifier();
|
||||
this.streamAndShardId = this.streamId.serialize() + ":" + shardId;
|
||||
|
|
@ -279,7 +283,8 @@ public class PrefetchRecordsPublisher implements RecordsPublisher {
|
|||
final long idleMillisBetweenCalls,
|
||||
final MetricsFactory metricsFactory,
|
||||
final String operation,
|
||||
final String shardId) {
|
||||
final String shardId,
|
||||
final ThrottlingReporter throttlingReporter) {
|
||||
this(
|
||||
maxPendingProcessRecordsInput,
|
||||
maxByteSize,
|
||||
|
|
@ -291,6 +296,7 @@ public class PrefetchRecordsPublisher implements RecordsPublisher {
|
|||
metricsFactory,
|
||||
operation,
|
||||
shardId,
|
||||
throttlingReporter,
|
||||
DEFAULT_AWAIT_TERMINATION_TIMEOUT_MILLIS);
|
||||
}
|
||||
|
||||
|
|
@ -555,6 +561,7 @@ public class PrefetchRecordsPublisher implements RecordsPublisher {
|
|||
recordsRetrieved.lastBatchSequenceNumber);
|
||||
addArrivedRecordsInput(recordsRetrieved);
|
||||
drainQueueForRequests();
|
||||
throttlingReporter.success();
|
||||
} catch (PositionResetException pse) {
|
||||
throw pse;
|
||||
} catch (RetryableRetrievalException rre) {
|
||||
|
|
@ -584,10 +591,11 @@ public class PrefetchRecordsPublisher implements RecordsPublisher {
|
|||
|
||||
publisherSession.dataFetcher().restartIterator();
|
||||
} catch (ProvisionedThroughputExceededException e) {
|
||||
// Update the lastSuccessfulCall if we get a throttling exception so that we back off idleMillis
|
||||
// for the next call
|
||||
lastSuccessfulCall = Instant.now();
|
||||
log.error("{} : Exception thrown while fetching records from Kinesis", streamAndShardId, e);
|
||||
log.error(
|
||||
"{} : ProvisionedThroughputExceededException thrown while fetching records from Kinesis",
|
||||
streamAndShardId,
|
||||
e);
|
||||
throttlingReporter.throttled();
|
||||
} catch (SdkException e) {
|
||||
log.error("{} : Exception thrown while fetching records from Kinesis", streamAndShardId, e);
|
||||
} finally {
|
||||
|
|
|
|||
|
|
@ -24,6 +24,7 @@ import software.amazon.kinesis.retrieval.DataFetchingStrategy;
|
|||
import software.amazon.kinesis.retrieval.GetRecordsRetrievalStrategy;
|
||||
import software.amazon.kinesis.retrieval.RecordsFetcherFactory;
|
||||
import software.amazon.kinesis.retrieval.RecordsPublisher;
|
||||
import software.amazon.kinesis.retrieval.ThrottlingReporter;
|
||||
|
||||
@Slf4j
|
||||
@KinesisClientInternalApi
|
||||
|
|
@ -32,6 +33,7 @@ public class SimpleRecordsFetcherFactory implements RecordsFetcherFactory {
|
|||
private int maxByteSize = 8 * 1024 * 1024;
|
||||
private int maxRecordsCount = 30000;
|
||||
private long idleMillisBetweenCalls = 1500L;
|
||||
private int maxConsecutiveThrottles = 5;
|
||||
private DataFetchingStrategy dataFetchingStrategy = DataFetchingStrategy.DEFAULT;
|
||||
|
||||
@Override
|
||||
|
|
@ -56,7 +58,8 @@ public class SimpleRecordsFetcherFactory implements RecordsFetcherFactory {
|
|||
idleMillisBetweenCalls,
|
||||
metricsFactory,
|
||||
"ProcessTask",
|
||||
shardId);
|
||||
shardId,
|
||||
new ThrottlingReporter(maxConsecutiveThrottles, shardId));
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
|||
|
|
@ -7,7 +7,6 @@ import java.util.List;
|
|||
import com.amazonaws.services.schemaregistry.common.Schema;
|
||||
import com.amazonaws.services.schemaregistry.deserializers.GlueSchemaRegistryDeserializer;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import software.amazon.kinesis.common.KinesisClientLibraryPackage;
|
||||
import software.amazon.kinesis.retrieval.KinesisClientRecord;
|
||||
|
||||
/**
|
||||
|
|
@ -15,7 +14,7 @@ import software.amazon.kinesis.retrieval.KinesisClientRecord;
|
|||
*/
|
||||
@Slf4j
|
||||
public class SchemaRegistryDecoder {
|
||||
private static final String USER_AGENT_APP_NAME = "kcl" + "-" + KinesisClientLibraryPackage.VERSION;
|
||||
private static final String USER_AGENT_APP_NAME = "kcl" + "-" + "3.0.0";
|
||||
private final GlueSchemaRegistryDeserializer glueSchemaRegistryDeserializer;
|
||||
|
||||
public SchemaRegistryDecoder(GlueSchemaRegistryDeserializer glueSchemaRegistryDeserializer) {
|
||||
|
|
|
|||
|
|
@ -0,0 +1,61 @@
|
|||
package software.amazon.kinesis.utils;
|
||||
|
||||
import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;

import lombok.extern.slf4j.Slf4j;
|
||||
|
||||
/**
 * Helpers for reading Linux Control Group (cgroup) files.
 */
public class Cgroup {

    /** Non-instantiable utility class. */
    private Cgroup() {}

    /**
     * Reads the first line of the file at the given path.
     *
     * @param path path of the file to read
     * @return the first line of the file (null if the file is empty)
     * @throws IllegalArgumentException if the file does not exist or cannot be read
     */
    public static String readSingleLineFile(String path) {
        final File file = new File(path);
        if (!file.exists()) {
            throw new IllegalArgumentException(String.format("Failed to read file. %s does not exist", path));
        }
        // try-with-resources replaces the manual close-in-finally of the original implementation,
        // so a failure to close can no longer be silently swallowed.
        try (BufferedReader bufferedReader = new BufferedReader(new FileReader(file))) {
            return bufferedReader.readLine();
        } catch (final IOException e) {
            throw new IllegalArgumentException("Failed to read file.", e);
        }
    }

    /**
     * Calculates the number of available cpus from the cpuset
     * See https://docs.kernel.org/admin-guide/cgroup-v2.html#cpuset for more information
     * "0-7" represents 8 cores
     * "0-4,6,8-10" represents 9 cores (cores 0,1,2,3,4 and core 6 and core 8,9,10)
     * @param cpuSet a single line from the cgroup cpuset file
     * @return the number of available cpus
     */
    public static int getAvailableCpusFromEffectiveCpuSet(final String cpuSet) {
        int sumCpus = 0;
        for (final String cpuSetGroup : cpuSet.split(",")) {
            if (cpuSetGroup.contains("-")) {
                final String[] bounds = cpuSetGroup.split("-");
                // Range bounds are inclusive: "0-7" is 8 cpus.
                sumCpus += Integer.parseInt(bounds[1]) - Integer.parseInt(bounds[0]) + 1;
            } else {
                // A lone number is a single cpu.
                sumCpus += 1;
            }
        }
        return sumCpus;
    }
}
|
||||
|
|
@ -0,0 +1,67 @@
|
|||
package software.amazon.kinesis.utils;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.concurrent.CompletableFuture;
|
||||
import java.util.function.Supplier;
|
||||
|
||||
import lombok.NonNull;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import software.amazon.awssdk.services.dynamodb.DynamoDbAsyncClient;
|
||||
import software.amazon.awssdk.services.dynamodb.model.AttributeDefinition;
|
||||
import software.amazon.awssdk.services.dynamodb.model.BillingMode;
|
||||
import software.amazon.awssdk.services.dynamodb.model.CreateTableRequest;
|
||||
import software.amazon.awssdk.services.dynamodb.model.CreateTableResponse;
|
||||
import software.amazon.awssdk.services.dynamodb.model.KeySchemaElement;
|
||||
import software.amazon.awssdk.services.dynamodb.model.ProvisionedThroughput;
|
||||
import software.amazon.awssdk.services.dynamodb.model.UpdateContinuousBackupsRequest;
|
||||
import software.amazon.awssdk.services.dynamodb.model.UpdateContinuousBackupsResponse;
|
||||
import software.amazon.kinesis.common.DdbTableConfig;
|
||||
|
||||
import static java.util.Objects.nonNull;
|
||||
|
||||
@Slf4j
|
||||
public final class DdbUtil {
|
||||
|
||||
@NonNull
|
||||
public static Supplier<CompletableFuture<CreateTableResponse>> tableCreator(
|
||||
final Supplier<List<KeySchemaElement>> keySchemaProvider,
|
||||
final Supplier<List<AttributeDefinition>> attributeDefinitionProvider,
|
||||
final DdbTableConfig tableConfig,
|
||||
final DynamoDbAsyncClient dynamoDbAsyncClient) {
|
||||
final CreateTableRequest.Builder createTableRequest = CreateTableRequest.builder()
|
||||
.tableName(tableConfig.tableName())
|
||||
.keySchema(keySchemaProvider.get())
|
||||
.attributeDefinitions(attributeDefinitionProvider.get())
|
||||
.deletionProtectionEnabled(tableConfig.deletionProtectionEnabled());
|
||||
|
||||
if (nonNull(tableConfig.tags()) && !tableConfig.tags().isEmpty()) {
|
||||
createTableRequest.tags(tableConfig.tags());
|
||||
}
|
||||
|
||||
if (tableConfig.billingMode() == BillingMode.PROVISIONED) {
|
||||
log.info(
|
||||
"Creating table {} in provisioned mode with {}wcu and {}rcu",
|
||||
tableConfig.tableName(),
|
||||
tableConfig.writeCapacity(),
|
||||
tableConfig.readCapacity());
|
||||
createTableRequest.provisionedThroughput(ProvisionedThroughput.builder()
|
||||
.readCapacityUnits(tableConfig.readCapacity())
|
||||
.writeCapacityUnits(tableConfig.writeCapacity())
|
||||
.build());
|
||||
}
|
||||
createTableRequest.billingMode(tableConfig.billingMode());
|
||||
return () -> dynamoDbAsyncClient.createTable(createTableRequest.build());
|
||||
}
|
||||
|
||||
public static CompletableFuture<UpdateContinuousBackupsResponse> pitrEnabler(
|
||||
final DdbTableConfig tableConfig, final DynamoDbAsyncClient dynamoDbAsyncClient) {
|
||||
if (tableConfig.pointInTimeRecoveryEnabled()) {
|
||||
final UpdateContinuousBackupsRequest request = UpdateContinuousBackupsRequest.builder()
|
||||
.tableName(tableConfig.tableName())
|
||||
.pointInTimeRecoverySpecification(builder -> builder.pointInTimeRecoveryEnabled(true))
|
||||
.build();
|
||||
return dynamoDbAsyncClient.updateContinuousBackups(request);
|
||||
}
|
||||
return CompletableFuture.completedFuture(null);
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,31 @@
|
|||
package software.amazon.kinesis.utils;
|
||||
|
||||
import lombok.Getter;
|
||||
import lombok.RequiredArgsConstructor;
|
||||
|
||||
/**
 * Simple exponential moving average, per
 * <a href="https://en.wikipedia.org/wiki/Moving_average#Exponential_moving_average"/>.
 *
 * An alpha near 1 tracks recent samples closely (little smoothing); an alpha near 0
 * smooths heavily and reacts slowly to recent changes.
 */
public class ExponentialMovingAverage {

    /** Smoothing factor in [0, 1]; weight given to the newest sample. */
    private final double alpha;

    /** Current value of the moving average. */
    private double value;

    /** Whether at least one sample has been recorded. */
    private boolean initialized = false;

    public ExponentialMovingAverage(final double alpha) {
        this.alpha = alpha;
    }

    public double getValue() {
        return value;
    }

    /**
     * Folds a new sample into the average.
     *
     * @param newValue the latest observation
     */
    public void add(final double newValue) {
        if (initialized) {
            value = alpha * newValue + (1 - alpha) * value;
        } else {
            // The first observation seeds the average directly.
            value = newValue;
            initialized = true;
        }
    }
}
|
||||
|
|
@ -0,0 +1,44 @@
|
|||
package software.amazon.kinesis.utils;
|
||||
|
||||
import java.util.AbstractMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
/**
 * Basic statistical helpers used by worker-metrics computations.
 */
public class Statistics {

    /** Non-instantiable utility class. */
    private Statistics() {}

    /**
     * Calculates the simple mean of the given values
     * @param values list of values (double)
     * @return mean of the given values, if the {@param values} is empty then returns 0;
     */
    public static double calculateSimpleMean(final List<Double> values) {
        if (values.isEmpty()) {
            return 0D;
        }
        double sum = 0.0;
        for (final double value : values) {
            sum += value;
        }
        return sum / values.size();
    }

    /**
     * For the given values find the standard deviation (SD).
     * For details of SD calculation ref : <a href="https://en.wikipedia.org/wiki/Standard_deviation"/>
     * @param values list of values (double)
     * @return Map.Entry of mean to standard deviation for {@param values}, if {@param values} is empty then return
     *         Map.Entry with 0 as mean and 0 as SD.
     */
    public static Map.Entry<Double, Double> calculateStandardDeviationAndMean(final List<Double> values) {
        if (values.isEmpty()) {
            return new AbstractMap.SimpleEntry<>(0D, 0D);
        }
        final double mean = calculateSimpleMean(values);
        // Accumulate the sum of squared deviations; the population SD is the square
        // root of its mean. (Renamed from "standardDeviation" — the accumulator is not
        // itself the SD.)
        double sumSquaredDeviation = 0.0;
        for (final double num : values) {
            sumSquaredDeviation += Math.pow(num - mean, 2);
        }
        return new AbstractMap.SimpleEntry<>(mean, Math.sqrt(sumSquaredDeviation / values.size()));
    }
}
|
||||
|
|
@ -0,0 +1,92 @@
|
|||
package software.amazon.kinesis.worker;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.Optional;
|
||||
|
||||
import lombok.RequiredArgsConstructor;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import software.amazon.kinesis.annotations.KinesisClientInternalApi;
|
||||
import software.amazon.kinesis.worker.metric.OperatingRange;
|
||||
import software.amazon.kinesis.worker.metric.WorkerMetric;
|
||||
import software.amazon.kinesis.worker.metric.impl.container.Cgroupv1CpuWorkerMetric;
|
||||
import software.amazon.kinesis.worker.metric.impl.container.Cgroupv2CpuWorkerMetric;
|
||||
import software.amazon.kinesis.worker.metric.impl.container.EcsCpuWorkerMetric;
|
||||
import software.amazon.kinesis.worker.metric.impl.linux.LinuxCpuWorkerMetric;
|
||||
import software.amazon.kinesis.worker.platform.Ec2Resource;
|
||||
import software.amazon.kinesis.worker.platform.EcsResource;
|
||||
import software.amazon.kinesis.worker.platform.EksResource;
|
||||
import software.amazon.kinesis.worker.platform.OperatingRangeDataProvider;
|
||||
import software.amazon.kinesis.worker.platform.ResourceMetadataProvider;
|
||||
|
||||
/**
|
||||
* Class to select appropriate WorkerMetricStats based on the operating range provider that is available on the instance.
|
||||
*/
|
||||
@Slf4j
|
||||
@RequiredArgsConstructor
|
||||
@KinesisClientInternalApi
|
||||
public class WorkerMetricsSelector {
|
||||
|
||||
private static final OperatingRange DEFAULT_100_PERC_UTILIZED_OPERATING_RANGE =
|
||||
OperatingRange.builder().maxUtilization(100).build();
|
||||
|
||||
private final List<ResourceMetadataProvider> workerComputePlatforms;
|
||||
|
||||
/**
|
||||
* Factory method to create an instance of WorkerMetricsSelector.
|
||||
*
|
||||
* @return WorkerMetricsSelector instance
|
||||
*/
|
||||
public static WorkerMetricsSelector create() {
|
||||
final List<ResourceMetadataProvider> resourceMetadataProviders = new ArrayList<>();
|
||||
resourceMetadataProviders.add(EcsResource.create());
|
||||
resourceMetadataProviders.add(EksResource.create());
|
||||
// ec2 has to be the last one to check
|
||||
resourceMetadataProviders.add(Ec2Resource.create());
|
||||
return new WorkerMetricsSelector(resourceMetadataProviders);
|
||||
}
|
||||
|
||||
private Optional<OperatingRangeDataProvider> getOperatingRangeDataProvider() {
|
||||
for (ResourceMetadataProvider platform : workerComputePlatforms) {
|
||||
if (platform.isOnPlatform()) {
|
||||
final ResourceMetadataProvider.ComputePlatform computePlatform = platform.getPlatform();
|
||||
log.info("Worker is running on {}", computePlatform);
|
||||
return platform.getOperatingRangeDataProvider();
|
||||
}
|
||||
}
|
||||
return Optional.empty();
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a list of WorkerMetricStats based on the operating range provider the worker uses.
|
||||
*
|
||||
* @return List of WorkerMetricStats
|
||||
*/
|
||||
public List<WorkerMetric> getDefaultWorkerMetrics() {
|
||||
final List<WorkerMetric> workerMetrics = new ArrayList<>();
|
||||
final Optional<OperatingRangeDataProvider> optionalProvider = getOperatingRangeDataProvider();
|
||||
if (!optionalProvider.isPresent()) {
|
||||
log.warn("Did not find an operating range metadata provider.");
|
||||
return workerMetrics;
|
||||
}
|
||||
final OperatingRangeDataProvider dataProvider = optionalProvider.get();
|
||||
log.info("Worker has operating range metadata provider {} ", dataProvider);
|
||||
switch (dataProvider) {
|
||||
case LINUX_PROC:
|
||||
workerMetrics.add(new LinuxCpuWorkerMetric(DEFAULT_100_PERC_UTILIZED_OPERATING_RANGE));
|
||||
break;
|
||||
case LINUX_ECS_METADATA_KEY_V4:
|
||||
workerMetrics.add(new EcsCpuWorkerMetric(DEFAULT_100_PERC_UTILIZED_OPERATING_RANGE));
|
||||
break;
|
||||
case LINUX_EKS_CGROUP_V2:
|
||||
workerMetrics.add(new Cgroupv2CpuWorkerMetric(DEFAULT_100_PERC_UTILIZED_OPERATING_RANGE));
|
||||
break;
|
||||
case LINUX_EKS_CGROUP_V1:
|
||||
workerMetrics.add(new Cgroupv1CpuWorkerMetric(DEFAULT_100_PERC_UTILIZED_OPERATING_RANGE));
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
return workerMetrics;
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,20 @@
|
|||
package software.amazon.kinesis.worker.metric;
|
||||
|
||||
import com.google.common.base.Preconditions;
|
||||
import lombok.Builder;
|
||||
import lombok.Data;
|
||||
|
||||
/**
 * Operating range for a WorkerMetric: the utilization band the worker is allowed to occupy.
 */
@Data
@Builder
public class OperatingRange {

    /**
     * Max utilization percentage allowed for the workerMetrics.
     */
    private final int maxUtilization;

    // Private so that the Lombok-generated builder is the only entry point; the builder
    // routes through this constructor, guaranteeing the 0-100 range check always runs.
    private OperatingRange(final int maxUtilization) {
        Preconditions.checkArgument(!(maxUtilization < 0 || maxUtilization > 100), "Invalid maxUtilization value");
        this.maxUtilization = maxUtilization;
    }
}
|
||||
|
|
@ -0,0 +1,52 @@
|
|||
package software.amazon.kinesis.worker.metric;
|
||||
|
||||
import com.google.common.base.Preconditions;
|
||||
import lombok.Builder;
|
||||
import lombok.Getter;
|
||||
import lombok.NonNull;
|
||||
|
||||
/**
 * A WorkerMetric reports a point-in-time, normalized utilization reading (0-100%) for one
 * resource dimension of the worker, together with the operating range it should stay within.
 */
public interface WorkerMetric {
    /**
     * WorkerMetricStats short name that is used as attribute name for it in storage.
     * @return short name for the WorkerMetricStats
     */
    String getShortName();

    /**
     * Current WorkerMetricValue. WorkerMetricValue is a normalized percentage value to its max configured limits.
     * E.g., if for a worker max network bandwidth is 10Gbps and current used bandwidth is 2Gbps, then WorkerMetricValue for
     * NetworkWorkerMetrics will be 20 (%).
     *
     * @return WorkerMetricValue between 0 and 100 (both inclusive)
     */
    WorkerMetricValue capture();

    /**
     * Gets the operating range for this workerMetrics
     * @return Operating range for this workerMetrics
     */
    OperatingRange getOperatingRange();

    /**
     * Type of the current WorkerMetricStats.
     * @return WorkerMetricType
     */
    WorkerMetricType getWorkerMetricType();

    /**
     * WorkerMetricValue model class is used as return type for the capture() method to have a strong checks at the build
     * time of the object itself.
     */
    @Builder
    class WorkerMetricValue {

        @Getter
        private final Double value;

        // Private so the Lombok-generated builder is the only entry point; the builder routes
        // through this constructor, so the 0-100 range check runs on every build.
        private WorkerMetricValue(@NonNull final Double value) {
            Preconditions.checkArgument(
                    !(value < 0 || value > 100), value + " is either less than 0 or greater than 100");
            this.value = value;
        }
    }
}
|
||||
|
|
@ -0,0 +1,16 @@
|
|||
package software.amazon.kinesis.worker.metric;
|
||||
|
||||
import lombok.Getter;
|
||||
import lombok.RequiredArgsConstructor;
|
||||
|
||||
/**
 * The resource dimensions a WorkerMetric can measure; each carries the short name
 * used as its storage attribute.
 */
public enum WorkerMetricType {
    CPU("C"),
    MEMORY("M"),
    NETWORK_IN("NI"),
    NETWORK_OUT("NO"),
    THROUGHPUT("T");

    /** Short attribute name used in storage. */
    private final String shortName;

    WorkerMetricType(final String shortName) {
        this.shortName = shortName;
    }

    public String getShortName() {
        return shortName;
    }
}
|
||||
|
|
@ -0,0 +1,128 @@
|
|||
package software.amazon.kinesis.worker.metric.impl.container;
|
||||
|
||||
import java.time.Clock;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
|
||||
import lombok.AccessLevel;
|
||||
import lombok.RequiredArgsConstructor;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import software.amazon.kinesis.worker.metric.OperatingRange;
|
||||
import software.amazon.kinesis.worker.metric.WorkerMetric;
|
||||
import software.amazon.kinesis.worker.metric.WorkerMetricType;
|
||||
|
||||
import static software.amazon.kinesis.utils.Cgroup.getAvailableCpusFromEffectiveCpuSet;
|
||||
import static software.amazon.kinesis.utils.Cgroup.readSingleLineFile;
|
||||
|
||||
/**
 * Utilizes Linux Control Groups by reading cpu time and available cpu from cgroup directory. This works for Elastic
 * Kubernetes Service (EKS) containers running on Linux instances which use cgroupv1.
 *
 * EC2 instances must use a Linux instance that uses cgroupv1. Amazon Linux 2 uses cgroupv1.
 * Fargate versions 1.4.0 and 1.3.0 use Amazon Linux 2 and can use this.
 *
 * CPU time is measured in CPU cores time. A container is limited by amount of CPU core time it is allocated. So if over
 * a second the container uses 0.5 CPU core time and is allocated 2 CPU cores, the cpu utilization would be 25%.
 *
 * When this is invoked for the first time, the value returned is always 0 as the prev values are not available
 * to calculate the diff.
 * In case the file is not present or any other exception occurs, this throws IllegalArgumentException.
 */
@Slf4j
@RequiredArgsConstructor(access = AccessLevel.PACKAGE)
public class Cgroupv1CpuWorkerMetric implements WorkerMetric {

    // NOTE(review): the lock is static (shared by every instance) while the counters it guards
    // below are per-instance fields — confirm a per-instance lock was not intended.
    private static final Object LOCK_OBJECT = new Object();
    private static final WorkerMetricType CPU_WORKER_METRICS_TYPE = WorkerMetricType.CPU;
    private static final String CGROUP_ROOT = "/sys/fs/cgroup/";
    // Total CPU time consumed by this cgroup, in nanoseconds (parsed as nanos below).
    private static final String CPU_TIME_FILE = CGROUP_ROOT + "cpu/cpuacct.usage";
    // CFS quota/period pair (both in microseconds, per the "_us" suffix) defining the CPU allocation.
    private static final String CPU_CFS_QUOTA_FILE = CGROUP_ROOT + "cpu/cpu.cfs_quota_us";
    private static final String CPU_CFS_PERIOD_FILE = CGROUP_ROOT + "cpu/cpu.cfs_period_us";
    // Cpus the cgroup may run on; consulted only when no CFS quota is set.
    private static final String EFFECTIVE_CPU_SET_FILE = CGROUP_ROOT + "cpuset/cpuset.effective_cpus";
    private final OperatingRange operatingRange;
    private final String cpuTimeFile;
    private final String cfsQuotaFile;
    private final String cfsPeriodFile;
    private final String effectiveCpuSetFile;
    private final Clock clock;
    // CPU cores allocated to this container; resolved lazily on first capture (-1 = not yet resolved).
    private double cpuLimit = -1;
    // Last observed cpu-time / wall-clock readings used to compute the usage delta.
    private long lastCpuUseTimeNanos = 0;
    private long lastSystemTimeNanos = 0;

    /**
     * Creates a metric reading the default cgroupv1 file locations with a UTC clock.
     */
    public Cgroupv1CpuWorkerMetric(final OperatingRange operatingRange) {
        this(
                operatingRange,
                CPU_TIME_FILE,
                CPU_CFS_QUOTA_FILE,
                CPU_CFS_PERIOD_FILE,
                EFFECTIVE_CPU_SET_FILE,
                Clock.systemUTC());
    }

    @Override
    public String getShortName() {
        return CPU_WORKER_METRICS_TYPE.getShortName();
    }

    /**
     * Captures CPU utilization as a percentage (0-100) of this container's CPU allocation.
     */
    @Override
    public WorkerMetricValue capture() {
        return WorkerMetricValue.builder().value(calculateCpuUsage()).build();
    }

    private double calculateCpuUsage() {
        // Lazily resolve the container's core allocation on first capture.
        if (cpuLimit == -1) {
            cpuLimit = calculateCpuLimit();
        }

        final long cpuTimeNanos = Long.parseLong(readSingleLineFile(cpuTimeFile));
        final long currentTimeNanos = TimeUnit.MILLISECONDS.toNanos(clock.millis());

        boolean skip = false;
        double cpuCoreTimeUsed;
        synchronized (LOCK_OBJECT) {
            if (lastCpuUseTimeNanos == 0 && lastSystemTimeNanos == 0) {
                // Case where this is a first call so no diff available
                skip = true;
            }

            final long nanoTimeDiff = currentTimeNanos - lastSystemTimeNanos;
            final long cpuUseDiff = cpuTimeNanos - lastCpuUseTimeNanos;
            // This value is not a percent, but rather how much CPU core time was consumed. i.e. this number can be
            // 2.2 which stands for 2.2 CPU cores were fully utilized. If this number is less than 1 than that means
            // that less than 1 CPU core was used.
            cpuCoreTimeUsed = ((double) cpuUseDiff / nanoTimeDiff);

            lastCpuUseTimeNanos = cpuTimeNanos;
            lastSystemTimeNanos = currentTimeNanos;
        }

        if (skip) {
            // The first sample only primes the counters; report zero utilization.
            return 0D;
        } else {
            // In case of rounding error, treat everything above 100% as 100%
            return Math.min(100.0, cpuCoreTimeUsed / cpuLimit * 100.0);
        }
    }

    private double calculateCpuLimit() {
        // Documentation on these values:
        // https://docs.redhat.com/en/documentation/red_hat_enterprise_linux/6/html/resource_management_guide/sec-cpu#sect-cfs
        final long cfsQuota = Long.parseLong(readSingleLineFile(cfsQuotaFile));
        final long cfsPeriod = Long.parseLong(readSingleLineFile(cfsPeriodFile));
        if (cfsQuota == -1) {
            // If quota is -1, a limit is not set on the container. The container can use all available cores.
            return getAvailableCpusFromEffectiveCpuSet(readSingleLineFile(effectiveCpuSetFile));
        } else {
            // Quota/period ratio is the fractional number of cores allocated.
            return ((double) cfsQuota) / cfsPeriod;
        }
    }

    @Override
    public OperatingRange getOperatingRange() {
        return operatingRange;
    }

    @Override
    public WorkerMetricType getWorkerMetricType() {
        return CPU_WORKER_METRICS_TYPE;
    }
}
|
||||
|
|
@ -0,0 +1,128 @@
|
|||
package software.amazon.kinesis.worker.metric.impl.container;
|
||||
|
||||
import java.time.Clock;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
|
||||
import lombok.AccessLevel;
|
||||
import lombok.RequiredArgsConstructor;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import software.amazon.kinesis.worker.metric.OperatingRange;
|
||||
import software.amazon.kinesis.worker.metric.WorkerMetric;
|
||||
import software.amazon.kinesis.worker.metric.WorkerMetricType;
|
||||
|
||||
import static software.amazon.kinesis.utils.Cgroup.getAvailableCpusFromEffectiveCpuSet;
|
||||
import static software.amazon.kinesis.utils.Cgroup.readSingleLineFile;
|
||||
|
||||
/**
 * Utilizes Linux Control Groups by reading cpu time and available cpu from cgroup directory. This works for Elastic
 * Kubernetes Service (EKS) containers running on Linux instances which use cgroupv2.
 *
 * EC2 instances must use a Linux instance that uses cgroupv2. Amazon Linux 2023 uses cgroupv2.
 *
 * CPU time is measured in CPU cores time. A container is limited by amount of CPU core time it is allocated. So if over
 * a second the container uses 0.5 CPU core time and is allocated 2 CPU cores, the cpu utilization would be 25%.
 *
 * When this is invoked for the first time, the value returned is always 0 as the prev values are not available
 * to calculate the diff.
 * In case the file is not present or any other exception occurs, this throws IllegalArgumentException.
 */
@Slf4j
@RequiredArgsConstructor(access = AccessLevel.PACKAGE)
public class Cgroupv2CpuWorkerMetric implements WorkerMetric {

    // NOTE(review): the lock is static (shared by every instance) while the counters it guards
    // below are per-instance fields — same pattern as Cgroupv1CpuWorkerMetric; confirm intent.
    private static final Object LOCK_OBJECT = new Object();
    private static final WorkerMetricType CPU_WORKER_METRICS_TYPE = WorkerMetricType.CPU;
    private static final String CGROUP_ROOT = "/sys/fs/cgroup/";
    // Contains "$MAX $PERIOD"; "$MAX" is either a number or the literal "max" (no limit).
    private static final String CPU_MAX_FILE = CGROUP_ROOT + "cpu.max";
    // Cpus the cgroup may run on; consulted only when no limit is set in cpu.max.
    private static final String EFFECTIVE_CPU_SET_FILE = CGROUP_ROOT + "cpuset.cpus.effective";
    // First line is "usage_usec <micros>": total CPU time consumed by this cgroup.
    private static final String CPU_STAT_FILE = CGROUP_ROOT + "cpu.stat";
    private final OperatingRange operatingRange;
    private final String cpuMaxFile;
    private final String effectiveCpuSetFile;
    private final String cpuStatFile;
    private final Clock clock;
    // CPU cores allocated to this container; resolved lazily on first capture (-1 = not yet resolved).
    private double cpuLimit = -1;
    // Last observed cpu-time / wall-clock readings (microseconds) used to compute the usage delta.
    private long lastCpuUseTimeMicros = 0;
    private long lastSystemTimeMicros = 0;

    /**
     * Creates a metric reading the default cgroupv2 file locations with a UTC clock.
     */
    public Cgroupv2CpuWorkerMetric(final OperatingRange operatingRange) {
        this(operatingRange, CPU_MAX_FILE, EFFECTIVE_CPU_SET_FILE, CPU_STAT_FILE, Clock.systemUTC());
    }

    @Override
    public String getShortName() {
        return CPU_WORKER_METRICS_TYPE.getShortName();
    }

    /**
     * Captures CPU utilization as a percentage (0-100) of this container's CPU allocation.
     */
    @Override
    public WorkerMetricValue capture() {
        return WorkerMetricValue.builder().value(calculateCpuUsage()).build();
    }

    private double calculateCpuUsage() {
        // Lazily resolve the container's core allocation on first capture.
        if (cpuLimit == -1) {
            cpuLimit = calculateCpuLimit();
        }

        // The first line of this file is of the format
        // usage_usec $MICROSECONDS
        // where $MICROSECONDS is always a number
        final String cpuUsageStat = readSingleLineFile(cpuStatFile);
        final long cpuTimeMicros = Long.parseLong(cpuUsageStat.split(" ")[1]);
        final long currentTimeMicros = TimeUnit.MILLISECONDS.toMicros(clock.millis());

        boolean skip = false;
        double cpuCoreTimeUsed;
        synchronized (LOCK_OBJECT) {
            if (lastCpuUseTimeMicros == 0 && lastSystemTimeMicros == 0) {
                // Case where this is a first call so no diff available
                skip = true;
            }

            final long microTimeDiff = currentTimeMicros - lastSystemTimeMicros;
            final long cpuUseDiff = cpuTimeMicros - lastCpuUseTimeMicros;
            // This value is not a percent, but rather how much CPU core time was consumed. i.e. this number can be
            // 2.2 which stands for 2.2 CPU cores were fully utilized. If this number is less than 1 than that means
            // that less than 1 CPU core was used.
            cpuCoreTimeUsed = ((double) cpuUseDiff / microTimeDiff);

            lastCpuUseTimeMicros = cpuTimeMicros;
            lastSystemTimeMicros = currentTimeMicros;
        }

        if (skip) {
            // The first sample only primes the counters; report zero utilization.
            return 0D;
        } else {
            // In case of rounding error, treat everything above 100% as 100%
            return Math.min(100.0, cpuCoreTimeUsed / cpuLimit * 100.0);
        }
    }

    private double calculateCpuLimit() {
        // This file contains two values separated by space ($MAX $PERIOD).
        // $MAX is either a number or "max"
        // $PERIOD is always a number
        final String cpuMax = readSingleLineFile(cpuMaxFile);
        final String[] cpuMaxArr = cpuMax.split(" ");
        final String max = cpuMaxArr[0];
        final String period = cpuMaxArr[1];

        if (max.equals("max")) {
            // if first value in file is "max", a limit is not set on the container. The container can use all available
            // cores
            return getAvailableCpusFromEffectiveCpuSet(readSingleLineFile(effectiveCpuSetFile));
        } else {
            return Double.parseDouble(max) / Long.parseLong(period);
        }
    }

    @Override
    public OperatingRange getOperatingRange() {
        return operatingRange;
    }

    @Override
    public WorkerMetricType getWorkerMetricType() {
        return CPU_WORKER_METRICS_TYPE;
    }
}
|
||||
Some files were not shown because too many files have changed in this diff Show more
Loading…
Reference in a new issue