KCLv3 merge

This commit is contained in:
Furqaan Ali 2024-10-30 18:02:28 -07:00 committed by Lucien Luc
parent a159fa31fb
commit a754364d29
175 changed files with 18424 additions and 2349 deletions

View file

@ -21,7 +21,7 @@
<parent>
<artifactId>amazon-kinesis-client-pom</artifactId>
<groupId>software.amazon.kinesis</groupId>
<version>2.6.1-SNAPSHOT</version>
<version>3.0.0</version>
</parent>
<modelVersion>4.0.0</modelVersion>
@ -72,7 +72,7 @@
<dependency>
<groupId>org.projectlombok</groupId>
<artifactId>lombok</artifactId>
<version>1.18.24</version>
<version>1.18.28</version>
<scope>provided</scope>
</dependency>
<dependency>

View file

@ -23,7 +23,7 @@
<parent>
<groupId>software.amazon.kinesis</groupId>
<artifactId>amazon-kinesis-client-pom</artifactId>
<version>2.6.1-SNAPSHOT</version>
<version>3.0.0</version>
</parent>
<artifactId>amazon-kinesis-client</artifactId>
@ -68,6 +68,18 @@
<artifactId>dynamodb</artifactId>
<version>${awssdk.version}</version>
</dependency>
<!-- https://mvnrepository.com/artifact/software.amazon.awssdk/dynamodb-enhanced -->
<dependency>
<groupId>software.amazon.awssdk</groupId>
<artifactId>dynamodb-enhanced</artifactId>
<version>${awssdk.version}</version>
</dependency>
<!-- https://mvnrepository.com/artifact/com.amazonaws/dynamodb-lock-client -->
<dependency>
<groupId>com.amazonaws</groupId>
<artifactId>dynamodb-lock-client</artifactId>
<version>1.3.0</version>
</dependency>
<dependency>
<groupId>software.amazon.awssdk</groupId>
<artifactId>cloudwatch</artifactId>
@ -103,11 +115,23 @@
<artifactId>commons-lang3</artifactId>
<version>3.14.0</version>
</dependency>
<!-- https://mvnrepository.com/artifact/commons-collections/commons-collections -->
<dependency>
<groupId>commons-collections</groupId>
<artifactId>commons-collections</artifactId>
<version>3.2.2</version>
</dependency>
<dependency>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-api</artifactId>
<version>${slf4j.version}</version>
</dependency>
<!-- https://mvnrepository.com/artifact/org.jetbrains/annotations -->
<dependency>
<groupId>org.jetbrains</groupId>
<artifactId>annotations</artifactId>
<version>26.0.1</version>
</dependency>
<dependency>
<groupId>io.reactivex.rxjava3</groupId>
@ -123,35 +147,47 @@
</dependency>
<!-- Test -->
<!-- TODO: Migrate all tests to Junit5 -->
<dependency>
<groupId>org.junit.jupiter</groupId>
<artifactId>junit-jupiter-api</artifactId>
<version>5.11.3</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<version>4.13.2</version>
<scope>test</scope>
</dependency>
<!-- https://mvnrepository.com/artifact/org.junit.jupiter/junit-jupiter-params -->
<dependency>
<groupId>org.mockito</groupId>
<artifactId>mockito-all</artifactId>
<version>1.10.19</version>
<groupId>org.junit.jupiter</groupId>
<artifactId>junit-jupiter-params</artifactId>
<version>5.11.3</version>
<scope>test</scope>
</dependency>
<!-- Using older version to be compatible with Java 8 -->
<dependency>
<groupId>org.mockito</groupId>
<artifactId>mockito-junit-jupiter</artifactId>
<version>3.12.4</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.hamcrest</groupId>
<artifactId>hamcrest-all</artifactId>
<version>1.3</version>
<scope>test</scope>
</dependency>
<!--<dependency>-->
<!--<groupId>com.amazonaws</groupId>-->
<!--<artifactId>DynamoDBLocal</artifactId>-->
<!--<version>1.11.86</version>-->
<!--<scope>test</scope>-->
<!--</dependency>-->
<!-- Using older version to be compatible with Java 8 -->
<!-- https://mvnrepository.com/artifact/com.amazonaws/DynamoDBLocal -->
<dependency>
<groupId>com.amazonaws</groupId>
<artifactId>DynamoDBLocal</artifactId>
<version>1.25.0</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>ch.qos.logback</groupId>
<artifactId>logback-classic</artifactId>
@ -162,11 +198,11 @@
</dependencies>
<!--<repositories>-->
<!--<repository>-->
<!--<id>dynamodblocal</id>-->
<!--<name>AWS DynamoDB Local Release Repository</name>-->
<!--<url>https://s3-us-west-2.amazonaws.com/dynamodb-local/release</url>-->
<!--</repository>-->
<!--<repository>-->
<!--<id>dynamodblocal</id>-->
<!--<name>AWS DynamoDB Local Release Repository</name>-->
<!--<url>https://s3-us-west-2.amazonaws.com/dynamodb-local/release</url>-->
<!--</repository>-->
<!--</repositories>-->
<developers>
@ -203,20 +239,20 @@
</pluginManagement>
<plugins>
<plugin>
<groupId>org.xolstice.maven.plugins</groupId>
<artifactId>protobuf-maven-plugin</artifactId>
<version>0.6.1</version>
<executions>
<execution>
<goals>
<goal>compile</goal>
</goals>
</execution>
</executions>
<configuration>
<protocArtifact>com.google.protobuf:protoc:${protobuf.version}:exe:${os.detected.classifier}</protocArtifact>
</configuration>
<plugin>
<groupId>org.xolstice.maven.plugins</groupId>
<artifactId>protobuf-maven-plugin</artifactId>
<version>0.6.1</version>
<executions>
<execution>
<goals>
<goal>compile</goal>
</goals>
</execution>
</executions>
<configuration>
<protocArtifact>com.google.protobuf:protoc:${protobuf.version}:exe:${os.detected.classifier}</protocArtifact>
</configuration>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>

View file

@ -256,7 +256,8 @@ public class ConfigsBuilder {
* @return LeaseManagementConfig
*/
public LeaseManagementConfig leaseManagementConfig() {
return new LeaseManagementConfig(tableName(), dynamoDBClient(), kinesisClient(), workerIdentifier());
return new LeaseManagementConfig(
tableName(), applicationName(), dynamoDBClient(), kinesisClient(), workerIdentifier());
}
/**

View file

@ -0,0 +1,57 @@
/*
* Copyright 2024 Amazon.com, Inc. or its affiliates.
* Licensed under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package software.amazon.kinesis.common;
import lombok.Data;
import lombok.NoArgsConstructor;
import lombok.experimental.Accessors;
import software.amazon.awssdk.services.dynamodb.model.BillingMode;
/**
* Configurations of a DDB table created by KCL for its internal operations.
*/
@Data
@Accessors(fluent = true)
@NoArgsConstructor
public class DdbTableConfig {
/**
 * Creates a config whose table name defaults to "applicationName-tableSuffix".
 *
 * @param applicationName name of the KCL application; used as the table name prefix
 * @param tableSuffix suffix identifying the purpose of the table (e.g. "CoordinatorState")
 */
protected DdbTableConfig(final String applicationName, final String tableSuffix) {
this.tableName = applicationName + "-" + tableSuffix;
}
/**
 * name to use for the DDB table. If null, it will default to
 * applicationName-tableSuffix. If multiple KCL applications
 * run in the same account, a unique tableName must be provided.
 */
private String tableName;
/**
 * Billing mode used to create the DDB table.
 * Defaults to on-demand (PAY_PER_REQUEST).
 */
private BillingMode billingMode = BillingMode.PAY_PER_REQUEST;
/**
 * read capacity to provision during DDB table creation,
 * if billing mode is PROVISIONED.
 */
private long readCapacity;
/**
 * write capacity to provision during DDB table creation,
 * if billing mode is PROVISIONED.
 */
private long writeCapacity;
}

View file

@ -15,10 +15,13 @@
package software.amazon.kinesis.common;
import java.time.Duration;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.CompletionException;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.Future;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;
import java.util.function.Supplier;
public class FutureUtils {
@ -31,4 +34,15 @@ public class FutureUtils {
throw te;
}
}
/**
 * Runs the supplied async call and blocks until it completes, returning its value.
 * When the future fails with a {@link CompletionException} whose cause is a
 * {@link RuntimeException}, the cause itself is rethrown so callers see the
 * original exception type; any other failure propagates as the CompletionException.
 */
public static <T> T unwrappingFuture(final Supplier<CompletableFuture<T>> supplier) {
    try {
        return supplier.get().join();
    } catch (final CompletionException wrapped) {
        final Throwable cause = wrapped.getCause();
        if (cause instanceof RuntimeException) {
            throw (RuntimeException) cause;
        }
        throw wrapped;
    }
}
}

View file

@ -1,5 +1,5 @@
/*
* Copyright 2019 Amazon.com, Inc. or its affiliates.
* Copyright 2024 Amazon.com, Inc. or its affiliates.
* Licensed under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
@ -12,18 +12,16 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package software.amazon.kinesis.common;
package software.amazon.kinesis.leases.dynamodb;
public class StackTraceUtils {
public static String getPrintableStackTrace(final StackTraceElement[] stackTrace) {
final StringBuilder stackTraceString = new StringBuilder();
import lombok.AccessLevel;
import lombok.NoArgsConstructor;
for (final StackTraceElement traceElement : stackTrace) {
stackTraceString.append("\tat ").append(traceElement).append("\n");
}
/**
* This class is just a holder for initial lease table IOPs units. This class will be removed in a future release.
*/
@Deprecated
@NoArgsConstructor(access = AccessLevel.PRIVATE)
public class TableConstants {
public static final long DEFAULT_INITIAL_LEASE_TABLE_READ_CAPACITY = 10L;
public static final long DEFAULT_INITIAL_LEASE_TABLE_WRITE_CAPACITY = 10L;
return stackTraceString.toString();
}
}

View file

@ -18,6 +18,7 @@ package software.amazon.kinesis.coordinator;
import lombok.Data;
import lombok.NonNull;
import lombok.experimental.Accessors;
import software.amazon.kinesis.common.DdbTableConfig;
import software.amazon.kinesis.leases.NoOpShardPrioritization;
import software.amazon.kinesis.leases.ShardPrioritization;
@ -27,6 +28,14 @@ import software.amazon.kinesis.leases.ShardPrioritization;
@Data
@Accessors(fluent = true)
public class CoordinatorConfig {
private static final int PERIODIC_SHARD_SYNC_MAX_WORKERS_DEFAULT = 1;
public CoordinatorConfig(final String applicationName) {
this.applicationName = applicationName;
this.coordinatorStateConfig = new CoordinatorStateTableConfig(applicationName);
}
/**
* Application name used by checkpointer to checkpoint.
*
@ -96,4 +105,53 @@ public class CoordinatorConfig {
* <p>Default value: 1000L</p>
*/
private long schedulerInitializationBackoffTimeMillis = 1000L;
/**
* Version the KCL needs to operate in. For more details check the KCLv3 migration
* documentation.
*/
public enum ClientVersionConfig {
/**
* For an application that was operating with previous KCLv2.x, during
* upgrade to KCLv3.x, a migration process is needed due to the incompatible
* changes between the 2 versions. During the migration process, application
* must use ClientVersion=CLIENT_VERSION_CONFIG_COMPATIBLE_WITH_2x so that it runs in
* a compatible mode until all workers in the cluster have upgraded to the
* 3.x version (which is determined based on workers emitting WorkerMetricStats).
* Once all known workers are in 3.x mode, the library auto toggles to 3.x mode;
* but prior to that it runs in a mode compatible with 2.x workers.
* This version also allows rolling back to the compatible mode from the
* auto-toggled 3.x mode.
*/
CLIENT_VERSION_CONFIG_COMPATIBLE_WITH_2x,
/**
* A new application operating with KCLv3.x will use this value. Also, an application
* that has successfully upgraded to 3.x version and no longer needs the ability
* for a rollback to a 2.x compatible version, will use this value. In this version,
* KCL will operate with new algorithms introduced in 3.x which is not compatible
* with prior versions. And once in this version, rollback to 2.x is not supported.
*/
CLIENT_VERSION_CONFIG_3x,
}
/**
* Client version KCL must operate in, by default it operates in 3.x version which is not
* compatible with prior versions.
*/
private ClientVersionConfig clientVersionConfig = ClientVersionConfig.CLIENT_VERSION_CONFIG_3x;
public static class CoordinatorStateTableConfig extends DdbTableConfig {
private CoordinatorStateTableConfig(final String applicationName) {
super(applicationName, "CoordinatorState");
}
}
/**
* Configuration to control how the CoordinatorState DDB table is created, such as table name,
* billing mode, provisioned capacity. If no table name is specified, the table name will
* default to applicationName-CoordinatorState. If no billing mode is chosen, the default is
* On-Demand.
*/
@NonNull
private final CoordinatorStateTableConfig coordinatorStateConfig;
}

View file

@ -0,0 +1,52 @@
/*
* Copyright 2024 Amazon.com, Inc. or its affiliates.
* Licensed under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package software.amazon.kinesis.coordinator;
import java.util.Map;
import lombok.AccessLevel;
import lombok.AllArgsConstructor;
import lombok.Builder;
import lombok.Data;
import lombok.NoArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import software.amazon.awssdk.services.dynamodb.model.AttributeValue;
import software.amazon.kinesis.annotations.KinesisClientInternalApi;
/**
 * DataModel for CoordinatorState, this data model is used to store various state information required
 * for coordination across the KCL worker fleet. Therefore, the model follows a flexible schema.
 */
@Data
@Builder
@NoArgsConstructor
@AllArgsConstructor(access = AccessLevel.PRIVATE)
@Slf4j
@KinesisClientInternalApi
public class CoordinatorState {
// Name of the hash-key attribute of the CoordinatorState DDB table.
public static final String COORDINATOR_STATE_TABLE_HASH_KEY_ATTRIBUTE_NAME = "key";
/**
 * Key value for the item in the CoordinatorState table used for leader
 * election among the KCL workers. The attributes relevant to this item
 * is dictated by the DDB Lock client implementation that is used to
 * provide mutual exclusion.
 */
public static final String LEADER_HASH_KEY = "Leader";
// Hash-key value identifying this state item in the table.
private String key;
// Remaining item attributes keyed by attribute name; schema is intentionally flexible.
private Map<String, AttributeValue> attributes;
}

View file

@ -0,0 +1,417 @@
/*
* Copyright 2024 Amazon.com, Inc. or its affiliates.
* Licensed under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package software.amazon.kinesis.coordinator;
import java.time.Duration;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import com.amazonaws.services.dynamodbv2.AmazonDynamoDBLockClientOptions;
import com.amazonaws.services.dynamodbv2.AmazonDynamoDBLockClientOptions.AmazonDynamoDBLockClientOptionsBuilder;
import lombok.NonNull;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.collections4.MapUtils;
import software.amazon.awssdk.core.waiters.WaiterResponse;
import software.amazon.awssdk.services.dynamodb.DynamoDbAsyncClient;
import software.amazon.awssdk.services.dynamodb.DynamoDbClient;
import software.amazon.awssdk.services.dynamodb.model.AttributeAction;
import software.amazon.awssdk.services.dynamodb.model.AttributeDefinition;
import software.amazon.awssdk.services.dynamodb.model.AttributeValue;
import software.amazon.awssdk.services.dynamodb.model.AttributeValueUpdate;
import software.amazon.awssdk.services.dynamodb.model.BillingMode;
import software.amazon.awssdk.services.dynamodb.model.ConditionalCheckFailedException;
import software.amazon.awssdk.services.dynamodb.model.CreateTableRequest;
import software.amazon.awssdk.services.dynamodb.model.CreateTableResponse;
import software.amazon.awssdk.services.dynamodb.model.DescribeTableRequest;
import software.amazon.awssdk.services.dynamodb.model.DescribeTableResponse;
import software.amazon.awssdk.services.dynamodb.model.DynamoDbException;
import software.amazon.awssdk.services.dynamodb.model.ExpectedAttributeValue;
import software.amazon.awssdk.services.dynamodb.model.GetItemRequest;
import software.amazon.awssdk.services.dynamodb.model.GetItemResponse;
import software.amazon.awssdk.services.dynamodb.model.KeySchemaElement;
import software.amazon.awssdk.services.dynamodb.model.KeyType;
import software.amazon.awssdk.services.dynamodb.model.ProvisionedThroughput;
import software.amazon.awssdk.services.dynamodb.model.ProvisionedThroughputExceededException;
import software.amazon.awssdk.services.dynamodb.model.PutItemRequest;
import software.amazon.awssdk.services.dynamodb.model.ResourceNotFoundException;
import software.amazon.awssdk.services.dynamodb.model.ScalarAttributeType;
import software.amazon.awssdk.services.dynamodb.model.ScanRequest;
import software.amazon.awssdk.services.dynamodb.model.ScanResponse;
import software.amazon.awssdk.services.dynamodb.model.TableDescription;
import software.amazon.awssdk.services.dynamodb.model.TableStatus;
import software.amazon.awssdk.services.dynamodb.model.UpdateItemRequest;
import software.amazon.awssdk.services.dynamodb.waiters.DynamoDbAsyncWaiter;
import software.amazon.awssdk.utils.CollectionUtils;
import software.amazon.kinesis.common.FutureUtils;
import software.amazon.kinesis.coordinator.CoordinatorConfig.CoordinatorStateTableConfig;
import software.amazon.kinesis.coordinator.migration.MigrationState;
import software.amazon.kinesis.leases.DynamoUtils;
import software.amazon.kinesis.leases.exceptions.DependencyException;
import software.amazon.kinesis.leases.exceptions.InvalidStateException;
import software.amazon.kinesis.leases.exceptions.ProvisionedThroughputException;
import static software.amazon.kinesis.common.FutureUtils.unwrappingFuture;
import static software.amazon.kinesis.coordinator.CoordinatorState.COORDINATOR_STATE_TABLE_HASH_KEY_ATTRIBUTE_NAME;
/**
 * Data Access Object to abstract accessing {@link CoordinatorState} from
 * the CoordinatorState DDB table.
 */
@Slf4j
public class CoordinatorStateDAO {
// Async client used for all table reads/writes performed by this DAO.
private final DynamoDbAsyncClient dynamoDbAsyncClient;
// Synchronous facade over the async client; the DDB lock client only accepts a sync client.
private final DynamoDbClient dynamoDbSyncClient;
// Table name / billing mode / capacity settings for the CoordinatorState table.
private final CoordinatorStateTableConfig config;
/**
 * @param dynamoDbAsyncClient client used for all DDB operations
 * @param config table configuration (name, billing mode, capacity)
 */
public CoordinatorStateDAO(
final DynamoDbAsyncClient dynamoDbAsyncClient, final CoordinatorStateTableConfig config) {
this.dynamoDbAsyncClient = dynamoDbAsyncClient;
this.config = config;
this.dynamoDbSyncClient = createDelegateClient();
}
/**
 * Creates the backing DDB table if it does not exist and waits for it to become active.
 *
 * @throws DependencyException if table creation fails or the table does not become active in time
 */
public void initialize() throws DependencyException {
createTableIfNotExists();
}
// Wraps the async client in a synchronous adapter for use by the lock client.
private DynamoDbClient createDelegateClient() {
return new DynamoDbAsyncToSyncClientAdapter(dynamoDbAsyncClient);
}
/**
 * Returns lock-client options pre-configured with this DAO's sync client, table name,
 * and partition-key name, for callers constructing an AmazonDynamoDBLockClient.
 */
public AmazonDynamoDBLockClientOptionsBuilder getDDBLockClientOptionsBuilder() {
return AmazonDynamoDBLockClientOptions.builder(dynamoDbSyncClient, config.tableName())
.withPartitionKeyName(COORDINATOR_STATE_TABLE_HASH_KEY_ATTRIBUTE_NAME);
}
/**
 * List all the {@link CoordinatorState} from the DDB table synchronously
 *
 * @throws DependencyException if DynamoDB scan fails in an unexpected way
 * @throws InvalidStateException if ddb table does not exist
 * @throws ProvisionedThroughputException if DynamoDB scan fails due to lack of capacity
 *
 * @return list of state
 */
public List<CoordinatorState> listCoordinatorState()
throws ProvisionedThroughputException, DependencyException, InvalidStateException {
log.debug("Listing coordinatorState");
final ScanRequest request =
ScanRequest.builder().tableName(config.tableName()).build();
try {
ScanResponse response = FutureUtils.unwrappingFuture(() -> dynamoDbAsyncClient.scan(request));
final List<CoordinatorState> stateList = new ArrayList<>();
// Page through the scan by following lastEvaluatedKey until DDB returns no more pages.
while (Objects.nonNull(response)) {
log.debug("Scan response {}", response);
response.items().stream().map(this::fromDynamoRecord).forEach(stateList::add);
if (!CollectionUtils.isNullOrEmpty(response.lastEvaluatedKey())) {
final ScanRequest continuationRequest = request.toBuilder()
.exclusiveStartKey(response.lastEvaluatedKey())
.build();
log.debug("Scan request {}", continuationRequest);
response = FutureUtils.unwrappingFuture(() -> dynamoDbAsyncClient.scan(continuationRequest));
} else {
log.debug("Scan finished");
response = null;
}
}
return stateList;
} catch (final ProvisionedThroughputExceededException e) {
log.warn(
"Provisioned throughput on {} has exceeded. It is recommended to increase the IOPs"
+ " on the table.",
config.tableName());
throw new ProvisionedThroughputException(e);
} catch (final ResourceNotFoundException e) {
throw new InvalidStateException(
String.format("Cannot list coordinatorState, because table %s does not exist", config.tableName()));
} catch (final DynamoDbException e) {
throw new DependencyException(e);
}
}
/**
 * Create a new {@link CoordinatorState} if it does not exist.
 * @param state the state to create
 * @return true if state was created, false if it already exists
 *
 * @throws DependencyException if DynamoDB put fails in an unexpected way
 * @throws InvalidStateException if lease table does not exist
 * @throws ProvisionedThroughputException if DynamoDB put fails due to lack of capacity
 */
public boolean createCoordinatorStateIfNotExists(final CoordinatorState state)
throws DependencyException, InvalidStateException, ProvisionedThroughputException {
log.debug("Creating coordinatorState {}", state);
// Conditional put: the "non-existent" expectation makes DDB reject the write
// with ConditionalCheckFailedException when the hash key already exists.
final PutItemRequest request = PutItemRequest.builder()
.tableName(config.tableName())
.item(toDynamoRecord(state))
.expected(getDynamoNonExistentExpectation())
.build();
try {
FutureUtils.unwrappingFuture(() -> dynamoDbAsyncClient.putItem(request));
} catch (final ConditionalCheckFailedException e) {
log.info("Not creating coordinator state because the key already exists");
return false;
} catch (final ProvisionedThroughputExceededException e) {
log.warn(
"Provisioned throughput on {} has exceeded. It is recommended to increase the IOPs"
+ " on the table.",
config.tableName());
throw new ProvisionedThroughputException(e);
} catch (final ResourceNotFoundException e) {
throw new InvalidStateException(String.format(
"Cannot create coordinatorState %s, because table %s does not exist", state, config.tableName()));
} catch (final DynamoDbException e) {
throw new DependencyException(e);
}
log.info("Created CoordinatorState: {}", state);
return true;
}
/**
 * @param key Get the CoordinatorState for this key
 *
 * @throws InvalidStateException if ddb table does not exist
 * @throws ProvisionedThroughputException if DynamoDB get fails due to lack of capacity
 * @throws DependencyException if DynamoDB get fails in an unexpected way
 *
 * @return state for the specified key, or null if one doesn't exist
 */
public CoordinatorState getCoordinatorState(@NonNull final String key)
throws DependencyException, InvalidStateException, ProvisionedThroughputException {
log.debug("Getting coordinatorState with key {}", key);
// consistentRead: coordination decisions must not be made on stale reads.
final GetItemRequest request = GetItemRequest.builder()
.tableName(config.tableName())
.key(getCoordinatorStateKey(key))
.consistentRead(true)
.build();
try {
final GetItemResponse result = FutureUtils.unwrappingFuture(() -> dynamoDbAsyncClient.getItem(request));
final Map<String, AttributeValue> dynamoRecord = result.item();
if (CollectionUtils.isNullOrEmpty(dynamoRecord)) {
log.debug("No coordinatorState found with key {}, returning null.", key);
return null;
}
return fromDynamoRecord(dynamoRecord);
} catch (final ProvisionedThroughputExceededException e) {
log.warn(
"Provisioned throughput on {} has exceeded. It is recommended to increase the IOPs"
+ " on the table.",
config.tableName());
throw new ProvisionedThroughputException(e);
} catch (final ResourceNotFoundException e) {
throw new InvalidStateException(String.format(
"Cannot get coordinatorState for key %s, because table %s does not exist",
key, config.tableName()));
} catch (final DynamoDbException e) {
throw new DependencyException(e);
}
}
/**
 * Update fields of the given coordinator state in DynamoDB. Conditional on the provided expectation.
 *
 * @return true if update succeeded, false otherwise when expectations are not met
 *
 * @throws InvalidStateException if table does not exist
 * @throws ProvisionedThroughputException if DynamoDB update fails due to lack of capacity
 * @throws DependencyException if DynamoDB update fails in an unexpected way
 */
public boolean updateCoordinatorStateWithExpectation(
@NonNull final CoordinatorState state, final Map<String, ExpectedAttributeValue> expectations)
throws DependencyException, InvalidStateException, ProvisionedThroughputException {
// Always require the item to already exist (key match); caller expectations are layered on top
// and may override entries for the same attribute names.
final Map<String, ExpectedAttributeValue> expectationMap = getDynamoExistentExpectation(state.getKey());
expectationMap.putAll(MapUtils.emptyIfNull(expectations));
final Map<String, AttributeValueUpdate> updateMap = getDynamoCoordinatorStateUpdate(state);
final UpdateItemRequest request = UpdateItemRequest.builder()
.tableName(config.tableName())
.key(getCoordinatorStateKey(state.getKey()))
.expected(expectationMap)
.attributeUpdates(updateMap)
.build();
try {
FutureUtils.unwrappingFuture(() -> dynamoDbAsyncClient.updateItem(request));
} catch (final ConditionalCheckFailedException e) {
log.debug("CoordinatorState update {} failed because conditions were not met", state);
return false;
} catch (final ProvisionedThroughputExceededException e) {
log.warn(
"Provisioned throughput on {} has exceeded. It is recommended to increase the IOPs"
+ " on the table.",
config.tableName());
throw new ProvisionedThroughputException(e);
} catch (final ResourceNotFoundException e) {
throw new InvalidStateException(String.format(
"Cannot update coordinatorState for key %s, because table %s does not exist",
state.getKey(), config.tableName()));
} catch (final DynamoDbException e) {
throw new DependencyException(e);
}
log.info("Coordinator state updated {}", state);
return true;
}
// Creates the table when absent, then blocks (up to 10 minutes) until it is ACTIVE.
private void createTableIfNotExists() throws DependencyException {
TableDescription tableDescription = getTableDescription();
if (tableDescription == null) {
final CreateTableResponse response = unwrappingFuture(() -> dynamoDbAsyncClient.createTable(getRequest()));
tableDescription = response.tableDescription();
log.info("DDB Table: {} created", config.tableName());
} else {
log.info("Skipping DDB table {} creation as it already exists", config.tableName());
}
if (tableDescription.tableStatus() != TableStatus.ACTIVE) {
log.info("Waiting for DDB Table: {} to become active", config.tableName());
try (final DynamoDbAsyncWaiter waiter = dynamoDbAsyncClient.waiter()) {
final WaiterResponse<DescribeTableResponse> response =
unwrappingFuture(() -> waiter.waitUntilTableExists(
r -> r.tableName(config.tableName()), o -> o.waitTimeout(Duration.ofMinutes(10))));
// A waiter result without a response means the wait timed out or failed;
// surface that as a DependencyException carrying the waiter's exception, if any.
response.matched()
.response()
.orElseThrow(() -> new DependencyException(new IllegalStateException(
"Creating CoordinatorState table timed out",
response.matched().exception().orElse(null))));
}
}
}
// Builds the CreateTableRequest: a single string hash key, with billing mode and
// (for PROVISIONED) read/write capacity taken from the table config.
private CreateTableRequest getRequest() {
final CreateTableRequest.Builder requestBuilder = CreateTableRequest.builder()
.tableName(config.tableName())
.keySchema(KeySchemaElement.builder()
.attributeName(COORDINATOR_STATE_TABLE_HASH_KEY_ATTRIBUTE_NAME)
.keyType(KeyType.HASH)
.build())
.attributeDefinitions(AttributeDefinition.builder()
.attributeName(COORDINATOR_STATE_TABLE_HASH_KEY_ATTRIBUTE_NAME)
.attributeType(ScalarAttributeType.S)
.build());
switch (config.billingMode()) {
case PAY_PER_REQUEST:
requestBuilder.billingMode(BillingMode.PAY_PER_REQUEST);
break;
case PROVISIONED:
requestBuilder.billingMode(BillingMode.PROVISIONED);
final ProvisionedThroughput throughput = ProvisionedThroughput.builder()
.readCapacityUnits(config.readCapacity())
.writeCapacityUnits(config.writeCapacity())
.build();
requestBuilder.provisionedThroughput(throughput);
break;
}
return requestBuilder.build();
}
// Builds the primary-key map ({"key": <value>}) used by get/update requests.
private Map<String, AttributeValue> getCoordinatorStateKey(@NonNull final String key) {
return Collections.singletonMap(
COORDINATOR_STATE_TABLE_HASH_KEY_ATTRIBUTE_NAME, DynamoUtils.createAttributeValue(key));
}
// Converts a raw DDB item into a CoordinatorState. Items recognized by
// MigrationState.deserialize are returned as the MigrationState subtype instead.
private CoordinatorState fromDynamoRecord(final Map<String, AttributeValue> dynamoRecord) {
final HashMap<String, AttributeValue> attributes = new HashMap<>(dynamoRecord);
final String keyValue =
DynamoUtils.safeGetString(attributes.remove(COORDINATOR_STATE_TABLE_HASH_KEY_ATTRIBUTE_NAME));
final MigrationState migrationState = MigrationState.deserialize(keyValue, attributes);
if (migrationState != null) {
log.debug("Retrieved MigrationState {}", migrationState);
return migrationState;
}
final CoordinatorState c =
CoordinatorState.builder().key(keyValue).attributes(attributes).build();
log.debug("Retrieved coordinatorState {}", c);
return c;
}
// Converts a CoordinatorState (or MigrationState) into the DDB item map for PutItem.
// Generic attributes are applied last and can therefore overwrite serialized MigrationState fields.
private Map<String, AttributeValue> toDynamoRecord(final CoordinatorState state) {
final Map<String, AttributeValue> result = new HashMap<>();
result.put(COORDINATOR_STATE_TABLE_HASH_KEY_ATTRIBUTE_NAME, DynamoUtils.createAttributeValue(state.getKey()));
if (state instanceof MigrationState) {
result.putAll(((MigrationState) state).serialize());
}
if (!CollectionUtils.isNullOrEmpty(state.getAttributes())) {
result.putAll(state.getAttributes());
}
return result;
}
// Expectation that the hash key does NOT exist yet (used for create-if-not-exists puts).
private Map<String, ExpectedAttributeValue> getDynamoNonExistentExpectation() {
final Map<String, ExpectedAttributeValue> result = new HashMap<>();
final ExpectedAttributeValue expectedAV =
ExpectedAttributeValue.builder().exists(false).build();
result.put(COORDINATOR_STATE_TABLE_HASH_KEY_ATTRIBUTE_NAME, expectedAV);
return result;
}
// Expectation that the item exists with exactly this hash-key value (used for updates).
private Map<String, ExpectedAttributeValue> getDynamoExistentExpectation(final String keyValue) {
final Map<String, ExpectedAttributeValue> result = new HashMap<>();
final ExpectedAttributeValue expectedAV = ExpectedAttributeValue.builder()
.value(AttributeValue.fromS(keyValue))
.build();
result.put(COORDINATOR_STATE_TABLE_HASH_KEY_ATTRIBUTE_NAME, expectedAV);
return result;
}
// Builds the AttributeValueUpdate map (PUT actions) for UpdateItem.
// NOTE(review): unlike toDynamoRecord, this dereferences state.getAttributes() without a
// null check — a state with null attributes would NPE here; confirm callers always set it.
private Map<String, AttributeValueUpdate> getDynamoCoordinatorStateUpdate(final CoordinatorState state) {
final HashMap<String, AttributeValueUpdate> updates = new HashMap<>();
if (state instanceof MigrationState) {
updates.putAll(((MigrationState) state).getDynamoUpdate());
}
state.getAttributes()
.forEach((attribute, value) -> updates.put(
attribute,
AttributeValueUpdate.builder()
.value(value)
.action(AttributeAction.PUT)
.build()));
return updates;
}
// Describes the table, returning null when it does not exist (signal for createTableIfNotExists).
private TableDescription getTableDescription() {
try {
final DescribeTableResponse response = unwrappingFuture(() -> dynamoDbAsyncClient.describeTable(
DescribeTableRequest.builder().tableName(config.tableName()).build()));
return response.table();
} catch (final ResourceNotFoundException e) {
return null;
}
}
}

View file

@ -28,12 +28,17 @@ import java.util.function.BooleanSupplier;
import java.util.stream.Collectors;
import lombok.extern.slf4j.Slf4j;
import software.amazon.awssdk.services.cloudwatch.model.StandardUnit;
import software.amazon.awssdk.utils.CollectionUtils;
import software.amazon.kinesis.leases.Lease;
import software.amazon.kinesis.leases.LeaseRefresher;
import software.amazon.kinesis.leases.exceptions.DependencyException;
import software.amazon.kinesis.leases.exceptions.InvalidStateException;
import software.amazon.kinesis.leases.exceptions.ProvisionedThroughputException;
import software.amazon.kinesis.metrics.MetricsFactory;
import software.amazon.kinesis.metrics.MetricsLevel;
import software.amazon.kinesis.metrics.MetricsScope;
import software.amazon.kinesis.metrics.MetricsUtil;
/**
* An implementation of the {@code LeaderDecider} to elect leader(s) based on workerId.
@ -46,7 +51,7 @@ import software.amazon.kinesis.leases.exceptions.ProvisionedThroughputException;
* This ensures redundancy for shard-sync during host failures.
*/
@Slf4j
class DeterministicShuffleShardSyncLeaderDecider implements LeaderDecider {
public class DeterministicShuffleShardSyncLeaderDecider implements LeaderDecider {
// Fixed seed so that the shuffle order is preserved across workers
static final int DETERMINISTIC_SHUFFLE_SEED = 1947;
@ -59,6 +64,7 @@ class DeterministicShuffleShardSyncLeaderDecider implements LeaderDecider {
private final LeaseRefresher leaseRefresher;
private final int numPeriodicShardSyncWorkers;
private final ScheduledExecutorService leaderElectionThreadPool;
private final MetricsFactory metricsFactory;
private volatile Set<String> leaders;
@ -67,11 +73,17 @@ class DeterministicShuffleShardSyncLeaderDecider implements LeaderDecider {
* @param leaderElectionThreadPool Thread-pool to be used for leaderElection.
* @param numPeriodicShardSyncWorkers Number of leaders that will be elected to perform periodic shard syncs.
*/
DeterministicShuffleShardSyncLeaderDecider(
public DeterministicShuffleShardSyncLeaderDecider(
LeaseRefresher leaseRefresher,
ScheduledExecutorService leaderElectionThreadPool,
int numPeriodicShardSyncWorkers) {
this(leaseRefresher, leaderElectionThreadPool, numPeriodicShardSyncWorkers, new ReentrantReadWriteLock());
int numPeriodicShardSyncWorkers,
MetricsFactory metricsFactory) {
this(
leaseRefresher,
leaderElectionThreadPool,
numPeriodicShardSyncWorkers,
new ReentrantReadWriteLock(),
metricsFactory);
}
/**
@ -84,11 +96,13 @@ class DeterministicShuffleShardSyncLeaderDecider implements LeaderDecider {
LeaseRefresher leaseRefresher,
ScheduledExecutorService leaderElectionThreadPool,
int numPeriodicShardSyncWorkers,
ReadWriteLock readWriteLock) {
ReadWriteLock readWriteLock,
MetricsFactory metricsFactory) {
this.leaseRefresher = leaseRefresher;
this.leaderElectionThreadPool = leaderElectionThreadPool;
this.numPeriodicShardSyncWorkers = numPeriodicShardSyncWorkers;
this.readWriteLock = readWriteLock;
this.metricsFactory = metricsFactory;
}
/*
@ -146,8 +160,13 @@ class DeterministicShuffleShardSyncLeaderDecider implements LeaderDecider {
ELECTION_SCHEDULING_INTERVAL_MILLIS,
TimeUnit.MILLISECONDS);
}
return executeConditionCheckWithReadLock(() -> isWorkerLeaderForShardSync(workerId));
final boolean response = executeConditionCheckWithReadLock(() -> isWorkerLeaderForShardSync(workerId));
final MetricsScope metricsScope =
MetricsUtil.createMetricsWithOperation(metricsFactory, METRIC_OPERATION_LEADER_DECIDER);
metricsScope.addData(
METRIC_OPERATION_LEADER_DECIDER_IS_LEADER, response ? 1 : 0, StandardUnit.COUNT, MetricsLevel.DETAILED);
MetricsUtil.endScope(metricsScope);
return response;
}
@Override

View file

@ -0,0 +1,403 @@
/*
* Copyright 2024 Amazon.com, Inc. or its affiliates.
* Licensed under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package software.amazon.kinesis.coordinator;
import java.time.Duration;
import java.util.Objects;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.ScheduledFuture;
import java.util.concurrent.TimeUnit;
import java.util.function.BiFunction;
import java.util.function.Supplier;
import lombok.AccessLevel;
import lombok.Builder;
import lombok.Getter;
import lombok.experimental.Accessors;
import lombok.extern.slf4j.Slf4j;
import software.amazon.awssdk.annotations.ThreadSafe;
import software.amazon.kinesis.annotations.KinesisClientInternalApi;
import software.amazon.kinesis.coordinator.MigrationAdaptiveLeaseAssignmentModeProvider.LeaseAssignmentMode;
import software.amazon.kinesis.coordinator.assignment.LeaseAssignmentManager;
import software.amazon.kinesis.coordinator.migration.ClientVersion;
import software.amazon.kinesis.leader.DynamoDBLockBasedLeaderDecider;
import software.amazon.kinesis.leader.MigrationAdaptiveLeaderDecider;
import software.amazon.kinesis.leases.LeaseManagementConfig.WorkerUtilizationAwareAssignmentConfig;
import software.amazon.kinesis.leases.LeaseRefresher;
import software.amazon.kinesis.leases.exceptions.DependencyException;
import software.amazon.kinesis.metrics.MetricsFactory;
import software.amazon.kinesis.worker.metricstats.WorkerMetricStatsDAO;
import software.amazon.kinesis.worker.metricstats.WorkerMetricStatsManager;
import software.amazon.kinesis.worker.metricstats.WorkerMetricStatsReporter;
import static software.amazon.kinesis.coordinator.MigrationAdaptiveLeaseAssignmentModeProvider.LeaseAssignmentMode.DEFAULT_LEASE_COUNT_BASED_ASSIGNMENT;
import static software.amazon.kinesis.coordinator.MigrationAdaptiveLeaseAssignmentModeProvider.LeaseAssignmentMode.WORKER_UTILIZATION_AWARE_ASSIGNMENT;
import static software.amazon.kinesis.coordinator.assignment.LeaseAssignmentManager.DEFAULT_NO_OF_SKIP_STAT_FOR_DEAD_WORKER_THRESHOLD;
/**
 * This class is responsible for initializing the KCL components that support
* seamless upgrade from v2.x to v3.x.
* During specific versions, it also dynamically switches the functionality
* to be either vanilla 3.x or 2.x compatible.
*
* It is responsible for creating:
* 1. LeaderDecider
* 2. LAM
* 3. WorkerMetricStatsReporter
*
* It manages initializing the following components at initialization time
* 1. workerMetricsDAO and workerMetricsManager
* 2. leaderDecider
* 3. MigrationAdaptiveLeaseAssignmentModeProvider
*
* It updates the following components dynamically:
* 1. starts/stops LAM
* 2. starts/stops WorkerMetricStatsReporter
* 3. updates LeaseAssignmentMode to either DEFAULT_LEASE_COUNT_BASED_ASSIGNMENT or WORKER_UTILIZATION_AWARE_ASSIGNMENT
* 4. creates GSI (deletion is done by KclMigrationTool)
* 5. creates WorkerMetricStats table (deletion is done by KclMigrationTool)
* 6. updates LeaderDecider to either DeterministicShuffleShardSyncLeaderDecider or DynamoDBLockBasedLeaderDecider
*/
@Slf4j
@KinesisClientInternalApi
@ThreadSafe
@Accessors(fluent = true)
public final class DynamicMigrationComponentsInitializer {
    // Max time to wait for the LAM / worker-metrics thread pools to drain on shutdown.
    private static final long SCHEDULER_SHUTDOWN_TIMEOUT_SECONDS = 60L;

    @Getter
    private final MetricsFactory metricsFactory;

    @Getter
    private final LeaseRefresher leaseRefresher;

    private final CoordinatorStateDAO coordinatorStateDAO;
    private final ScheduledExecutorService workerMetricsThreadPool;

    @Getter
    private final WorkerMetricStatsDAO workerMetricsDAO;

    private final WorkerMetricStatsManager workerMetricsManager;
    private final ScheduledExecutorService lamThreadPool;
    // Factories so that LAM and the various LeaderDecider flavors can be created lazily,
    // after the migration state machine knows which client version is in effect.
    private final BiFunction<ScheduledExecutorService, LeaderDecider, LeaseAssignmentManager> lamCreator;
    private final Supplier<MigrationAdaptiveLeaderDecider> adaptiveLeaderDeciderCreator;
    private final Supplier<DeterministicShuffleShardSyncLeaderDecider> deterministicLeaderDeciderCreator;
    private final Supplier<DynamoDBLockBasedLeaderDecider> ddbLockBasedLeaderDeciderCreator;

    @Getter
    private final String workerIdentifier;

    private final WorkerUtilizationAwareAssignmentConfig workerUtilizationAwareAssignmentConfig;

    @Getter
    private final long workerMetricsExpirySeconds;

    private final MigrationAdaptiveLeaseAssignmentModeProvider leaseModeChangeConsumer;

    // Mutable state, updated dynamically as the migration state machine transitions.
    @Getter
    private LeaderDecider leaderDecider;

    private LeaseAssignmentManager leaseAssignmentManager;
    private ScheduledFuture<?> workerMetricsReporterFuture;
    private LeaseAssignmentMode currentAssignmentMode;
    // true when both 2.x and 3.x assignment code paths must be kept alive for dynamic switching
    private boolean dualMode;
    private boolean initialized;
    /**
     * Package-private builder-based constructor; instances are created by the coordinator wiring.
     * All collaborators are injected; no side effects happen here — component startup is deferred
     * to {@link #initialize}.
     */
    @Builder(access = AccessLevel.PACKAGE)
    DynamicMigrationComponentsInitializer(
            final MetricsFactory metricsFactory,
            final LeaseRefresher leaseRefresher,
            final CoordinatorStateDAO coordinatorStateDAO,
            final ScheduledExecutorService workerMetricsThreadPool,
            final WorkerMetricStatsDAO workerMetricsDAO,
            final WorkerMetricStatsManager workerMetricsManager,
            final ScheduledExecutorService lamThreadPool,
            final BiFunction<ScheduledExecutorService, LeaderDecider, LeaseAssignmentManager> lamCreator,
            final Supplier<MigrationAdaptiveLeaderDecider> adaptiveLeaderDeciderCreator,
            final Supplier<DeterministicShuffleShardSyncLeaderDecider> deterministicLeaderDeciderCreator,
            final Supplier<DynamoDBLockBasedLeaderDecider> ddbLockBasedLeaderDeciderCreator,
            final String workerIdentifier,
            final WorkerUtilizationAwareAssignmentConfig workerUtilizationAwareAssignmentConfig,
            final MigrationAdaptiveLeaseAssignmentModeProvider leaseAssignmentModeProvider) {
        this.metricsFactory = metricsFactory;
        this.leaseRefresher = leaseRefresher;
        this.coordinatorStateDAO = coordinatorStateDAO;
        this.workerIdentifier = workerIdentifier;
        this.workerUtilizationAwareAssignmentConfig = workerUtilizationAwareAssignmentConfig;
        // A worker's metrics are considered expired after it has skipped the configured number
        // of reporting intervals (used elsewhere to detect dead workers).
        this.workerMetricsExpirySeconds = Duration.ofMillis(DEFAULT_NO_OF_SKIP_STAT_FOR_DEAD_WORKER_THRESHOLD
                        * workerUtilizationAwareAssignmentConfig.workerMetricsReporterFreqInMillis())
                .getSeconds();
        this.workerMetricsManager = workerMetricsManager;
        this.workerMetricsDAO = workerMetricsDAO;
        this.workerMetricsThreadPool = workerMetricsThreadPool;
        this.lamThreadPool = lamThreadPool;
        this.lamCreator = lamCreator;
        this.adaptiveLeaderDeciderCreator = adaptiveLeaderDeciderCreator;
        this.deterministicLeaderDeciderCreator = deterministicLeaderDeciderCreator;
        this.ddbLockBasedLeaderDeciderCreator = ddbLockBasedLeaderDeciderCreator;
        this.leaseModeChangeConsumer = leaseAssignmentModeProvider;
    }
    /**
     * One-time initialization of migration-aware components based on the client version the
     * migration state machine starts in. Idempotent: subsequent calls are no-ops.
     *
     * @param migrationStateMachineStartingClientVersion starting client version from the state machine
     * @throws DependencyException if a dependent component fails to initialize
     */
    public void initialize(final ClientVersion migrationStateMachineStartingClientVersion) throws DependencyException {
        if (initialized) {
            log.info("Already initialized, nothing to do");
            return;
        }

        // always collect metrics so that when we flip to start reporting we will have accurate historical data.
        log.info("Start collection of WorkerMetricStats");
        workerMetricsManager.startManager();

        if (migrationStateMachineStartingClientVersion == ClientVersion.CLIENT_VERSION_3x) {
            initializeComponentsFor3x();
        } else {
            initializeComponentsForMigration(migrationStateMachineStartingClientVersion);
        }
        log.info("Initialized dual mode {} current assignment mode {}", dualMode, currentAssignmentMode);

        // LAM is created here (it needs the leaderDecider chosen above) but only started later
        // by the initializeClientVersion* transitions.
        log.info("Creating LAM");
        leaseAssignmentManager = lamCreator.apply(lamThreadPool, leaderDecider);

        log.info("Initializing {}", leaseModeChangeConsumer.getClass().getSimpleName());
        leaseModeChangeConsumer.initialize(dualMode, currentAssignmentMode);
        initialized = true;
    }
    /**
     * Sets up vanilla 3.x operation: single (non-dual) mode, utilization-aware assignment,
     * and the DDB-lock-based leader decider.
     */
    private void initializeComponentsFor3x() {
        log.info("Initializing for 3x functionality");
        dualMode = false;
        currentAssignmentMode = WORKER_UTILIZATION_AWARE_ASSIGNMENT;
        log.info("Initializing dualMode {} assignmentMode {}", dualMode, currentAssignmentMode);

        leaderDecider = ddbLockBasedLeaderDeciderCreator.get();
        log.info("Initializing {}", leaderDecider.getClass().getSimpleName());
        leaderDecider.initialize();
    }
    /**
     * Sets up dual-mode operation for an in-progress migration. The initial assignment mode and
     * inner leader decider depend on whether the state machine starts post-flip
     * (CLIENT_VERSION_3x_WITH_ROLLBACK) or pre-flip; either way the inner decider is wrapped in a
     * {@link MigrationAdaptiveLeaderDecider} so it can be swapped dynamically later.
     */
    private void initializeComponentsForMigration(final ClientVersion migrationStateMachineStartingClientVersion) {
        log.info("Initializing for migration to 3x");
        dualMode = true;
        final LeaderDecider initialLeaderDecider;
        if (migrationStateMachineStartingClientVersion == ClientVersion.CLIENT_VERSION_3x_WITH_ROLLBACK) {
            currentAssignmentMode = WORKER_UTILIZATION_AWARE_ASSIGNMENT;
            initialLeaderDecider = ddbLockBasedLeaderDeciderCreator.get();
        } else {
            currentAssignmentMode = DEFAULT_LEASE_COUNT_BASED_ASSIGNMENT;
            initialLeaderDecider = deterministicLeaderDeciderCreator.get();
        }
        log.info("Initializing dualMode {} assignmentMode {}", dualMode, currentAssignmentMode);

        final MigrationAdaptiveLeaderDecider adaptiveLeaderDecider = adaptiveLeaderDeciderCreator.get();
        log.info(
                "Initializing MigrationAdaptiveLeaderDecider with {}",
                initialLeaderDecider.getClass().getSimpleName());
        adaptiveLeaderDecider.updateLeaderDecider(initialLeaderDecider);
        this.leaderDecider = adaptiveLeaderDecider;
    }
void shutdown() {
log.info("Shutting down components");
if (initialized) {
log.info("Stopping LAM, LeaderDecider, workerMetrics reporting and collection");
leaseAssignmentManager.stop();
// leader decider is shut down later when scheduler is doing a final shutdown
// since scheduler still accesses the leader decider while shutting down
stopWorkerMetricsReporter();
workerMetricsManager.stopManager();
}
// lam does not manage lifecycle of its threadpool to easily stop/start dynamically.
// once migration code is obsolete (i.e. all 3x functionality is the baseline and no
// migration is needed), it can be moved inside lam
log.info("Shutting down lamThreadPool and workerMetrics reporter thread pool");
lamThreadPool.shutdown();
workerMetricsThreadPool.shutdown();
try {
if (!lamThreadPool.awaitTermination(SCHEDULER_SHUTDOWN_TIMEOUT_SECONDS, TimeUnit.SECONDS)) {
lamThreadPool.shutdownNow();
}
} catch (final InterruptedException e) {
log.warn("Interrupted while waiting for shutdown of LeaseAssignmentManager ThreadPool", e);
lamThreadPool.shutdownNow();
}
try {
if (!workerMetricsThreadPool.awaitTermination(SCHEDULER_SHUTDOWN_TIMEOUT_SECONDS, TimeUnit.SECONDS)) {
workerMetricsThreadPool.shutdownNow();
}
} catch (final InterruptedException e) {
Thread.currentThread().interrupt();
log.warn("Interrupted while waiting for shutdown of WorkerMetricStatsManager ThreadPool", e);
workerMetricsThreadPool.shutdownNow();
}
}
    /**
     * Initializes the WorkerMetricStats DAO and schedules the periodic reporter.
     * Idempotent: a no-op if the reporter future already exists.
     *
     * @throws DependencyException if the WorkerMetricStats DAO fails to initialize
     */
    private void startWorkerMetricsReporting() throws DependencyException {
        if (workerMetricsReporterFuture != null) {
            log.info("Worker metrics reporting is already running...");
            return;
        }
        log.info("Initializing WorkerMetricStats");
        this.workerMetricsDAO.initialize();
        log.info("Starting worker metrics reporter");
        // Start with a delay for workerStatsManager to capture some values and start reporting.
        workerMetricsReporterFuture = workerMetricsThreadPool.scheduleAtFixedRate(
                new WorkerMetricStatsReporter(metricsFactory, workerIdentifier, workerMetricsManager, workerMetricsDAO),
                workerUtilizationAwareAssignmentConfig.inMemoryWorkerMetricsCaptureFrequencyMillis() * 2L,
                workerUtilizationAwareAssignmentConfig.workerMetricsReporterFreqInMillis(),
                TimeUnit.MILLISECONDS);
    }
private void stopWorkerMetricsReporter() {
log.info("Stopping worker metrics reporter");
if (workerMetricsReporterFuture != null) {
workerMetricsReporterFuture.cancel(false);
workerMetricsReporterFuture = null;
}
}
/**
* Create LeaseOwnerToLeaseKey GSI for the lease table
* @param blockingWait whether to wait for the GSI creation or not, if false, the gsi creation will be initiated
* but this call will not block for its creation
* @throws DependencyException If DDB fails unexpectedly when creating the GSI
*/
private void createGsi(final boolean blockingWait) throws DependencyException {
log.info("Creating Lease table GSI if it does not exist");
// KCLv3.0 always starts with GSI available
leaseRefresher.createLeaseOwnerToLeaseKeyIndexIfNotExists();
if (blockingWait) {
log.info("Waiting for Lease table GSI creation");
final long secondsBetweenPolls = 10L;
final long timeoutSeconds = 600L;
final boolean isIndexActive =
leaseRefresher.waitUntilLeaseOwnerToLeaseKeyIndexExists(secondsBetweenPolls, timeoutSeconds);
if (!isIndexActive) {
throw new DependencyException(
new IllegalStateException("Creating LeaseOwnerToLeaseKeyIndex on Lease table timed out"));
}
}
}
    /**
     * Initialize KCL with components and configuration to support upgrade from 2x. This can happen
     * at KCL Worker startup when MigrationStateMachine starts in ClientVersion.CLIENT_VERSION_UPGRADE_FROM_2x.
     * Or Dynamically during roll-forward from ClientVersion.CLIENT_VERSION_2x.
     *
     * @param fromClientVersion client version this transition originates from (logged only)
     * @throws DependencyException if GSI creation or metrics-reporting startup fails
     */
    public synchronized void initializeClientVersionForUpgradeFrom2x(final ClientVersion fromClientVersion)
            throws DependencyException {
        log.info("Initializing KCL components for upgrade from 2x from {}", fromClientVersion);

        // non-blocking: the GSI only needs to be ACTIVE by the time of the flip to 3xWithRollback
        createGsi(false);
        startWorkerMetricsReporting();
        // LAM is not started until the dynamic flip to 3xWithRollback
    }
    /**
     * Initialize KCL with components and configuration to run vanilla 3x functionality. This can happen
     * at KCL Worker startup when MigrationStateMachine starts in ClientVersion.CLIENT_VERSION_3x, or dynamically
     * during a new deployment when existing workers are in ClientVersion.CLIENT_VERSION_3x_WITH_ROLLBACK.
     *
     * @param fromClientVersion client version this transition originates from
     * @throws DependencyException if GSI creation or metrics-reporting startup fails
     */
    public synchronized void initializeClientVersionFor3x(final ClientVersion fromClientVersion)
            throws DependencyException {
        log.info("Initializing KCL components for 3x from {}", fromClientVersion);

        log.info("Initializing LeaseAssignmentManager, DDB-lock-based leader decider, WorkerMetricStats manager"
                + " and creating the Lease table GSI if it does not exist");
        if (fromClientVersion == ClientVersion.CLIENT_VERSION_INIT) {
            // gsi may already exist and be active for migrated application.
            // blocking wait: 3x assignment cannot run without the ACTIVE index
            createGsi(true);
            startWorkerMetricsReporting();
            log.info("Starting LAM");
            leaseAssignmentManager.start();
        }
        // nothing to do when transitioning from CLIENT_VERSION_3x_WITH_ROLLBACK.
    }
    /**
     * Initialize KCL with components and configuration to run 2x compatible functionality
     * while allowing roll-forward. This can happen at KCL Worker startup when MigrationStateMachine
     * starts in ClientVersion.CLIENT_VERSION_2x (after a rollback),
     * or dynamically during rollback from CLIENT_VERSION_UPGRADE_FROM_2x or CLIENT_VERSION_3x_WITH_ROLLBACK.
     *
     * @param fromClientVersion client version this transition originates from
     */
    public synchronized void initializeClientVersionFor2x(final ClientVersion fromClientVersion) {
        log.info("Initializing KCL components for rollback to 2x from {}", fromClientVersion);

        if (fromClientVersion != ClientVersion.CLIENT_VERSION_INIT) {
            // dynamic rollback
            stopWorkerMetricsReporter();
            // Migration Tool will delete the lease table LeaseOwner GSI
            // and WorkerMetricStats table
        }

        if (fromClientVersion == ClientVersion.CLIENT_VERSION_3x_WITH_ROLLBACK) {
            // we are rolling back after flip
            currentAssignmentMode = DEFAULT_LEASE_COUNT_BASED_ASSIGNMENT;
            notifyLeaseAssignmentModeChange();

            log.info("Stopping LAM");
            leaseAssignmentManager.stop();

            final LeaderDecider leaderDecider = deterministicLeaderDeciderCreator.get();
            // post-flip the decider must be the adaptive wrapper; anything else indicates a
            // state-machine bug, so fail loudly rather than silently replacing it
            if (this.leaderDecider instanceof MigrationAdaptiveLeaderDecider) {
                log.info(
                        "Updating LeaderDecider to {}", leaderDecider.getClass().getSimpleName());
                ((MigrationAdaptiveLeaderDecider) this.leaderDecider).updateLeaderDecider(leaderDecider);
            } else {
                throw new IllegalStateException(String.format("Unexpected leader decider %s", this.leaderDecider));
            }
        }
    }
    /**
     * Initialize KCL with components and configuration to run vanilla 3x functionality
     * while allowing roll-back to 2x functionality. This can happen at KCL Worker startup
     * when MigrationStateMachine starts in ClientVersion.CLIENT_VERSION_3x_WITH_ROLLBACK (after the flip),
     * or dynamically during flip from CLIENT_VERSION_UPGRADE_FROM_2x.
     *
     * @param fromClientVersion client version this transition originates from
     * @throws DependencyException if metrics-reporting startup fails (worker-startup path)
     */
    public synchronized void initializeClientVersionFor3xWithRollback(final ClientVersion fromClientVersion)
            throws DependencyException {
        log.info("Initializing KCL components for 3x with rollback from {}", fromClientVersion);

        if (fromClientVersion == ClientVersion.CLIENT_VERSION_UPGRADE_FROM_2x) {
            // dynamic flip
            currentAssignmentMode = WORKER_UTILIZATION_AWARE_ASSIGNMENT;
            notifyLeaseAssignmentModeChange();
            final LeaderDecider leaderDecider = ddbLockBasedLeaderDeciderCreator.get();
            log.info("Updating LeaderDecider to {}", leaderDecider.getClass().getSimpleName());
            // pre-flip the decider is always the adaptive wrapper (see initializeComponentsForMigration)
            ((MigrationAdaptiveLeaderDecider) this.leaderDecider).updateLeaderDecider(leaderDecider);
        } else {
            // worker startup path: metrics reporting was not started during this process lifetime
            startWorkerMetricsReporting();
        }

        log.info("Starting LAM");
        leaseAssignmentManager.start();
    }
/**
* Synchronously invoke the consumer to change the lease assignment mode.
*/
private void notifyLeaseAssignmentModeChange() {
if (dualMode) {
log.info("Notifying {} of {}", leaseModeChangeConsumer, currentAssignmentMode);
if (Objects.nonNull(leaseModeChangeConsumer)) {
try {
leaseModeChangeConsumer.updateLeaseAssignmentMode(currentAssignmentMode);
} catch (final Exception e) {
log.warn("LeaseAssignmentMode change consumer threw exception", e);
}
}
} else {
throw new IllegalStateException("Unexpected assignment mode change");
}
}
}

View file

@ -0,0 +1,144 @@
package software.amazon.kinesis.coordinator;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.CompletionException;
import java.util.function.Supplier;
import software.amazon.awssdk.services.dynamodb.DynamoDbAsyncClient;
import software.amazon.awssdk.services.dynamodb.DynamoDbClient;
import software.amazon.awssdk.services.dynamodb.model.BatchGetItemRequest;
import software.amazon.awssdk.services.dynamodb.model.BatchGetItemResponse;
import software.amazon.awssdk.services.dynamodb.model.BatchWriteItemRequest;
import software.amazon.awssdk.services.dynamodb.model.BatchWriteItemResponse;
import software.amazon.awssdk.services.dynamodb.model.CreateTableRequest;
import software.amazon.awssdk.services.dynamodb.model.CreateTableResponse;
import software.amazon.awssdk.services.dynamodb.model.DeleteItemRequest;
import software.amazon.awssdk.services.dynamodb.model.DeleteItemResponse;
import software.amazon.awssdk.services.dynamodb.model.DeleteTableRequest;
import software.amazon.awssdk.services.dynamodb.model.DeleteTableResponse;
import software.amazon.awssdk.services.dynamodb.model.DescribeTableRequest;
import software.amazon.awssdk.services.dynamodb.model.DescribeTableResponse;
import software.amazon.awssdk.services.dynamodb.model.GetItemRequest;
import software.amazon.awssdk.services.dynamodb.model.GetItemResponse;
import software.amazon.awssdk.services.dynamodb.model.PutItemRequest;
import software.amazon.awssdk.services.dynamodb.model.PutItemResponse;
import software.amazon.awssdk.services.dynamodb.model.QueryRequest;
import software.amazon.awssdk.services.dynamodb.model.QueryResponse;
import software.amazon.awssdk.services.dynamodb.model.ScanRequest;
import software.amazon.awssdk.services.dynamodb.model.ScanResponse;
import software.amazon.awssdk.services.dynamodb.model.UpdateItemRequest;
import software.amazon.awssdk.services.dynamodb.model.UpdateItemResponse;
import software.amazon.awssdk.services.dynamodb.paginators.BatchGetItemIterable;
import software.amazon.awssdk.services.dynamodb.paginators.QueryIterable;
import software.amazon.awssdk.services.dynamodb.paginators.ScanIterable;
/**
 * DDB Lock client depends on DynamoDbClient and KCL only has DynamoDbAsyncClient configured.
 * This wrapper delegates APIs from sync client to async client internally so that it can
 * be used with the DDB Lock client.
 *
 * Each call blocks on the async client's CompletableFuture; the original SDK exception
 * (the future's cause) is rethrown unchanged so callers can catch the usual sync-client
 * exception types.
 */
public class DynamoDbAsyncToSyncClientAdapter implements DynamoDbClient {
    private final DynamoDbAsyncClient asyncClient;

    public DynamoDbAsyncToSyncClientAdapter(final DynamoDbAsyncClient asyncClient) {
        this.asyncClient = asyncClient;
    }

    @Override
    public String serviceName() {
        return asyncClient.serviceName();
    }

    /** Closes the underlying async client. */
    @Override
    public void close() {
        asyncClient.close();
    }

    /**
     * Blocks on the supplied future and unwraps the CompletionException so the original
     * cause propagates to the caller. The trailing return is unreachable (rethrow always
     * throws) but required by the compiler.
     */
    private <T> T handleException(final Supplier<CompletableFuture<T>> task) {
        try {
            return task.get().join();
        } catch (final CompletionException e) {
            rethrow(e.getCause());
            return null;
        }
    }

    @Override
    public CreateTableResponse createTable(final CreateTableRequest request) {
        return handleException(() -> asyncClient.createTable(request));
    }

    @Override
    public DescribeTableResponse describeTable(final DescribeTableRequest request) {
        return handleException(() -> asyncClient.describeTable(request));
    }

    @Override
    public DeleteTableResponse deleteTable(final DeleteTableRequest request) {
        return handleException(() -> asyncClient.deleteTable(request));
    }

    @Override
    public DeleteItemResponse deleteItem(final DeleteItemRequest request) {
        return handleException(() -> asyncClient.deleteItem(request));
    }

    @Override
    public GetItemResponse getItem(final GetItemRequest request) {
        return handleException(() -> asyncClient.getItem(request));
    }

    @Override
    public PutItemResponse putItem(final PutItemRequest request) {
        return handleException(() -> asyncClient.putItem(request));
    }

    @Override
    public UpdateItemResponse updateItem(final UpdateItemRequest request) {
        return handleException(() -> asyncClient.updateItem(request));
    }

    @Override
    public QueryResponse query(final QueryRequest request) {
        return handleException(() -> asyncClient.query(request));
    }

    @Override
    public ScanResponse scan(final ScanRequest request) {
        return handleException(() -> asyncClient.scan(request));
    }

    // Paginators are built over this adapter itself, so each page fetch goes through
    // the blocking delegation above.
    @Override
    public QueryIterable queryPaginator(final QueryRequest request) {
        return new QueryIterable(this, request);
    }

    @Override
    public ScanIterable scanPaginator(final ScanRequest request) {
        return new ScanIterable(this, request);
    }

    @Override
    public BatchGetItemResponse batchGetItem(final BatchGetItemRequest request) {
        return handleException(() -> asyncClient.batchGetItem(request));
    }

    @Override
    public BatchWriteItemResponse batchWriteItem(final BatchWriteItemRequest request) {
        return handleException(() -> asyncClient.batchWriteItem(request));
    }

    @Override
    public BatchGetItemIterable batchGetItemPaginator(final BatchGetItemRequest request) {
        return new BatchGetItemIterable(this, request);
    }

    private static void rethrow(final Throwable e) {
        castAndThrow(e);
    }

    // "Sneaky throw": the unchecked cast lets a checked Throwable propagate without
    // being declared, preserving the original exception type for callers.
    @SuppressWarnings("unchecked")
    private static <T extends Throwable> void castAndThrow(final Throwable e) throws T {
        throw (T) e;
    }
}

View file

@ -21,6 +21,8 @@ package software.amazon.kinesis.coordinator;
* worker is one of the leaders designated to execute shard-sync and then acts accordingly.
*/
public interface LeaderDecider {
String METRIC_OPERATION_LEADER_DECIDER = "LeaderDecider";
String METRIC_OPERATION_LEADER_DECIDER_IS_LEADER = METRIC_OPERATION_LEADER_DECIDER + ":IsLeader";
/**
* Method invoked to check the given workerId corresponds to one of the workers
@ -36,4 +38,32 @@ public interface LeaderDecider {
* being used in the LeaderDecider implementation.
*/
void shutdown();
/**
* Performs initialization tasks for decider if any.
*/
default void initialize() {
// No-op by default
}
/**
* Returns if any ACTIVE leader exists that is elected by the current implementation.
* Note: Some implementation (like DeterministicShuffleShardSyncLeaderDecider) will always have a leader and will
* return true always.
*/
default boolean isAnyLeaderElected() {
return true;
}
/**
* If the current worker is the leader, then releases the leadership else does nothing.
* This might not be relevant for some implementations, for e.g. DeterministicShuffleShardSyncLeaderDecider does
* not have mechanism to release leadership.
*
     * If the current worker is the leader and releases leadership, it is possible that this worker
     * assumes leadership again in a future election.
*/
default void releaseLeadershipIfHeld() {
// No-op by default
}
}

View file

@ -0,0 +1,126 @@
/*
* Copyright 2024 Amazon.com, Inc. or its affiliates.
* Licensed under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package software.amazon.kinesis.coordinator;
import lombok.NoArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import software.amazon.awssdk.annotations.ThreadSafe;
import software.amazon.kinesis.annotations.KinesisClientInternalApi;
/**
* Provides the lease assignment mode KCL must operate in during migration
* from 2.x to 3.x.
* KCL v2.x lease assignment is based on distributed-worker-stealing algorithm
* which balances lease count across workers.
* KCL v3.x lease assignment is based on a centralized-lease-assignment algorithm
* which balances resource utilization metrics(e.g. CPU utilization) across workers.
*
* For a new application starting in KCL v3.x, there is no migration needed,
* so KCL will initialize with the lease assignment mode accordingly, and it will
* not change dynamically.
*
* During upgrade from 2.x to 3.x, KCL library needs an ability to
* start in v2.x assignment mode but dynamically change to v3.x assignment.
* In this case, both 2.x and 3.x lease assignment will be running but one
* of them will be a no-op based on the mode.
*
* The methods and internal state is guarded for concurrent access to allow
* both lease assignment algorithms to access the state concurrently while
* it could be dynamically updated.
*/
@KinesisClientInternalApi
@Slf4j
@ThreadSafe
@NoArgsConstructor
public final class MigrationAdaptiveLeaseAssignmentModeProvider {

    public enum LeaseAssignmentMode {
        /**
         * This is the 2.x assignment mode.
         * This mode assigns leases based on the number of leases.
         * This mode involves each worker independently determining how many leases to pick or how many leases to steal
         * from other workers.
         */
        DEFAULT_LEASE_COUNT_BASED_ASSIGNMENT,

        /**
         * This is the 3.x assignment mode.
         * This mode uses each worker's resource utilization to perform lease assignment.
         * Assignment is done by a single worker (elected leader), which looks at WorkerMetricStats for each worker to
         * determine lease assignment.
         *
         * This mode primarily does
         * 1. Starts WorkerMetricStatsManager on the worker which starts publishing WorkerMetricStats
         * 2. Starts the LeaseDiscoverer
         * 3. Creates if not already available the LeaseOwnerToLeaseKey GSI on the lease table and validate that is
         * ACTIVE.
         */
        WORKER_UTILIZATION_AWARE_ASSIGNMENT;
    }

    // All fields are guarded by the synchronized methods below.
    private LeaseAssignmentMode currentMode;
    private boolean initialized = false;
    // true during migration: both assignment algorithms run and the mode can flip at runtime
    private boolean dynamicModeChangeSupportNeeded;

    /**
     * Specify whether both lease assignment algorithms should be initialized to
     * support dynamically changing lease mode.
     * @return true if lease assignment mode can change dynamically
     *         false otherwise.
     */
    public synchronized boolean dynamicModeChangeSupportNeeded() {
        return dynamicModeChangeSupportNeeded;
    }

    /**
     * Provide the current lease assignment mode in which KCL should perform lease assignment.
     * @return the current lease assignment mode
     * @throws IllegalStateException if called before {@code initialize}
     */
    public synchronized LeaseAssignmentMode getLeaseAssignmentMode() {
        if (!initialized) {
            throw new IllegalStateException("AssignmentMode is not initialized");
        }
        return currentMode;
    }

    // One-shot initialization; repeated calls are logged and ignored so late wiring cannot
    // overwrite the mode established at startup.
    synchronized void initialize(final boolean dynamicModeChangeSupportNeeded, final LeaseAssignmentMode mode) {
        if (!initialized) {
            log.info("Initializing dynamicModeChangeSupportNeeded {} mode {}", dynamicModeChangeSupportNeeded, mode);
            this.dynamicModeChangeSupportNeeded = dynamicModeChangeSupportNeeded;
            this.currentMode = mode;
            this.initialized = true;
            return;
        }
        log.info(
                "Already initialized dynamicModeChangeSupportNeeded {} mode {}. Ignoring new values {}, {}",
                this.dynamicModeChangeSupportNeeded,
                this.currentMode,
                dynamicModeChangeSupportNeeded,
                mode);
    }

    // Runtime mode switch; only legal after initialization and only when dual-mode support
    // was requested at initialize time.
    synchronized void updateLeaseAssignmentMode(final LeaseAssignmentMode mode) {
        if (!initialized) {
            throw new IllegalStateException("Cannot change mode before initializing");
        }
        if (dynamicModeChangeSupportNeeded) {
            log.info("Changing Lease assignment mode from {} to {}", currentMode, mode);
            this.currentMode = mode;
            return;
        }
        throw new IllegalStateException(String.format(
                "Lease assignment mode already initialized to %s cannot" + " change to %s", this.currentMode, mode));
    }
}

View file

@ -87,7 +87,7 @@ class PeriodicShardSyncManager {
private final Map<StreamIdentifier, HashRangeHoleTracker> hashRangeHoleTrackerMap = new HashMap<>();
private final String workerId;
private final LeaderDecider leaderDecider;
private LeaderDecider leaderDecider;
private final LeaseRefresher leaseRefresher;
private final Map<StreamIdentifier, StreamConfig> currentStreamConfigMap;
private final Function<StreamConfig, ShardSyncTaskManager> shardSyncTaskManagerProvider;
@ -105,7 +105,6 @@ class PeriodicShardSyncManager {
PeriodicShardSyncManager(
String workerId,
LeaderDecider leaderDecider,
LeaseRefresher leaseRefresher,
Map<StreamIdentifier, StreamConfig> currentStreamConfigMap,
Function<StreamConfig, ShardSyncTaskManager> shardSyncTaskManagerProvider,
@ -117,7 +116,6 @@ class PeriodicShardSyncManager {
AtomicBoolean leaderSynced) {
this(
workerId,
leaderDecider,
leaseRefresher,
currentStreamConfigMap,
shardSyncTaskManagerProvider,
@ -132,7 +130,6 @@ class PeriodicShardSyncManager {
PeriodicShardSyncManager(
String workerId,
LeaderDecider leaderDecider,
LeaseRefresher leaseRefresher,
Map<StreamIdentifier, StreamConfig> currentStreamConfigMap,
Function<StreamConfig, ShardSyncTaskManager> shardSyncTaskManagerProvider,
@ -144,9 +141,7 @@ class PeriodicShardSyncManager {
int leasesRecoveryAuditorInconsistencyConfidenceThreshold,
AtomicBoolean leaderSynced) {
Validate.notBlank(workerId, "WorkerID is required to initialize PeriodicShardSyncManager.");
Validate.notNull(leaderDecider, "LeaderDecider is required to initialize PeriodicShardSyncManager.");
this.workerId = workerId;
this.leaderDecider = leaderDecider;
this.leaseRefresher = leaseRefresher;
this.currentStreamConfigMap = currentStreamConfigMap;
this.shardSyncTaskManagerProvider = shardSyncTaskManagerProvider;
@ -160,7 +155,9 @@ class PeriodicShardSyncManager {
this.leaderSynced = leaderSynced;
}
public synchronized TaskResult start() {
public synchronized TaskResult start(final LeaderDecider leaderDecider) {
Validate.notNull(leaderDecider, "LeaderDecider is required to start PeriodicShardSyncManager.");
this.leaderDecider = leaderDecider;
if (!isRunning) {
final Runnable periodicShardSyncer = () -> {
try {
@ -435,7 +432,7 @@ class PeriodicShardSyncManager {
leaseRefresher.updateLeaseWithMetaInfo(lease, UpdateField.HASH_KEY_RANGE);
} catch (Exception e) {
log.warn(
"Unable to update hash range key information for lease {} of stream {}."
"Unable to update hash range key information for lease {} of stream {}. "
+ "This may result in explicit lease sync.",
lease.leaseKey(),
streamIdentifier);

View file

@ -26,6 +26,7 @@ import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.Random;
import java.util.Set;
import java.util.concurrent.Callable;
import java.util.concurrent.CompletableFuture;
@ -44,6 +45,7 @@ import java.util.stream.Collectors;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Stopwatch;
import com.google.common.util.concurrent.ThreadFactoryBuilder;
import io.reactivex.rxjava3.plugins.RxJavaPlugins;
import lombok.AccessLevel;
import lombok.Getter;
@ -55,15 +57,23 @@ import lombok.extern.slf4j.Slf4j;
import software.amazon.awssdk.arns.Arn;
import software.amazon.awssdk.regions.Region;
import software.amazon.awssdk.utils.Validate;
import software.amazon.kinesis.annotations.KinesisClientInternalApi;
import software.amazon.kinesis.checkpoint.CheckpointConfig;
import software.amazon.kinesis.checkpoint.ShardRecordProcessorCheckpointer;
import software.amazon.kinesis.common.StreamConfig;
import software.amazon.kinesis.common.StreamIdentifier;
import software.amazon.kinesis.coordinator.assignment.LeaseAssignmentManager;
import software.amazon.kinesis.coordinator.migration.MigrationStateMachine;
import software.amazon.kinesis.coordinator.migration.MigrationStateMachineImpl;
import software.amazon.kinesis.leader.DynamoDBLockBasedLeaderDecider;
import software.amazon.kinesis.leader.MigrationAdaptiveLeaderDecider;
import software.amazon.kinesis.leases.HierarchicalShardSyncer;
import software.amazon.kinesis.leases.Lease;
import software.amazon.kinesis.leases.LeaseCleanupManager;
import software.amazon.kinesis.leases.LeaseCoordinator;
import software.amazon.kinesis.leases.LeaseManagementConfig;
import software.amazon.kinesis.leases.LeaseManagementConfig.WorkerUtilizationAwareAssignmentConfig;
import software.amazon.kinesis.leases.LeaseManagementFactory;
import software.amazon.kinesis.leases.LeaseRefresher;
import software.amazon.kinesis.leases.LeaseSerializer;
import software.amazon.kinesis.leases.MultiStreamLease;
@ -98,6 +108,9 @@ import software.amazon.kinesis.retrieval.AggregatorUtil;
import software.amazon.kinesis.retrieval.RecordsPublisher;
import software.amazon.kinesis.retrieval.RetrievalConfig;
import software.amazon.kinesis.schemaregistry.SchemaRegistryDecoder;
import software.amazon.kinesis.worker.WorkerMetricsSelector;
import software.amazon.kinesis.worker.metricstats.WorkerMetricStatsDAO;
import software.amazon.kinesis.worker.metricstats.WorkerMetricStatsManager;
import static software.amazon.kinesis.common.ArnUtil.constructStreamArn;
import static software.amazon.kinesis.processor.FormerStreamsLeasesDeletionStrategy.StreamsLeasesDeletionType;
@ -106,12 +119,14 @@ import static software.amazon.kinesis.processor.FormerStreamsLeasesDeletionStrat
/**
*
*/
@Getter
@Getter(AccessLevel.PRIVATE)
@Accessors(fluent = true)
@Slf4j
@KinesisClientInternalApi
public class Scheduler implements Runnable {
private static final int PERIODIC_SHARD_SYNC_MAX_WORKERS_DEFAULT = 1;
private static final long LEASE_TABLE_CHECK_FREQUENCY_MILLIS = 3 * 1000L;
private static final long MIN_WAIT_TIME_FOR_LEASE_TABLE_CHECK_MILLIS = 1000L;
private static final long MAX_WAIT_TIME_FOR_LEASE_TABLE_CHECK_MILLIS = 30 * 1000L;
@ -133,7 +148,9 @@ public class Scheduler implements Runnable {
private final ProcessorConfig processorConfig;
private final RetrievalConfig retrievalConfig;
@Getter(AccessLevel.PACKAGE)
private final String applicationName;
private final int maxInitializationAttempts;
private final Checkpointer checkpoint;
private final long shardConsumerDispatchPollIntervalMillis;
@ -156,7 +173,10 @@ public class Scheduler implements Runnable {
private final long failoverTimeMillis;
private final long taskBackoffTimeMillis;
private final boolean isMultiStreamMode;
@Getter(AccessLevel.PACKAGE)
private final Map<StreamIdentifier, StreamConfig> currentStreamConfigMap = new StreamConfigMap();
private final StreamTracker streamTracker;
private final FormerStreamsLeasesDeletionStrategy formerStreamsLeasesDeletionStrategy;
private final long listShardsBackoffTimeMillis;
@ -167,19 +187,30 @@ public class Scheduler implements Runnable {
private final AggregatorUtil aggregatorUtil;
private final Function<StreamConfig, HierarchicalShardSyncer> hierarchicalShardSyncerProvider;
private final long schedulerInitializationBackoffTimeMillis;
private final LeaderDecider leaderDecider;
private LeaderDecider leaderDecider;
@Getter(AccessLevel.PACKAGE)
private final Map<StreamIdentifier, Instant> staleStreamDeletionMap = new HashMap<>();
private final LeaseCleanupManager leaseCleanupManager;
private final SchemaRegistryDecoder schemaRegistryDecoder;
@Getter(AccessLevel.PACKAGE)
private final DeletedStreamListProvider deletedStreamListProvider;
private final MigrationStateMachine migrationStateMachine;
private final DynamicMigrationComponentsInitializer migrationComponentsInitializer;
private final MigrationAdaptiveLeaseAssignmentModeProvider leaseAssignmentModeProvider;
// Holds consumers for shards the worker is currently tracking. Key is shard
// info, value is ShardConsumer.
@Getter(AccessLevel.PACKAGE)
private final ConcurrentMap<ShardInfo, ShardConsumer> shardInfoShardConsumerMap = new ConcurrentHashMap<>();
private volatile boolean shutdown;
private volatile long shutdownStartTimeMillis;
@Getter(AccessLevel.PACKAGE)
private volatile boolean shutdownComplete = false;
private final Object lock = new Object();
@ -187,8 +218,6 @@ public class Scheduler implements Runnable {
private final Stopwatch streamSyncWatch = Stopwatch.createUnstarted();
private boolean leasesSyncedOnAppInit = false;
@Getter(AccessLevel.NONE)
private final AtomicBoolean leaderSynced = new AtomicBoolean(false);
/**
@ -200,7 +229,6 @@ public class Scheduler implements Runnable {
* CountDownLatch used by the GracefulShutdownCoordinator. Reaching zero means that
* the scheduler's finalShutdown() call has completed.
*/
@Getter(AccessLevel.NONE)
private final CountDownLatch finalShutdownLatch = new CountDownLatch(1);
@VisibleForTesting
@ -259,11 +287,32 @@ public class Scheduler implements Runnable {
// Determine leaseSerializer based on availability of MultiStreamTracker.
final LeaseSerializer leaseSerializer =
isMultiStreamMode ? new DynamoDBMultiStreamLeaseSerializer() : new DynamoDBLeaseSerializer();
this.leaseCoordinator = this.leaseManagementConfig
.leaseManagementFactory(leaseSerializer, isMultiStreamMode)
.createLeaseCoordinator(this.metricsFactory);
final LeaseManagementFactory leaseManagementFactory =
this.leaseManagementConfig.leaseManagementFactory(leaseSerializer, isMultiStreamMode);
this.leaseCoordinator =
leaseManagementFactory.createLeaseCoordinator(this.metricsFactory, shardInfoShardConsumerMap);
this.leaseRefresher = this.leaseCoordinator.leaseRefresher();
final CoordinatorStateDAO coordinatorStateDAO = new CoordinatorStateDAO(
leaseManagementConfig.dynamoDBClient(), coordinatorConfig().coordinatorStateConfig());
this.leaseAssignmentModeProvider = new MigrationAdaptiveLeaseAssignmentModeProvider();
this.migrationComponentsInitializer = createDynamicMigrationComponentsInitializer(coordinatorStateDAO);
this.migrationStateMachine = new MigrationStateMachineImpl(
metricsFactory,
System::currentTimeMillis,
coordinatorStateDAO,
Executors.newScheduledThreadPool(
2,
new ThreadFactoryBuilder()
.setNameFormat("MigrationStateMachine-%04d")
.build()),
coordinatorConfig.clientVersionConfig(),
new Random(),
this.migrationComponentsInitializer,
leaseManagementConfig.workerIdentifier(),
Duration.ofMinutes(10).getSeconds());
//
// TODO: Figure out what to do with lease manage <=> checkpoint relationship
//
@ -280,9 +329,8 @@ public class Scheduler implements Runnable {
this.diagnosticEventFactory = diagnosticEventFactory;
this.diagnosticEventHandler = new DiagnosticEventLogger();
this.deletedStreamListProvider = new DeletedStreamListProvider();
this.shardSyncTaskManagerProvider = streamConfig -> this.leaseManagementConfig
.leaseManagementFactory(leaseSerializer, isMultiStreamMode)
.createShardSyncTaskManager(this.metricsFactory, streamConfig, this.deletedStreamListProvider);
this.shardSyncTaskManagerProvider = streamConfig -> leaseManagementFactory.createShardSyncTaskManager(
this.metricsFactory, streamConfig, this.deletedStreamListProvider);
this.shardPrioritization = this.coordinatorConfig.shardPrioritization();
this.cleanupLeasesUponShardCompletion = this.leaseManagementConfig.cleanupLeasesUponShardCompletion();
this.skipShardSyncAtWorkerInitializationIfLeasesExist =
@ -299,8 +347,6 @@ public class Scheduler implements Runnable {
this.workerStateChangeListener =
this.coordinatorConfig.coordinatorFactory().createWorkerStateChangeListener();
}
this.leaderDecider = new DeterministicShuffleShardSyncLeaderDecider(
leaseRefresher, Executors.newSingleThreadScheduledExecutor(), PERIODIC_SHARD_SYNC_MAX_WORKERS_DEFAULT);
this.failoverTimeMillis = this.leaseManagementConfig.failoverTimeMillis();
this.taskBackoffTimeMillis = this.lifecycleConfig.taskBackoffTimeMillis();
this.listShardsBackoffTimeMillis = this.retrievalConfig.listShardsBackoffTimeInMillis();
@ -315,7 +361,6 @@ public class Scheduler implements Runnable {
this.coordinatorConfig.schedulerInitializationBackoffTimeMillis();
this.leaderElectedPeriodicShardSyncManager = new PeriodicShardSyncManager(
leaseManagementConfig.workerIdentifier(),
leaderDecider,
leaseRefresher,
currentStreamConfigMap,
shardSyncTaskManagerProvider,
@ -325,14 +370,69 @@ public class Scheduler implements Runnable {
leaseManagementConfig.leasesRecoveryAuditorExecutionFrequencyMillis(),
leaseManagementConfig.leasesRecoveryAuditorInconsistencyConfidenceThreshold(),
leaderSynced);
this.leaseCleanupManager = this.leaseManagementConfig
.leaseManagementFactory(leaseSerializer, isMultiStreamMode)
.createLeaseCleanupManager(metricsFactory);
this.leaseCleanupManager = leaseManagementFactory.createLeaseCleanupManager(metricsFactory);
this.schemaRegistryDecoder = this.retrievalConfig.glueSchemaRegistryDeserializer() == null
? null
: new SchemaRegistryDecoder(this.retrievalConfig.glueSchemaRegistryDeserializer());
}
/**
 * Builds the {@code DynamicMigrationComponentsInitializer} that wires together the KCLv3
 * worker-metrics, lease-assignment-manager (LAM), and leader-decider components used during
 * KCLv2-to-v3 migration. Depends on LeaseCoordinator and LeaseRefresher to be created first.
 *
 * @param coordinatorStateDAO DAO over the coordinator-state table shared by the migration components
 * @return a fully configured (but not yet started) initializer
 */
private DynamicMigrationComponentsInitializer createDynamicMigrationComponentsInitializer(
        final CoordinatorStateDAO coordinatorStateDAO) {
    // Auto-select default WorkerMetricStats when none were configured (best-effort).
    selectWorkerMetricsIfAvailable(leaseManagementConfig.workerUtilizationAwareAssignmentConfig());

    final WorkerMetricStatsManager workerMetricsManager = new WorkerMetricStatsManager(
            leaseManagementConfig.workerUtilizationAwareAssignmentConfig().noOfPersistedMetricsPerWorkerMetrics(),
            leaseManagementConfig.workerUtilizationAwareAssignmentConfig().workerMetricList(),
            metricsFactory,
            leaseManagementConfig
                    .workerUtilizationAwareAssignmentConfig()
                    .inMemoryWorkerMetricsCaptureFrequencyMillis());

    final WorkerMetricStatsDAO workerMetricsDAO = new WorkerMetricStatsDAO(
            leaseManagementConfig.dynamoDBClient(),
            leaseManagementConfig.workerUtilizationAwareAssignmentConfig().workerMetricsTableConfig(),
            leaseManagementConfig.workerUtilizationAwareAssignmentConfig().workerMetricsReporterFreqInMillis());

    return DynamicMigrationComponentsInitializer.builder()
            .metricsFactory(metricsFactory)
            .leaseRefresher(leaseRefresher)
            .coordinatorStateDAO(coordinatorStateDAO)
            // Single-threaded pool: metrics reporting is periodic, lightweight work.
            .workerMetricsThreadPool(Executors.newScheduledThreadPool(
                    1,
                    new ThreadFactoryBuilder()
                            .setNameFormat("worker-metrics-reporter")
                            .build()))
            .workerMetricsDAO(workerMetricsDAO)
            .workerMetricsManager(workerMetricsManager)
            .lamThreadPool(Executors.newScheduledThreadPool(
                    1,
                    new ThreadFactoryBuilder().setNameFormat("lam-thread").build()))
            // LAM is created lazily so it can pick up whichever LeaderDecider the
            // migration state machine has activated at that point.
            .lamCreator((lamThreadPool, leaderDecider) -> new LeaseAssignmentManager(
                    leaseRefresher,
                    workerMetricsDAO,
                    leaderDecider,
                    leaseManagementConfig.workerUtilizationAwareAssignmentConfig(),
                    leaseCoordinator.workerIdentifier(),
                    leaseManagementConfig.failoverTimeMillis(),
                    metricsFactory,
                    lamThreadPool,
                    System::nanoTime,
                    leaseManagementConfig.maxLeasesForWorker(),
                    leaseManagementConfig.gracefulLeaseHandoffConfig()))
            // Three decider flavors: adaptive (switches during migration), the legacy
            // deterministic-shuffle decider, and the v3 DDB-lock-based decider.
            .adaptiveLeaderDeciderCreator(() -> new MigrationAdaptiveLeaderDecider(metricsFactory))
            .deterministicLeaderDeciderCreator(() -> new DeterministicShuffleShardSyncLeaderDecider(
                    leaseRefresher, Executors.newSingleThreadScheduledExecutor(), 1, metricsFactory))
            .ddbLockBasedLeaderDeciderCreator(() -> DynamoDBLockBasedLeaderDecider.create(
                    coordinatorStateDAO, leaseCoordinator.workerIdentifier(), metricsFactory))
            .workerIdentifier(leaseCoordinator.workerIdentifier())
            .workerUtilizationAwareAssignmentConfig(leaseManagementConfig.workerUtilizationAwareAssignmentConfig())
            .leaseAssignmentModeProvider(leaseAssignmentModeProvider)
            .build();
}
/**
* Start consuming data from the stream, and pass it to the application record processors.
*/
@ -342,13 +442,19 @@ public class Scheduler implements Runnable {
return;
}
final MetricsScope metricsScope =
MetricsUtil.createMetricsWithOperation(metricsFactory, "Scheduler:Initialize");
boolean success = false;
try {
initialize();
success = true;
log.info("Initialization complete. Starting worker loop.");
} catch (RuntimeException e) {
log.error("Unable to initialize after {} attempts. Shutting down.", maxInitializationAttempts, e);
workerStateChangeListener.onAllInitializationAttemptsFailed(e);
shutdown();
} finally {
MetricsUtil.addSuccess(metricsScope, "Initialize", success, MetricsLevel.SUMMARY);
}
while (!shouldShutdown()) {
runProcessLoop();
@ -363,14 +469,13 @@ public class Scheduler implements Runnable {
synchronized (lock) {
registerErrorHandlerForUndeliverableAsyncTaskExceptions();
workerStateChangeListener.onWorkerStateChange(WorkerStateChangeListener.WorkerState.INITIALIZING);
boolean isDone = false;
Exception lastException = null;
for (int i = 0; (!isDone) && (i < maxInitializationAttempts); i++) {
try {
log.info("Initializing LeaseCoordinator attempt {}", (i + 1));
leaseCoordinator.initialize();
if (!skipShardSyncAtWorkerInitializationIfLeasesExist || leaseRefresher.isLeaseTableEmpty()) {
if (shouldInitiateLeaseSync()) {
log.info(
@ -382,21 +487,29 @@ public class Scheduler implements Runnable {
log.info("Skipping shard sync per configuration setting (and lease table is not empty)");
}
// Initialize the state machine after lease table has been initialized
// Migration state machine creates and waits for GSI if necessary,
// it must be initialized before starting leaseCoordinator, which runs LeaseDiscoverer
// and that requires GSI to be present and active. (migrationStateMachine.initialize is idempotent)
migrationStateMachine.initialize();
leaderDecider = migrationComponentsInitializer.leaderDecider();
leaseCleanupManager.start();
// If we reach this point, then we either skipped the lease sync or did not have any exception
// for any of the shard sync in the previous attempt.
if (!leaseCoordinator.isRunning()) {
log.info("Starting LeaseCoordinator");
leaseCoordinator.start();
leaseCoordinator.start(leaseAssignmentModeProvider);
} else {
log.info("LeaseCoordinator is already running. No need to start it.");
}
log.info("Scheduling periodicShardSync");
leaderElectedPeriodicShardSyncManager.start();
leaderElectedPeriodicShardSyncManager.start(leaderDecider);
streamSyncWatch.start();
isDone = true;
} catch (Exception e) {
} catch (final Exception e) {
log.error("Caught exception when initializing LeaseCoordinator", e);
lastException = e;
}
@ -863,7 +976,7 @@ public class Scheduler implements Runnable {
leaseCoordinator, lease, notificationCompleteLatch, shutdownCompleteLatch);
ShardInfo shardInfo = DynamoDBLeaseCoordinator.convertLeaseToAssignment(lease);
ShardConsumer consumer = shardInfoShardConsumerMap.get(shardInfo);
if (consumer != null) {
if (consumer != null && !consumer.isShutdown()) {
consumer.gracefulShutdown(shutdownNotification);
} else {
//
@ -912,6 +1025,8 @@ public class Scheduler implements Runnable {
shutdown = true;
shutdownStartTimeMillis = System.currentTimeMillis();
migrationStateMachine.shutdown();
migrationComponentsInitializer.shutdown();
// Stop lease coordinator, so leases are not renewed or stolen from other workers.
// Lost leases will force Worker to begin shutdown process for all shard consumers in
// Worker.run().
@ -1228,4 +1343,23 @@ public class Scheduler implements Runnable {
public Future<Void> requestShutdown() {
return null;
}
/**
 * If WorkerMetricStats list is empty and the disable flag is false, select WorkerMetricStats automatically.
 *
 * @param workerUtilizationAwareAssignmentConfig config whose workerMetricList may be populated in place
 */
private void selectWorkerMetricsIfAvailable(
        final WorkerUtilizationAwareAssignmentConfig workerUtilizationAwareAssignmentConfig) {
    try {
        if (workerUtilizationAwareAssignmentConfig.workerMetricList().isEmpty()
                && !workerUtilizationAwareAssignmentConfig.disableWorkerMetrics()) {
            workerUtilizationAwareAssignmentConfig.workerMetricList(
                    WorkerMetricsSelector.create().getDefaultWorkerMetrics());
        }
    } catch (final Exception e) {
        // Deliberate best-effort: a selection failure must not abort Scheduler construction;
        // the application can still configure WorkerMetricStats explicitly.
        log.warn(
                "Exception encountered during WorkerMetricStats selection. If this is persistent please try setting the "
                        + "WorkerMetricStats explicitly.",
                e);
    }
}
}

View file

@ -0,0 +1,21 @@
package software.amazon.kinesis.coordinator.assignment;
import java.util.List;
import software.amazon.kinesis.leases.Lease;
/**
 * Strategy for deciding lease ownership across the worker fleet. Implementations decide
 * both how newly available (expired or never-assigned) leases are distributed and how
 * existing assignments are rebalanced.
 */
public interface LeaseAssignmentDecider {

    /**
     * Assigns expiredOrUnAssignedLeases to the available workers.
     *
     * @param expiredOrUnAssignedLeases leases that are currently expired or have no owner
     */
    void assignExpiredOrUnassignedLeases(final List<Lease> expiredOrUnAssignedLeases);

    /**
     * Balances the leases between workers in the fleet.
     * Implementation can choose to balance leases based on lease count or throughput or to bring the variance in
     * resource utilization to a minimum.
     * Check documentation on implementation class to see how it balances the leases.
     */
    void balanceWorkerVariance();
}

View file

@ -0,0 +1,719 @@
package software.amazon.kinesis.coordinator.assignment;
import java.time.Duration;
import java.time.Instant;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Optional;
import java.util.Set;
import java.util.concurrent.Callable;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.CompletionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.function.Function;
import java.util.function.Supplier;
import java.util.stream.Collectors;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import lombok.Getter;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.collections.CollectionUtils;
import software.amazon.awssdk.services.cloudwatch.model.StandardUnit;
import software.amazon.kinesis.annotations.KinesisClientInternalApi;
import software.amazon.kinesis.coordinator.LeaderDecider;
import software.amazon.kinesis.leases.Lease;
import software.amazon.kinesis.leases.LeaseManagementConfig;
import software.amazon.kinesis.leases.LeaseRefresher;
import software.amazon.kinesis.leases.exceptions.DependencyException;
import software.amazon.kinesis.leases.exceptions.InvalidStateException;
import software.amazon.kinesis.leases.exceptions.ProvisionedThroughputException;
import software.amazon.kinesis.metrics.MetricsFactory;
import software.amazon.kinesis.metrics.MetricsLevel;
import software.amazon.kinesis.metrics.MetricsScope;
import software.amazon.kinesis.metrics.MetricsUtil;
import software.amazon.kinesis.metrics.NullMetricsScope;
import software.amazon.kinesis.worker.metricstats.WorkerMetricStats;
import software.amazon.kinesis.worker.metricstats.WorkerMetricStatsDAO;
import static java.util.Objects.isNull;
import static java.util.Objects.nonNull;
/**
* Performs the LeaseAssignment for the application. This starts by loading the leases and workerMetrics from the
* storage and then starts by assignment (in-memory) of expired and/or unassigned leases after which it tries to perform
* balancing of load among the workers by re-assign leases.
* In the end, performs actual assignment by writing to storage.
*/
@Slf4j
@RequiredArgsConstructor
@KinesisClientInternalApi
public final class LeaseAssignmentManager {
/**
* Default number of continuous failure execution after which leadership is released.
*/
private static final int DEFAULT_FAILURE_COUNT_TO_SWITCH_LEADER = 3;
/**
* Default multiplier for LAM frequency with respect to leaseDurationMillis (lease failover millis).
* If leaseDurationMillis is 10000 millis, default LAM frequency is 20000 millis.
*/
private static final int DEFAULT_LEASE_ASSIGNMENT_MANAGER_FREQ_MULTIPLIER = 2;
/**
* Default parallelism factor for scaling lease table.
*/
private static final int DEFAULT_LEASE_TABLE_SCAN_PARALLELISM_FACTOR = 10;
private static final String FORCE_LEADER_RELEASE_METRIC_NAME = "ForceLeaderRelease";
/**
* Default retry attempt for loading leases and workers before giving up.
*/
private static final int DDB_LOAD_RETRY_ATTEMPT = 1;
/**
* Internal threadpool used to parallely perform assignment operation by calling storage.
*/
private static final ExecutorService LEASE_ASSIGNMENT_CALL_THREAD_POOL =
Executors.newFixedThreadPool(Runtime.getRuntime().availableProcessors());
private static final String METRICS_LEASE_ASSIGNMENT_MANAGER = "LeaseAssignmentManager";
private static final String METRICS_INCOMPLETE_EXPIRED_LEASES_ASSIGNMENT =
"LeaseAssignmentManager.IncompleteExpiredLeasesAssignment";
public static final int DEFAULT_NO_OF_SKIP_STAT_FOR_DEAD_WORKER_THRESHOLD = 2;
private final LeaseRefresher leaseRefresher;
private final WorkerMetricStatsDAO workerMetricsDAO;
private final LeaderDecider leaderDecider;
private final LeaseManagementConfig.WorkerUtilizationAwareAssignmentConfig config;
private final String currentWorkerId;
private final Long leaseDurationMillis;
private final MetricsFactory metricsFactory;
private final ScheduledExecutorService executorService;
private final Supplier<Long> nanoTimeProvider;
private final int maxLeasesForWorker;
private final LeaseManagementConfig.GracefulLeaseHandoffConfig gracefulLeaseHandoffConfig;
private boolean tookOverLeadershipInThisRun = false;
private final Map<String, Lease> prevRunLeasesState = new HashMap<>();
private Future<?> managerFuture;
private int noOfContinuousFailedAttempts = 0;
private int lamRunCounter = 0;
/**
 * Schedules the periodic assignment task if it is not already running. The task runs
 * immediately and then at a fixed delay of twice the lease duration. Safe to call again
 * after {@link #stop()}.
 */
public synchronized void start() {
    if (nonNull(managerFuture)) {
        log.info("LeaseAssignmentManager already running...");
        return;
    }
    // LAM can be dynamically started/stopped and restarted during MigrationStateMachine execution
    // so reset the flag to refresh the state before processing during a restart of LAM.
    tookOverLeadershipInThisRun = false;
    managerFuture = executorService.scheduleWithFixedDelay(
            this::performAssignment,
            0L,
            leaseDurationMillis * DEFAULT_LEASE_ASSIGNMENT_MANAGER_FREQ_MULTIPLIER,
            TimeUnit.MILLISECONDS);
    log.info("Started LeaseAssignmentManager");
}
/**
 * Cancels the periodic assignment task if one is scheduled; otherwise logs and returns.
 * Cancellation interrupts an in-flight run.
 */
public synchronized void stop() {
    if (isNull(managerFuture)) {
        log.info("LeaseAssignmentManager is not running...");
        return;
    }
    log.info("Completed shutdown of LeaseAssignmentManager");
    managerFuture.cancel(true);
    managerFuture = null;
}
/**
 * Creates the MetricsScope for given {@param operation} by calling metricsFactory and falls back to
 * NullMetricsScope if failed to create MetricsScope.
 * @param operation Operation name for MetricsScope
 * @return instance of MetricsScope
 */
private MetricsScope createMetricsScope(final String operation) {
    try {
        return MetricsUtil.createMetricsWithOperation(metricsFactory, operation);
    } catch (final Exception e) {
        // Metrics must never break assignment; degrade to a no-op scope on any failure.
        log.error("Failed to create metrics scope defaulting to no metrics.", e);
        return new NullMetricsScope();
    }
}
/**
 * Executes one LeaseAssignmentManager iteration (invoked by the scheduled executor).
 * On the elected leader this loads leases and worker metrics into an in-memory view,
 * assigns expired/unassigned leases, periodically rebalances worker variance, persists
 * the resulting assignments, and prunes stale WorkerMetricStats entries. Non-leaders
 * no-op. After DEFAULT_FAILURE_COUNT_TO_SWITCH_LEADER consecutive failed runs the
 * current worker voluntarily releases leadership so another worker can take over.
 */
private void performAssignment() {
    final MetricsScope metricsScope = createMetricsScope(METRICS_LEASE_ASSIGNMENT_MANAGER);
    final long startTime = System.currentTimeMillis();
    boolean success = false;

    try {
        // If the current worker is not leader, then do nothing as assignment is executed on leader.
        if (!leaderDecider.isLeader(currentWorkerId)) {
            log.info("Current worker {} is not a leader, ignore", currentWorkerId);
            // Reset so a future leadership win triggers the leader-switch cleanup below.
            this.tookOverLeadershipInThisRun = false;
            success = true;
            return;
        }

        if (!this.tookOverLeadershipInThisRun) {
            // This means that there was leader change, perform cleanup of state as this is leader switch.
            this.tookOverLeadershipInThisRun = true;
            this.lamRunCounter = 0;
            prepareAfterLeaderSwitch();
        }
        log.info("Current worker {} is a leader, performing assignment", currentWorkerId);

        final InMemoryStorageView inMemoryStorageView = new InMemoryStorageView();

        final long loadStartTime = System.currentTimeMillis();
        inMemoryStorageView.loadInMemoryStorageView(metricsScope);
        MetricsUtil.addLatency(metricsScope, "LeaseAndWorkerMetricsLoad", loadStartTime, MetricsLevel.DETAILED);

        publishLeaseAndWorkerCountMetrics(metricsScope, inMemoryStorageView);
        final LeaseAssignmentDecider leaseAssignmentDecider = new VarianceBasedLeaseAssignmentDecider(
                inMemoryStorageView,
                config.dampeningPercentage(),
                config.reBalanceThresholdPercentage(),
                config.allowThroughputOvershoot());

        updateLeasesLastCounterIncrementNanosAndLeaseShutdownTimeout(
                inMemoryStorageView.getLeaseList(), inMemoryStorageView.getLeaseTableScanTime());

        // This does not include the leases from the worker that has expired (based on WorkerMetricStats's
        // lastUpdateTime)
        // but the lease is not expired (based on the leaseCounter on lease).
        // If a worker has died, the lease will be expired and assigned in next iteration.
        final List<Lease> expiredOrUnAssignedLeases = inMemoryStorageView.getLeaseList().stream()
                .filter(lease -> lease.isExpired(
                        TimeUnit.MILLISECONDS.toNanos(leaseDurationMillis),
                        inMemoryStorageView.getLeaseTableScanTime()))
                // marking them for direct reassignment.
                .map(l -> l.isExpiredOrUnassigned(true))
                .collect(Collectors.toList());

        log.info("Total expiredOrUnassignedLeases count : {}", expiredOrUnAssignedLeases.size());
        metricsScope.addData(
                "ExpiredLeases", expiredOrUnAssignedLeases.size(), StandardUnit.COUNT, MetricsLevel.SUMMARY);

        final long expiredAndUnassignedLeaseAssignmentStartTime = System.currentTimeMillis();
        leaseAssignmentDecider.assignExpiredOrUnassignedLeases(expiredOrUnAssignedLeases);
        MetricsUtil.addLatency(
                metricsScope,
                "AssignExpiredOrUnassignedLeases",
                expiredAndUnassignedLeaseAssignmentStartTime,
                MetricsLevel.DETAILED);

        if (!expiredOrUnAssignedLeases.isEmpty()) {
            // When expiredOrUnAssignedLeases is not empty, that means
            // that we were not able to assign all expired or unassigned leases and hit the maxThroughput
            // per worker for all workers.
            log.warn("Not able to assign all expiredOrUnAssignedLeases");
            metricsScope.addData(
                    "LeaseSpillover", expiredOrUnAssignedLeases.size(), StandardUnit.COUNT, MetricsLevel.SUMMARY);
        }

        // Variance balancing runs only every varianceBalancingFrequency-th iteration.
        if (shouldRunVarianceBalancing()) {
            final long balanceWorkerVarianceStartTime = System.currentTimeMillis();
            final int totalNewAssignmentBeforeWorkerVarianceBalancing =
                    inMemoryStorageView.leaseToNewAssignedWorkerMap.size();
            leaseAssignmentDecider.balanceWorkerVariance();
            MetricsUtil.addLatency(
                    metricsScope, "BalanceWorkerVariance", balanceWorkerVarianceStartTime, MetricsLevel.DETAILED);
            metricsScope.addData(
                    "NumOfLeasesReassignment",
                    inMemoryStorageView.leaseToNewAssignedWorkerMap.size()
                            - totalNewAssignmentBeforeWorkerVarianceBalancing,
                    StandardUnit.COUNT,
                    MetricsLevel.SUMMARY);
        }

        if (inMemoryStorageView.leaseToNewAssignedWorkerMap.isEmpty()) {
            log.info("No new lease assignment performed in this iteration");
        }

        // All decisions so far were in-memory; this step writes them to storage.
        parallelyAssignLeases(inMemoryStorageView, metricsScope);
        printPerWorkerLeases(inMemoryStorageView);
        deleteStaleWorkerMetricsEntries(inMemoryStorageView, metricsScope);
        success = true;
        noOfContinuousFailedAttempts = 0;
    } catch (final Exception e) {
        log.error("LeaseAssignmentManager failed to perform lease assignment.", e);
        noOfContinuousFailedAttempts++;
        if (noOfContinuousFailedAttempts >= DEFAULT_FAILURE_COUNT_TO_SWITCH_LEADER) {
            log.error(
                    "Failed to perform assignment {} times in a row, releasing leadership from worker : {}",
                    DEFAULT_FAILURE_COUNT_TO_SWITCH_LEADER,
                    currentWorkerId);
            MetricsUtil.addCount(metricsScope, FORCE_LEADER_RELEASE_METRIC_NAME, 1, MetricsLevel.SUMMARY);
            leaderDecider.releaseLeadershipIfHeld();
        }
    } finally {
        MetricsUtil.addSuccessAndLatency(metricsScope, success, startTime, MetricsLevel.SUMMARY);
        MetricsUtil.endScope(metricsScope);
    }
}
/**
 * Decides whether this LAM iteration should also rebalance load variance across workers.
 * Balancing runs on every varianceBalancingFrequency-th iteration, i.e. whenever the
 * run counter is at zero.
 *
 * @return true if variance balancing should run in the current iteration
 */
private boolean shouldRunVarianceBalancing() {
    final int frequency = config.varianceBalancingFrequency();
    if (frequency <= 1) {
        // Frequency 1 means "every run". This branch also guards against a non-positive,
        // misconfigured frequency, which would otherwise throw ArithmeticException on the
        // modulo below and (after repeated failures) force a leadership release.
        this.lamRunCounter = 0;
        return true;
    }
    final boolean response = this.lamRunCounter == 0;
    /*
    To avoid lamRunCounter grow large, keep it within [0,varianceBalancingFrequency).
    If varianceBalancingFrequency is 5 lamRunCounter value will be within 0 to 4 and method return true when
    lamRunCounter is 0.
    */
    this.lamRunCounter = (this.lamRunCounter + 1) % frequency;
    return response;
}
/**
 * Deletes the WorkerMetricStats entries which are stale(not updated since long time, ref
 * {@link LeaseAssignmentManager#isWorkerMetricsEntryStale} for the condition to evaluate staleness)
 *
 * @param inMemoryStorageView view holding the workerMetrics entries loaded at the start of this run
 * @param metricsScope scope used to emit the stale-entry count and cleanup latency
 */
private void deleteStaleWorkerMetricsEntries(
        final InMemoryStorageView inMemoryStorageView, final MetricsScope metricsScope) {
    final long startTime = System.currentTimeMillis();
    try {
        final List<WorkerMetricStats> staleWorkerMetricsList = inMemoryStorageView.getWorkerMetricsList().stream()
                .filter(this::isWorkerMetricsEntryStale)
                .collect(Collectors.toList());
        MetricsUtil.addCount(
                metricsScope, "TotalStaleWorkerMetricsEntry", staleWorkerMetricsList.size(), MetricsLevel.DETAILED);
        log.info("Number of stale workerMetrics entries : {}", staleWorkerMetricsList.size());
        log.info("Stale workerMetrics list : {}", staleWorkerMetricsList);

        // Delete in parallel on the shared pool; join() so any delete failure surfaces to
        // performAssignment's catch block instead of being silently dropped.
        final List<CompletableFuture<Boolean>> completableFutures = staleWorkerMetricsList.stream()
                .map(workerMetrics -> CompletableFuture.supplyAsync(
                        () -> workerMetricsDAO.deleteMetrics(workerMetrics), LEASE_ASSIGNMENT_CALL_THREAD_POOL))
                .collect(Collectors.toList());
        CompletableFuture.allOf(completableFutures.toArray(new CompletableFuture[0]))
                .join();
    } finally {
        MetricsUtil.addLatency(metricsScope, "StaleWorkerMetricsCleanup", startTime, MetricsLevel.DETAILED);
    }
}
/**
 * A WorkerMetricStats entry is considered stale when its lastUpdateTime (epoch seconds)
 * is older than the configured staleWorkerMetricsEntryCleanupDuration.
 *
 * @param workerMetrics entry to evaluate
 * @return true if the entry has not been updated within the cleanup duration
 */
private boolean isWorkerMetricsEntryStale(final WorkerMetricStats workerMetrics) {
    final long millisSinceLastUpdate = Duration.between(
                    Instant.ofEpochSecond(workerMetrics.getLastUpdateTime()), Instant.now())
            .toMillis();
    return millisSinceLastUpdate > config.staleWorkerMetricsEntryCleanupDuration().toMillis();
}
/**
 * Logs, for each active worker, the number of leases currently assigned to it and its total
 * assigned throughput (KBps), based on the in-memory view built during this LAM run.
 *
 * @param storageView in-memory snapshot of lease and worker-metric state for this run
 */
private void printPerWorkerLeases(final InMemoryStorageView storageView) {
    storageView.getActiveWorkerIdSet().forEach(activeWorkerId -> {
        log.info(
                "Worker : {} and total leases : {} and totalThroughput : {}",
                activeWorkerId,
                // Use the type-safe Collections.emptySet() instead of the raw EMPTY_SET
                // constant when the worker has no leases assigned in the map.
                Optional.ofNullable(storageView.getWorkerToLeasesMap().get(activeWorkerId))
                        .orElse(Collections.emptySet())
                        .size(),
                storageView.getWorkerToTotalAssignedThroughputMap().get(activeWorkerId));
    });
}
/**
 * Applies the new lease assignments computed during this run, in parallel, on the
 * lease-assignment thread pool. Leases whose current owner is still heartbeating while a
 * graceful-shutdown checkpoint is pending are skipped this round. A failed-assignment count
 * and a success/latency metric are emitted even when one of the assignments fails.
 */
private void parallelyAssignLeases(final InMemoryStorageView inMemoryStorageView, final MetricsScope metricsScope) {
    final AtomicInteger failedAssignmentCounter = new AtomicInteger(0);
    final long startTime = System.currentTimeMillis();
    boolean success = false;
    try {
        CompletableFuture.allOf(inMemoryStorageView.getLeaseToNewAssignedWorkerMap().entrySet().stream()
                // ignore leases that are heartbeating and pending graceful shutdown checkpoint.
                .filter(entry -> !entry.getKey().blockedOnPendingCheckpoint(getNanoTimeMillis()))
                .map(entry -> CompletableFuture.supplyAsync(
                        () -> {
                            try {
                                final Lease lease = entry.getKey();
                                // Graceful handoff lets the current owner checkpoint before
                                // losing the lease; regular assignment transfers it directly.
                                if (gracefulLeaseHandoffConfig.isGracefulLeaseHandoffEnabled()
                                        && lease.isEligibleForGracefulShutdown()) {
                                    return handleGracefulLeaseHandoff(
                                            lease, entry.getValue(), failedAssignmentCounter);
                                } else {
                                    return handleRegularLeaseAssignment(
                                            lease, entry.getValue(), failedAssignmentCounter);
                                }
                            } catch (Exception e) {
                                // Wrap so that allOf(...).join() surfaces the first failure.
                                throw new CompletionException(e);
                            }
                        },
                        LEASE_ASSIGNMENT_CALL_THREAD_POOL))
                .toArray(CompletableFuture[]::new))
                .join();
        success = true;
    } finally {
        MetricsUtil.addCount(
                metricsScope, "FailedAssignmentCount", failedAssignmentCounter.get(), MetricsLevel.DETAILED);
        MetricsUtil.addSuccessAndLatency(
                metricsScope, "ParallelyAssignLeases", success, startTime, MetricsLevel.DETAILED);
    }
}
/**
 * Initiates a graceful handoff of the lease to {@code newOwner}: the refresher conditionally
 * records the next owner while the current owner finishes checkpointing. On success the
 * in-memory lease is stamped with the handoff expiry timestamp; on failure the shared
 * failed-assignment counter is incremented.
 *
 * @return true if the handoff was successfully initiated in storage
 */
private boolean handleGracefulLeaseHandoff(Lease lease, String newOwner, AtomicInteger failedAssignmentCounter)
        throws ProvisionedThroughputException, InvalidStateException, DependencyException {
    final boolean handoffInitiated = leaseRefresher.initiateGracefulLeaseHandoff(lease, newOwner);
    if (!handoffInitiated) {
        failedAssignmentCounter.incrementAndGet();
        return false;
    }
    // New handoff assignment: record when this pending shutdown is considered expired.
    lease.checkpointOwnerTimeoutTimestampMillis(getCheckpointOwnerTimeoutTimestampMillis());
    return true;
}
/**
 * Assigns the lease directly to {@code newOwner} via the lease refresher. On success the
 * in-memory lease's counter timestamp is refreshed (the assignment bumps the leaseCounter in
 * storage); on failure the shared failed-assignment counter is incremented.
 *
 * @return true if the assignment succeeded in storage
 */
private boolean handleRegularLeaseAssignment(Lease lease, String newOwner, AtomicInteger failedAssignmentCounter)
        throws ProvisionedThroughputException, InvalidStateException, DependencyException {
    final boolean assigned = leaseRefresher.assignLease(lease, newOwner);
    if (!assigned) {
        failedAssignmentCounter.incrementAndGet();
        return false;
    }
    // Successful assignment updates the leaseCounter; record the nanoTime of that update.
    lease.lastCounterIncrementNanos(nanoTimeProvider.get());
    return true;
}
/**
 * Publishes the total lease count and active worker count for this run. Metric names are kept
 * in sync with those published by LeaseTaker.
 */
private void publishLeaseAndWorkerCountMetrics(
        final MetricsScope metricsScope, final InMemoryStorageView inMemoryStorageView) {
    final int totalLeases = inMemoryStorageView.leaseList.size();
    final int numWorkers = inMemoryStorageView.activeWorkerMetrics.size();
    metricsScope.addData("TotalLeases", totalLeases, StandardUnit.COUNT, MetricsLevel.SUMMARY);
    metricsScope.addData("NumWorkers", numWorkers, StandardUnit.COUNT, MetricsLevel.SUMMARY);
}
/**
 * Refreshes bookkeeping on the freshly-scanned lease list using the state remembered from the
 * previous run ({@code prevRunLeasesState}):
 * <ul>
 *   <li>lastCounterIncrementNanos is set to {@code scanTime} when the leaseCounter moved since
 *       the previous run, otherwise the previous value is carried over; leases not seen before
 *       and unassigned get 0L, which sorts them first in assignment order.</li>
 *   <li>for leases with a requested shutdown, the checkpoint-owner timeout is carried over from
 *       the previous run only when it is demonstrably the same handoff, otherwise a fresh
 *       timeout is stamped.</li>
 * </ul>
 * Finally replaces {@code prevRunLeasesState} with the current lease list.
 */
private void updateLeasesLastCounterIncrementNanosAndLeaseShutdownTimeout(
        final List<Lease> leaseList, final Long scanTime) {
    for (final Lease lease : leaseList) {
        final Lease prevLease = prevRunLeasesState.get(lease.leaseKey());
        // make sure lease shutdown timeouts are tracked.
        if (lease.shutdownRequested()) {
            // previous and current leases might have same next and checkpoint owners but there is no
            // guarantee that the latest shutdown is the same shutdown in the previous lease, for example
            // some other leader changed the lease states while this worker was waiting for its LAM run.
            // This is the best effort to prevent marking the incorrect timeout.
            if (isNull(prevLease) || !prevLease.shutdownRequested() || !isSameOwners(lease, prevLease)) {
                // Stamp a fresh timeout if previous is null, previous lease was not shutdown
                // pending, or the owners don't match.
                lease.checkpointOwnerTimeoutTimestampMillis(getCheckpointOwnerTimeoutTimestampMillis());
            } else {
                lease.checkpointOwnerTimeoutTimestampMillis(prevLease.checkpointOwnerTimeoutTimestampMillis());
            }
        }
        if (isNull(prevLease)) {
            lease.lastCounterIncrementNanos(
                    isNull(lease.actualOwner())
                            // This is an unassigned lease, mark as 0L that puts this first in assignment order
                            ? 0L
                            : scanTime);
        } else {
            lease.lastCounterIncrementNanos(
                    lease.leaseCounter() > prevLease.leaseCounter()
                            ? scanTime
                            : prevLease.lastCounterIncrementNanos());
        }
    }
    prevRunLeasesState.clear();
    prevRunLeasesState.putAll(leaseList.stream().collect(Collectors.toMap(Lease::leaseKey, Function.identity())));
}
/**
 * Resets per-leadership-term state. Called when this worker (re)gains leadership so that
 * decisions are not based on lease state or failure counts cached from a previous term.
 */
private void prepareAfterLeaderSwitch() {
    prevRunLeasesState.clear();
    noOfContinuousFailedAttempts = 0;
}
/**
 * In-memory view of the leases and workerMetrics.
 * This class supports queries (e.g., leases assigned to a worker or total throughput assigned
 * to a worker). It is populated once per LAM run by {@link #loadInMemoryStorageView} and then
 * mutated in memory via {@link #performLeaseAssignment} before changes are written to storage.
 */
@Getter
class InMemoryStorageView {
    // This is the in-memory view of the workerToLeaseMapping; this is updated in-memory before
    // actual changes to storage.
    private final Map<String, Set<Lease>> workerToLeasesMap = new HashMap<>();

    /**
     * This is computed initially after loading the leases and then updated when
     * {@link InMemoryStorageView#performLeaseAssignment} is called.
     */
    private final Map<String, Double> workerToTotalAssignedThroughputMap = new HashMap<>();

    /**
     * Captures the new assignments done during the lifecycle of a single run.
     */
    private final Map<Lease, String> leaseToNewAssignedWorkerMap = new HashMap<>();

    /**
     * List of all leases in the application.
     */
    private List<Lease> leaseList;

    /**
     * List of workers which are active (i.e., updated metric stats before the threshold, ref
     * {@link this#computeWorkerExpiryThresholdInSecond})
     */
    private List<WorkerMetricStats> activeWorkerMetrics;

    /**
     * List of all workerMetrics entries from storage.
     */
    private List<WorkerMetricStats> workerMetricsList;

    /**
     * List of active worker ids.
     */
    private Set<String> activeWorkerIdSet;

    /**
     * Wall time in nanoseconds when the lease table scan was completed.
     */
    private long leaseTableScanTime = 0L;

    /**
     * Average throughput for all workers.
     */
    private double targetAverageThroughput;

    /**
     * Update {@ref inMemoryWorkerToLeasesMapping} with the change in ownership and update newLeaseAssignmentMap
     *
     * @param lease lease changing assignment
     * @param newOwner new owner of the lease
     */
    public void performLeaseAssignment(final Lease lease, final String newOwner) {
        final String existingOwner = lease.actualOwner();
        workerToLeasesMap.get(existingOwner).remove(lease);
        workerToLeasesMap
                .computeIfAbsent(newOwner, owner -> new HashSet<>())
                .add(lease);
        updateWorkerThroughput(newOwner, lease.throughputKBps());
        // Remove the same lease throughput from the old owner
        updateWorkerThroughput(existingOwner, -lease.throughputKBps());
        leaseToNewAssignedWorkerMap.put(lease, newOwner);
    }

    /**
     * Scans the LeaseTable and WorkerMetricStats in parallel, loads the data, and populates the
     * data structures used in lease assignment.
     */
    public void loadInMemoryStorageView(final MetricsScope metricsScope) throws Exception {
        // Kick off both scans concurrently; join them below in the order they are needed.
        final CompletableFuture<Map.Entry<List<Lease>, List<String>>> leaseListFuture = loadLeaseListAsync();
        final CompletableFuture<List<WorkerMetricStats>> workerMetricsFuture = loadWorkerMetricStats();
        final List<WorkerMetricStats> workerMetricsFromStorage = workerMetricsFuture.join();
        final List<String> listOfWorkerIdOfInvalidWorkerMetricsEntry = workerMetricsFromStorage.stream()
                .filter(workerMetrics -> !workerMetrics.isValidWorkerMetric())
                .map(WorkerMetricStats::getWorkerId)
                .collect(Collectors.toList());
        log.warn("List of workerIds with invalid entries : {}", listOfWorkerIdOfInvalidWorkerMetricsEntry);
        if (!listOfWorkerIdOfInvalidWorkerMetricsEntry.isEmpty()) {
            metricsScope.addData(
                    "NumWorkersWithInvalidEntry",
                    listOfWorkerIdOfInvalidWorkerMetricsEntry.size(),
                    StandardUnit.COUNT,
                    MetricsLevel.SUMMARY);
        }
        // Only valid entries are considered further; for validity of an entry refer to
        // WorkerMetricStats#isValidWorkerMetric
        this.workerMetricsList = workerMetricsFromStorage.stream()
                .filter(WorkerMetricStats::isValidWorkerMetric)
                .collect(Collectors.toList());
        log.info("Total WorkerMetricStats available : {}", workerMetricsList.size());
        final long workerExpiryThreshold = computeWorkerExpiryThresholdInSecond();
        final long countOfWorkersWithFailingWorkerMetric = workerMetricsList.stream()
                .filter(WorkerMetricStats::isAnyWorkerMetricFailing)
                .count();
        if (countOfWorkersWithFailingWorkerMetric != 0) {
            metricsScope.addData(
                    "NumWorkersWithFailingWorkerMetric",
                    countOfWorkersWithFailingWorkerMetric,
                    StandardUnit.COUNT,
                    MetricsLevel.SUMMARY);
        }
        final Map.Entry<List<Lease>, List<String>> leaseListResponse = leaseListFuture.join();
        this.leaseList = leaseListResponse.getKey();
        log.warn("Leases that failed deserialization : {}", leaseListResponse.getValue());
        if (!leaseListResponse.getValue().isEmpty()) {
            MetricsUtil.addCount(
                    metricsScope,
                    "LeaseDeserializationFailureCount",
                    leaseListResponse.getValue().size(),
                    MetricsLevel.SUMMARY);
        }
        this.leaseTableScanTime = nanoTimeProvider.get();
        log.info("Total Leases available : {}", leaseList.size());
        final double averageLeaseThroughput = leaseList.stream()
                .filter(lease -> nonNull(lease.throughputKBps()))
                .mapToDouble(Lease::throughputKBps)
                .average()
                // If none of the leases has any value, that means it is app
                // startup time; assign 0 in that case to start with.
                .orElse(0D);
        /*
         * If a workerMetrics has a failing metric (i.e. has a -1 value in the last index, which
         * denotes failure), skip it from activeWorkerMetrics and take no new action on it
         * (new assignment etc.) until the metric has a non -1 value in the last index. This
         * avoids performing actions based on stale data for that worker.
         */
        this.activeWorkerMetrics = workerMetricsList.stream()
                .filter(workerMetrics -> workerMetrics.getLastUpdateTime() >= workerExpiryThreshold
                        && !workerMetrics.isAnyWorkerMetricFailing())
                .collect(Collectors.toList());
        log.info("activeWorkerMetrics : {}", activeWorkerMetrics.size());
        targetAverageThroughput =
                averageLeaseThroughput * leaseList.size() / Math.max(1, activeWorkerMetrics.size());
        leaseList.forEach(lease -> {
            if (isNull(lease.throughputKBps())) {
                // If the lease is unassigned, it will not have any throughput value; use average
                // throughput as a good enough value to start with.
                lease.throughputKBps(averageLeaseThroughput);
            }
            workerToLeasesMap
                    .computeIfAbsent(lease.actualOwner(), workerId -> new HashSet<>())
                    .add(lease);
            updateWorkerThroughput(lease.actualOwner(), lease.throughputKBps());
        });
        this.activeWorkerIdSet = new HashSet<>();
        // Calculate initial ratio
        this.activeWorkerMetrics.forEach(workerMetrics -> {
            activeWorkerIdSet.add(workerMetrics.getWorkerId());
            workerMetrics.setEmaAlpha(config.workerMetricsEMAAlpha());
            if (workerMetrics.isUsingDefaultWorkerMetric()) {
                setOperatingRangeAndWorkerMetricsDataForDefaultWorker(
                        workerMetrics,
                        getTotalAssignedThroughput(workerMetrics.getWorkerId()) / targetAverageThroughput);
            }
        });
    }

    // Accumulates (or subtracts, when leaseThroughput is negative) the lease's throughput onto
    // the worker's running total.
    private void updateWorkerThroughput(final String workerId, final double leaseThroughput) {
        double value = workerToTotalAssignedThroughputMap.computeIfAbsent(workerId, worker -> (double) 0L);
        workerToTotalAssignedThroughputMap.put(workerId, value + leaseThroughput);
    }

    private void setOperatingRangeAndWorkerMetricsDataForDefaultWorker(
            final WorkerMetricStats workerMetrics, final Double ratio) {
        // for workers with default WorkerMetricStats, the operating range ceiling of 100 represents the
        // target throughput. This way, with either heterogeneous or homogeneous fleets
        // of explicit WorkerMetricStats and default WorkerMetricStats applications, load will be evenly
        // distributed.
        log.info(
                "Worker [{}] is using default WorkerMetricStats, setting initial utilization ratio to [{}].",
                workerMetrics.getWorkerId(),
                ratio);
        workerMetrics.setOperatingRange(ImmutableMap.of("T", ImmutableList.of(100L)));
        workerMetrics.setMetricStats(ImmutableMap.of("T", ImmutableList.of(ratio * 100, ratio * 100)));
    }

    /**
     * Calculates the value threshold in seconds for a worker to be considered as active.
     * If a worker has not updated the WorkerMetricStats entry within this threshold, the worker is not considered
     * as active.
     *
     * @return wall time in seconds
     */
    private long computeWorkerExpiryThresholdInSecond() {
        final long timeInSeconds = Duration.ofMillis(System.currentTimeMillis()
                        - DEFAULT_NO_OF_SKIP_STAT_FOR_DEAD_WORKER_THRESHOLD
                                * config.workerMetricsReporterFreqInMillis())
                .getSeconds();
        log.info("WorkerMetricStats expiry time in seconds : {}", timeInSeconds);
        return timeInSeconds;
    }

    /**
     * Looks at inMemoryWorkerToLeasesMapping for lease assignment and figures out if there is room considering
     * any new assignment that would have happened.
     */
    public boolean isWorkerTotalThroughputLessThanMaxThroughput(final String workerId) {
        return getTotalAssignedThroughput(workerId) <= config.maxThroughputPerHostKBps();
    }

    /**
     * Looks at inMemoryWorkerToLeasesMapping for lease assignment of a worker and returns true if the worker has
     * no leases assigned or fewer than maxNumberOfLeasesPerHost, else false.
     */
    public boolean isWorkerAssignedLeasesLessThanMaxLeases(final String workerId) {
        final Set<Lease> assignedLeases = workerToLeasesMap.get(workerId);
        if (CollectionUtils.isEmpty(assignedLeases)) {
            // There are no leases assigned to the worker, that means it is less than maxNumberOfLeasesPerHost.
            return true;
        } else {
            return assignedLeases.size() < maxLeasesForWorker;
        }
    }

    // Total throughput (KBps) currently assigned to the worker in this in-memory view; 0 when
    // the worker has no assignments.
    public Double getTotalAssignedThroughput(final String workerId) {
        return workerToTotalAssignedThroughputMap.getOrDefault(workerId, 0D);
    }

    // Async full scan of the WorkerMetricStats table, with retry.
    private CompletableFuture<List<WorkerMetricStats>> loadWorkerMetricStats() {
        return CompletableFuture.supplyAsync(() -> loadWithRetry(workerMetricsDAO::getAllWorkerMetricStats));
    }

    // Async parallel scan of the lease table, with retry. The returned entry carries the leases
    // plus the keys of items that failed deserialization.
    private CompletableFuture<Map.Entry<List<Lease>, List<String>>> loadLeaseListAsync() {
        return CompletableFuture.supplyAsync(() -> loadWithRetry(() -> leaseRefresher.listLeasesParallely(
                LEASE_ASSIGNMENT_CALL_THREAD_POOL, DEFAULT_LEASE_TABLE_SCAN_PARALLELISM_FACTOR)));
    }

    // Invokes loadFunction, retrying up to DDB_LOAD_RETRY_ATTEMPT times before wrapping the last
    // failure in a CompletionException.
    private <T> T loadWithRetry(final Callable<T> loadFunction) {
        int retryAttempt = 0;
        while (true) {
            try {
                return loadFunction.call();
            } catch (final Exception e) {
                if (retryAttempt < DDB_LOAD_RETRY_ATTEMPT) {
                    log.warn(
                            "Failed to load : {}, retrying",
                            loadFunction.getClass().getName(),
                            e);
                    retryAttempt++;
                } else {
                    throw new CompletionException(e);
                }
            }
        }
    }
}
/**
 * Computes the future timestamp (millis, on the nano-time-provider clock) after which a
 * graceful lease handoff shutdown can be considered expired. leaseDurationMillis is added on
 * top of the configured handoff timeout to account for how long it might take the lease owner
 * to receive the shutdown signal before executing shutdown.
 */
private long getCheckpointOwnerTimeoutTimestampMillis() {
    final long handoffBudgetMillis =
            gracefulLeaseHandoffConfig.gracefulLeaseHandoffTimeoutMillis() + leaseDurationMillis;
    return getNanoTimeMillis() + handoffBudgetMillis;
}
/**
 * Current reading of the injected nano-time provider converted to milliseconds. This is not a
 * wall-clock time, but since the same provider is used consistently for elapsed-time
 * calculations it is safe to use in checkpoint-expiration comparisons.
 */
private long getNanoTimeMillis() {
    final long nowNanos = nanoTimeProvider.get();
    return TimeUnit.NANOSECONDS.toMillis(nowNanos);
}
/**
 * True when both the leaseOwner and the checkpointOwner match between the two leases
 * (null-safe comparison).
 */
private static boolean isSameOwners(Lease currentLease, Lease previousLease) {
    final boolean sameLeaseOwner = Objects.equals(currentLease.leaseOwner(), previousLease.leaseOwner());
    final boolean sameCheckpointOwner =
            Objects.equals(currentLease.checkpointOwner(), previousLease.checkpointOwner());
    return sameLeaseOwner && sameCheckpointOwner;
}
}

View file

@ -0,0 +1,348 @@
package software.amazon.kinesis.coordinator.assignment;
import java.util.AbstractMap.SimpleEntry;
import java.util.ArrayDeque;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.PriorityQueue;
import java.util.Queue;
import java.util.Set;
import java.util.stream.Collectors;
import lombok.extern.slf4j.Slf4j;
import software.amazon.kinesis.annotations.KinesisClientInternalApi;
import software.amazon.kinesis.leases.Lease;
import software.amazon.kinesis.worker.metricstats.WorkerMetricStats;
import static java.util.Objects.isNull;
import static java.util.Objects.nonNull;
/**
 * VarianceBasedLeaseAssignmentDecider
 * This implementation of LeaseAssignmentDecider performs lease assignment by considering the
 * WorkerMetricStats values of workers with respect to the fleet-level average of each WorkerMetricStats.
 * Rebalanced leases are assigned to the workers that have, in terms of throughput, the most remaining
 * capacity before reaching the fleet-level average of the WorkerMetricStats value. In case of multiple
 * WorkerMetricStats, the capacity to reach the fleet-level average is determined by the outlier
 * WorkerMetricStats.
 * To minimize the variance, the algorithm picks the fleet-level average of the WorkerMetricStats for workers as a
 * pivot point and uses it to determine workers to take leases from and then assign to other workers.
 * The threshold for considering a worker for re-balance is configurable via
 * {@code reBalanceThreshold}. During reassignments the {@code dampeningPercentageValue} is used to achieve
 * critical dampening.
 */
@Slf4j
@KinesisClientInternalApi
public final class VarianceBasedLeaseAssignmentDecider implements LeaseAssignmentDecider {
    private final LeaseAssignmentManager.InMemoryStorageView inMemoryStorageView;
    private final int dampeningPercentageValue;
    private final int reBalanceThreshold;
    private final boolean allowThroughputOvershoot;
    // WorkerMetricStats name -> fleet-level average of that metric across active workers.
    private final Map<String, Double> workerMetricsToFleetLevelAverageMap = new HashMap<>();
    // Workers eligible to receive leases, ordered by most available capacity first.
    private final PriorityQueue<WorkerMetricStats> assignableWorkerSortedByAvailableCapacity;
    private int targetLeasePerWorker;

    public VarianceBasedLeaseAssignmentDecider(
            final LeaseAssignmentManager.InMemoryStorageView inMemoryStorageView,
            final int dampeningPercentageValue,
            final int reBalanceThreshold,
            final boolean allowThroughputOvershoot) {
        this.inMemoryStorageView = inMemoryStorageView;
        this.dampeningPercentageValue = dampeningPercentageValue;
        this.reBalanceThreshold = reBalanceThreshold;
        this.allowThroughputOvershoot = allowThroughputOvershoot;
        initialize();
        // Workers farthest below the fleet-level average (most headroom) are polled first.
        final Comparator<WorkerMetricStats> comparator = Comparator.comparingDouble(
                workerMetrics -> workerMetrics.computePercentageToReachAverage(workerMetricsToFleetLevelAverageMap));
        this.assignableWorkerSortedByAvailableCapacity = new PriorityQueue<>(comparator.reversed());
        this.assignableWorkerSortedByAvailableCapacity.addAll(
                getAvailableWorkersForAssignment(inMemoryStorageView.getActiveWorkerMetrics()));
    }

    // Computes the fleet-level average per WorkerMetricStats name and the target lease count per
    // worker (both floored at sensible minimums to avoid division by zero).
    private void initialize() {
        final Map<String, Double> workerMetricsNameToAverage = inMemoryStorageView.getActiveWorkerMetrics().stream()
                .flatMap(workerMetrics -> workerMetrics.getMetricStats().keySet().stream()
                        .map(workerMetricsName ->
                                new SimpleEntry<>(workerMetricsName, workerMetrics.getMetricStat(workerMetricsName))))
                .collect(Collectors.groupingBy(
                        SimpleEntry::getKey, HashMap::new, Collectors.averagingDouble(SimpleEntry::getValue)));
        workerMetricsToFleetLevelAverageMap.putAll(workerMetricsNameToAverage);
        final int totalWorkers =
                Math.max(inMemoryStorageView.getActiveWorkerMetrics().size(), 1);
        this.targetLeasePerWorker = Math.max(inMemoryStorageView.getLeaseList().size() / totalWorkers, 1);
    }

    private List<WorkerMetricStats> getAvailableWorkersForAssignment(final List<WorkerMetricStats> workerMetricsList) {
        // Workers with WorkerMetricStats running hot are also available for assignment as the goal is to balance
        // utilization always (e.g., if all workers have hot WorkerMetricStats, balance the variance between
        // them too). Only the throughput and lease-count caps exclude a worker here.
        return workerMetricsList.stream()
                .filter(workerMetrics -> inMemoryStorageView.isWorkerTotalThroughputLessThanMaxThroughput(
                                workerMetrics.getWorkerId())
                        && inMemoryStorageView.isWorkerAssignedLeasesLessThanMaxLeases(workerMetrics.getWorkerId()))
                .collect(Collectors.toList());
    }

    /**
     * Assigns each expired or unassigned lease to the worker with the most available capacity.
     * Leases that could be assigned are removed from the input list; leases left over (no
     * assignable worker remained) stay in the list for the caller to inspect.
     */
    @Override
    public void assignExpiredOrUnassignedLeases(final List<Lease> expiredOrUnAssignedLeases) {
        // Sort the expiredOrUnAssignedLeases using lastCounterIncrementNanos such that leases expired first are
        // picked first.
        // Unassigned leases have lastCounterIncrementNanos as zero and thus are assigned first.
        Collections.sort(expiredOrUnAssignedLeases, Comparator.comparing(Lease::lastCounterIncrementNanos));
        final Set<Lease> assignedLeases = new HashSet<>();
        for (final Lease lease : expiredOrUnAssignedLeases) {
            final WorkerMetricStats workerToAssignLease = assignableWorkerSortedByAvailableCapacity.poll();
            if (nonNull(workerToAssignLease)) {
                assignLease(lease, workerToAssignLease);
                assignedLeases.add(lease);
            } else {
                log.info("No worker available to assign lease {}", lease.leaseKey());
                break;
            }
        }
        expiredOrUnAssignedLeases.removeAll(assignedLeases);
    }

    // Returns the workers to take leases from for the given metric, or an empty list when no
    // worker is outside the +/- reBalanceThreshold band (i.e. no re-balance needed).
    private List<WorkerMetricStats> getWorkersToTakeLeasesFromIfRequired(
            final List<WorkerMetricStats> currentWorkerMetrics,
            final String workerMetricsName,
            final double workerMetricsValueAvg) {
        final List<WorkerMetricStats> workerIdsAboveAverage = new ArrayList<>();
        final double upperLimit = workerMetricsValueAvg * (1.0D + (double) reBalanceThreshold / 100);
        final double lowerLimit = workerMetricsValueAvg * (1.0D - (double) reBalanceThreshold / 100);
        WorkerMetricStats mostLoadedWorker = null;
        log.info("Range for re-balance upper threshold {} and lower threshold {}", upperLimit, lowerLimit);
        boolean shouldTriggerReBalance = false;
        for (final WorkerMetricStats workerMetrics : currentWorkerMetrics) {
            final double currentWorkerMetricsValue = workerMetrics.getMetricStat(workerMetricsName);
            final boolean isCurrentWorkerMetricsAboveOperatingRange =
                    workerMetrics.isWorkerMetricAboveOperatingRange(workerMetricsName);
            /*
             * If there is any worker whose WorkerMetricStats value is outside +/- reBalanceThreshold % of
             * workerMetricsValueAvg, or if a worker's WorkerMetricStats value is above its operating range,
             * trigger re-balance.
             */
            if (currentWorkerMetricsValue > upperLimit
                    || currentWorkerMetricsValue < lowerLimit
                    || isCurrentWorkerMetricsAboveOperatingRange) {
                shouldTriggerReBalance = true;
            }
            // Perform re-balance on the worker if it is above upperLimit or if the current WorkerMetricStats is
            // above its operating range.
            if (currentWorkerMetricsValue >= upperLimit || isCurrentWorkerMetricsAboveOperatingRange) {
                workerIdsAboveAverage.add(workerMetrics);
            }
            if (mostLoadedWorker == null
                    || mostLoadedWorker.getMetricStat(workerMetricsName) < currentWorkerMetricsValue) {
                mostLoadedWorker = workerMetrics;
            }
        }
        /*
         * If workerIdsAboveAverage is empty, that means there is no worker with a WorkerMetricStats value above
         * upperLimit, so pick the most loaded worker. This can happen when there is a worker with a
         * WorkerMetricStats value below lowerLimit but all other workers are within upperLimit.
         */
        if (workerIdsAboveAverage.isEmpty()) {
            workerIdsAboveAverage.add(mostLoadedWorker);
        }
        return shouldTriggerReBalance ? workerIdsAboveAverage : Collections.emptyList();
    }

    /**
     * Performs the balancing of the throughput assigned to workers based on the WorkerMetricsValues of workers with
     * respect to the fleet-level average.
     * Each WorkerMetricStats is treated independently to determine workers for re-balance (computed based on
     * reBalanceThreshold).
     * The magnitude of throughput to take is determined by how far a worker is from the average of that
     * WorkerMetricStats across the fleet and, in case of multiple WorkerMetricStats, the one with the maximum
     * magnitude of throughput is considered.
     */
    @Override
    public void balanceWorkerVariance() {
        final List<WorkerMetricStats> activeWorkerMetrics = inMemoryStorageView.getActiveWorkerMetrics();
        log.info("WorkerMetricStats to corresponding fleet level average : {}", workerMetricsToFleetLevelAverageMap);
        log.info("Active WorkerMetricStats : {}", activeWorkerMetrics);
        final Map<String, Double> workerIdToThroughputToTakeMap = new HashMap<>();
        String largestOutlierWorkerMetricsName = "";
        double maxThroughputTake = -1.0D;
        // Evaluate each metric independently; keep the take-plan of the most-outlier metric.
        for (final Map.Entry<String, Double> workerMetricsToFleetLevelAverageEntry :
                workerMetricsToFleetLevelAverageMap.entrySet()) {
            final String workerMetricsName = workerMetricsToFleetLevelAverageEntry.getKey();
            // Filter workers that do not have the current WorkerMetricStats. This is possible if the application
            // is adding a new WorkerMetricStats and is currently in the deployment phase.
            final List<WorkerMetricStats> currentWorkerMetrics = activeWorkerMetrics.stream()
                    .filter(workerMetrics -> workerMetrics.containsMetricStat(workerMetricsName))
                    .collect(Collectors.toList());
            final double fleetAverageForWorkerMetrics = workerMetricsToFleetLevelAverageEntry.getValue();
            final List<WorkerMetricStats> workerToTakeLeasesFrom = getWorkersToTakeLeasesFromIfRequired(
                    currentWorkerMetrics, workerMetricsName, fleetAverageForWorkerMetrics);
            final Map<String, Double> workerIdToThroughputToTakeForCurrentWorkerMetrics = new HashMap<>();
            double totalThroughputToTakeForCurrentWorkerMetrics = 0D;
            for (final WorkerMetricStats workerToTakeLease : workerToTakeLeasesFrom) {
                final double workerMetricsValueForWorker = workerToTakeLease.getMetricStat(workerMetricsName);
                // Load to take based on the difference compared to the fleet level average
                final double loadPercentageToTake =
                        (workerMetricsValueForWorker - fleetAverageForWorkerMetrics) / workerMetricsValueForWorker;
                // Dampen the load based on dampeningPercentageValue
                final double dampenedLoadPercentageToTake =
                        loadPercentageToTake * ((double) dampeningPercentageValue / 100);
                final double throughputToTake =
                        inMemoryStorageView.getTotalAssignedThroughput(workerToTakeLease.getWorkerId())
                                * dampenedLoadPercentageToTake;
                log.info(
                        "For worker : {} taking throughput : {} after dampening based on WorkerMetricStats : {}",
                        workerToTakeLease.getWorkerId(),
                        throughputToTake,
                        workerMetricsName);
                totalThroughputToTakeForCurrentWorkerMetrics += throughputToTake;
                workerIdToThroughputToTakeForCurrentWorkerMetrics.put(
                        workerToTakeLease.getWorkerId(), throughputToTake);
            }
            /*
             * If totalThroughputToTakeForCurrentWorkerMetrics is more than maxThroughputTake, this
             * WorkerMetricStats is more of an outlier, so consider it for reBalancing.
             */
            if (maxThroughputTake < totalThroughputToTakeForCurrentWorkerMetrics) {
                largestOutlierWorkerMetricsName = workerMetricsName;
                workerIdToThroughputToTakeMap.clear();
                workerIdToThroughputToTakeMap.putAll(workerIdToThroughputToTakeForCurrentWorkerMetrics);
                maxThroughputTake = totalThroughputToTakeForCurrentWorkerMetrics;
            }
        }
        log.info(
                "Largest outlier WorkerMetricStats is : {} and total of {} throughput will be rebalanced",
                largestOutlierWorkerMetricsName,
                maxThroughputTake);
        log.info("Workers to throughput taken from them is : {}", workerIdToThroughputToTakeMap);
        final List<Map.Entry<String, Double>> sortedWorkerIdToThroughputToTakeEntries =
                new ArrayList<>(workerIdToThroughputToTakeMap.entrySet());
        // sort entries by values (largest throughput-to-take first).
        Collections.sort(sortedWorkerIdToThroughputToTakeEntries, (e1, e2) -> e2.getValue()
                .compareTo(e1.getValue()));
        for (final Map.Entry<String, Double> workerIdToThroughputToTakeEntry :
                sortedWorkerIdToThroughputToTakeEntries) {
            final String workerId = workerIdToThroughputToTakeEntry.getKey();
            final double throughputToTake = workerIdToThroughputToTakeEntry.getValue();
            final Queue<Lease> leasesToTake = getLeasesToTake(workerId, throughputToTake);
            log.info(
                    "Leases taken from worker : {} are : {}",
                    workerId,
                    leasesToTake.stream().map(Lease::leaseKey).collect(Collectors.toSet()));
            for (final Lease lease : leasesToTake) {
                final WorkerMetricStats workerToAssign = assignableWorkerSortedByAvailableCapacity.poll();
                // Stop reassigning once the best remaining worker would be pushed past the
                // average utilization or its operating range by taking this lease.
                if (nonNull(workerToAssign)
                        && workerToAssign.willAnyMetricStatsGoAboveAverageUtilizationOrOperatingRange(
                                workerMetricsToFleetLevelAverageMap,
                                inMemoryStorageView.getTargetAverageThroughput(),
                                lease.throughputKBps(),
                                targetLeasePerWorker)) {
                    log.info("No worker to assign anymore in this iteration due to hitting average values");
                    break;
                }
                if (nonNull(workerToAssign)) {
                    assignLease(lease, workerToAssign);
                }
            }
        }
        printWorkerToUtilizationLog(inMemoryStorageView.getActiveWorkerMetrics());
    }

    // Picks the leases to take away from workerId that together approximate throughputToTake.
    private Queue<Lease> getLeasesToTake(final String workerId, final double throughputToTake) {
        final Set<Lease> existingLeases =
                inMemoryStorageView.getWorkerToLeasesMap().get(workerId);
        if (isNull(existingLeases) || existingLeases.isEmpty()) {
            return new ArrayDeque<>();
        }
        if (inMemoryStorageView.getTotalAssignedThroughput(workerId) == 0D) {
            // This is the case where the throughput of this worker is zero and it has 1 or more leases assigned.
            // It is not possible to determine leases to take based on throughput, so simply take 1 lease and
            // move on.
            return new ArrayDeque<>(new ArrayList<>(existingLeases).subList(0, 1));
        }
        return getLeasesCombiningToThroughput(workerId, throughputToTake);
    }

    // Records the assignment in the in-memory view and re-queues the receiving worker if it
    // still has capacity for more leases.
    private void assignLease(final Lease lease, final WorkerMetricStats workerMetrics) {
        if (nonNull(lease.actualOwner()) && lease.actualOwner().equals(workerMetrics.getWorkerId())) {
            // if the new owner and current owner are the same then there is no assignment to do;
            // put back the worker as well since no assignment is done
            assignableWorkerSortedByAvailableCapacity.add(workerMetrics);
            return;
        }
        // Project the worker's metric values forward to account for the added throughput so
        // subsequent capacity ordering reflects this assignment.
        workerMetrics.extrapolateMetricStatValuesForAddedThroughput(
                workerMetricsToFleetLevelAverageMap,
                inMemoryStorageView.getTargetAverageThroughput(),
                lease.throughputKBps(),
                targetLeasePerWorker);
        log.info("Assigning lease : {} to worker : {}", lease.leaseKey(), workerMetrics.getWorkerId());
        inMemoryStorageView.performLeaseAssignment(lease, workerMetrics.getWorkerId());
        if (inMemoryStorageView.isWorkerTotalThroughputLessThanMaxThroughput(workerMetrics.getWorkerId())
                && inMemoryStorageView.isWorkerAssignedLeasesLessThanMaxLeases(workerMetrics.getWorkerId())) {
            assignableWorkerSortedByAvailableCapacity.add(workerMetrics);
        }
    }

    private void printWorkerToUtilizationLog(final List<WorkerMetricStats> activeWorkerMetrics) {
        activeWorkerMetrics.forEach(workerMetrics -> log.info(
                "WorkerId : {} and average WorkerMetricStats data : {}",
                workerMetrics.getWorkerId(),
                workerMetrics.getMetricStatsMap()));
    }

    // Greedily selects a random subset of the worker's leases whose combined throughput stays
    // within throughputToGet; optionally overshoots with the smallest lease when allowed.
    private Queue<Lease> getLeasesCombiningToThroughput(final String workerId, final double throughputToGet) {
        final List<Lease> assignedLeases =
                new ArrayList<>(inMemoryStorageView.getWorkerToLeasesMap().get(workerId));
        if (assignedLeases.isEmpty()) {
            // This is possible if the worker has high utilization but does not have any leases assigned to it
            return new ArrayDeque<>();
        }
        // Shuffle leases to randomize which leases get picked.
        Collections.shuffle(assignedLeases);
        final Queue<Lease> response = new ArrayDeque<>();
        double remainingThroughputToGet = throughputToGet;
        for (final Lease lease : assignedLeases) {
            // if adding this lease makes the throughput to take go below zero, avoid taking this lease.
            if (remainingThroughputToGet - lease.throughputKBps() <= 0) {
                continue;
            }
            remainingThroughputToGet -= lease.throughputKBps();
            response.add(lease);
        }
        // If allowThroughputOvershoot is set to true, take a minimum-throughput lease
        if (allowThroughputOvershoot && response.isEmpty()) {
            assignedLeases.stream()
                    .min(Comparator.comparingDouble(Lease::throughputKBps))
                    .ifPresent(response::add);
        }
        return response;
    }
}

View file

@ -0,0 +1,58 @@
/*
* Copyright 2024 Amazon.com, Inc. or its affiliates.
* Licensed under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package software.amazon.kinesis.coordinator.migration;
/**
 * ClientVersion support during upgrade from KCLv2.x to KCLv3.x.
 *
 * <p>This enum is persisted in storage, so any change must remain backward compatible:
 * the constants must not be reordered, and a removed constant's position cannot be
 * reused without explicit backward-compatibility consideration.
 */
public enum ClientVersion {
    /**
     * Transient start state used while the Migration State Machine is initializing;
     * never a steady-state value.
     */
    CLIENT_VERSION_INIT,

    /**
     * Active while an application is upgrading from KCLv2.x to KCLv3.x. Workers emit
     * WorkerMetricStats but still run the KCLv2.x leader-election and lease-assignment
     * algorithms, while KCL monitors the fleet for KCLv3.x readiness.
     */
    CLIENT_VERSION_UPGRADE_FROM_2x,

    /**
     * Entered on rollback (via the KCL migration tool) from CLIENT_VERSION_UPGRADE_FROM_2x
     * or CLIENT_VERSION_3x_WITH_ROLLBACK when the customer reverts to KCLv2.x behavior.
     * Workers stop emitting WorkerMetricStats and run KCLv2.x algorithms, while watching
     * for a roll-forward back to CLIENT_VERSION_UPGRADE_FROM_2x via the migration tool.
     */
    CLIENT_VERSION_2x,

    /**
     * Entered by the leader once the fleet is KCLv3.x ready (lease table GSI active and
     * all lease owners emitting worker metrics): KCLv3.x algorithms run, and the version
     * is persisted in {@link MigrationState} so every worker flips over. Workers keep
     * monitoring for a rollback to CLIENT_VERSION_2x initiated through the migration tool
     * and flip back instantly if it occurs.
     */
    CLIENT_VERSION_3x_WITH_ROLLBACK,

    /**
     * Steady state for a fresh KCLv3.x application, or for an upgraded application after
     * the upgrade succeeded: all KCLv3.x algorithms run with no rollback monitoring.
     */
    CLIENT_VERSION_3x
}

View file

@ -0,0 +1,161 @@
/*
* Copyright 2024 Amazon.com, Inc. or its affiliates.
* Licensed under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package software.amazon.kinesis.coordinator.migration;
import java.time.Duration;
import java.util.Random;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.ScheduledFuture;
import java.util.concurrent.TimeUnit;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import software.amazon.awssdk.annotations.ThreadSafe;
import software.amazon.awssdk.services.cloudwatch.model.StandardUnit;
import software.amazon.kinesis.coordinator.CoordinatorStateDAO;
import software.amazon.kinesis.leases.exceptions.DependencyException;
import software.amazon.kinesis.leases.exceptions.InvalidStateException;
import software.amazon.kinesis.metrics.MetricsFactory;
import software.amazon.kinesis.metrics.MetricsLevel;
import software.amazon.kinesis.metrics.MetricsScope;
import software.amazon.kinesis.metrics.MetricsUtil;
import static software.amazon.kinesis.coordinator.migration.MigrationState.MIGRATION_HASH_KEY;
import static software.amazon.kinesis.coordinator.migration.MigrationStateMachineImpl.METRICS_OPERATION;
/**
* Change monitor for MigrationState.clientVersion to notify a callback if the value
* changes from a given value. This monitor will be run to monitor
* rollback, roll-forward and also upgrade to 3.x scenarios. Look at {@link ClientVersion}
* for more details.
*
* Since all KCL workers will be running the monitor, the monitor poll interval uses
* a random jitter to stagger the reads to ddb.
*
* The class is thread-safe and will invoke callback on a separate thread.
*/
@Slf4j
@RequiredArgsConstructor
@ThreadSafe
public class ClientVersionChangeMonitor implements Runnable {
    /**
     * Interface of a callback to invoke when monitor condition is true.
     */
    public interface ClientVersionChangeCallback {
        /**
         * Invoked (on a stateMachineThreadPool thread, under this monitor's lock) when the
         * persisted client version no longer matches the monitor's expected version.
         *
         * @param currentMigrationState the MigrationState just read from DDB
         * @throws InvalidStateException if the new state does not allow a valid transition
         * @throws DependencyException if a dependency fails while handling the change
         */
        void accept(final MigrationState currentMigrationState) throws InvalidStateException, DependencyException;
    }

    // Base poll interval; a per-instance random jitter is added on top (see startMonitor).
    private static final long MONITOR_INTERVAL_MILLIS = Duration.ofMinutes(1).toMillis();
    // Maximum jitter as a fraction (10%) of the base interval, staggering DDB reads across workers.
    private static final double JITTER_FACTOR = 0.1;

    private final MetricsFactory metricsFactory;
    private final CoordinatorStateDAO coordinatorStateDAO;
    private final ScheduledExecutorService stateMachineThreadPool;
    private final ClientVersionChangeCallback callback;
    // The client version this monitor expects; any other persisted value triggers the callback.
    private final ClientVersion expectedVersion;
    private final Random random;

    // Effective poll interval (base + jitter); computed once in startMonitor.
    private long monitorIntervalMillis;
    // Non-null while scheduled; reset to null after the callback succeeds (self-cancellation).
    private ScheduledFuture<?> scheduledFuture;

    /**
     * Schedules this monitor on the shared thread pool with a jittered fixed delay.
     * Idempotent: calling again while already scheduled is a no-op.
     */
    public synchronized void startMonitor() {
        if (scheduledFuture == null) {
            final long jitter = (long) (random.nextDouble() * MONITOR_INTERVAL_MILLIS * JITTER_FACTOR);
            monitorIntervalMillis = MONITOR_INTERVAL_MILLIS + jitter;
            log.info(
                    "Monitoring for MigrationState client version change from {} every {}ms",
                    expectedVersion,
                    monitorIntervalMillis);
            scheduledFuture = stateMachineThreadPool.scheduleWithFixedDelay(
                    this, monitorIntervalMillis, monitorIntervalMillis, TimeUnit.MILLISECONDS);
        }
    }

    @Override
    public String toString() {
        return new StringBuilder(getClass().getSimpleName())
                .append("[")
                .append(expectedVersion)
                .append("]")
                .toString();
    }

    /**
     * Cancel the monitor explicitly before the condition is met, e.g. when the worker is going down.
     * Note on synchronization: callback of this monitor is invoked while holding the lock on this monitor object.
     * If cancel is called from within the same lock context that callback uses, then it can lead to
     * deadlock. Ensure synchronization context between callback the caller of cancel is not shared.
     */
    public synchronized void cancel() {
        if (scheduledFuture != null) {
            log.info("Cancelling {}", this);
            scheduledFuture.cancel(false);
        } else {
            log.info("Monitor {} is not running", this);
        }
    }

    /**
     * Polls DDB once: reads the MigrationState and, if its client version differs from
     * {@code expectedVersion}, invokes the callback and cancels further polling.
     * Any exception is logged and swallowed so the scheduled task keeps retrying.
     */
    @Override
    public synchronized void run() {
        try {
            if (scheduledFuture == null) {
                log.debug("Monitor has been cancelled, not running...");
                return;
            }
            final MigrationState migrationState =
                    (MigrationState) coordinatorStateDAO.getCoordinatorState(MIGRATION_HASH_KEY);
            if (migrationState != null) {
                if (migrationState.getClientVersion() != expectedVersion) {
                    log.info("MigrationState client version has changed {}, invoking monitor callback", migrationState);
                    callback.accept(migrationState);
                    log.info("Callback successful, monitoring cancelling itself.");
                    // stop further monitoring
                    scheduledFuture.cancel(false);
                    scheduledFuture = null;
                } else {
                    emitMetrics();
                    log.debug("No change detected {}", this);
                }
            }
        } catch (final Exception e) {
            log.warn(
                    "Exception occurred when monitoring for client version change from {}, will retry in {}",
                    expectedVersion,
                    monitorIntervalMillis,
                    e);
        }
    }

    // Emits a metric describing which compatibility mode this worker is currently in,
    // derived from the version the monitor is watching.
    private void emitMetrics() {
        final MetricsScope scope = MetricsUtil.createMetricsWithOperation(metricsFactory, METRICS_OPERATION);
        try {
            switch (expectedVersion) {
                case CLIENT_VERSION_3x_WITH_ROLLBACK:
                    scope.addData("CurrentState:3xWorker", 1, StandardUnit.COUNT, MetricsLevel.SUMMARY);
                    break;
                case CLIENT_VERSION_2x:
                case CLIENT_VERSION_UPGRADE_FROM_2x:
                    scope.addData("CurrentState:2xCompatibleWorker", 1, StandardUnit.COUNT, MetricsLevel.SUMMARY);
                    break;
                default:
                    // Monitors are only created for the three versions above; anything else is a programming error.
                    throw new IllegalStateException(String.format("Unexpected version %s", expectedVersion.name()));
            }
        } finally {
            MetricsUtil.endScope(scope);
        }
    }
}

View file

@ -0,0 +1,159 @@
/*
* Copyright 2024 Amazon.com, Inc. or its affiliates.
* Licensed under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package software.amazon.kinesis.coordinator.migration;
import java.util.Random;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.ScheduledExecutorService;
import lombok.NonNull;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import software.amazon.awssdk.annotations.ThreadSafe;
import software.amazon.awssdk.services.cloudwatch.model.StandardUnit;
import software.amazon.kinesis.annotations.KinesisClientInternalApi;
import software.amazon.kinesis.coordinator.CoordinatorStateDAO;
import software.amazon.kinesis.coordinator.DynamicMigrationComponentsInitializer;
import software.amazon.kinesis.leases.exceptions.DependencyException;
import software.amazon.kinesis.leases.exceptions.InvalidStateException;
import software.amazon.kinesis.metrics.MetricsLevel;
import software.amazon.kinesis.metrics.MetricsScope;
import software.amazon.kinesis.metrics.MetricsUtil;
import static software.amazon.kinesis.coordinator.migration.ClientVersion.CLIENT_VERSION_2x;
import static software.amazon.kinesis.coordinator.migration.ClientVersion.CLIENT_VERSION_UPGRADE_FROM_2x;
import static software.amazon.kinesis.coordinator.migration.MigrationStateMachineImpl.FAULT_METRIC;
import static software.amazon.kinesis.coordinator.migration.MigrationStateMachineImpl.METRICS_OPERATION;
/**
 * State for CLIENT_VERSION_2x. In this state, the only allowed valid transition is
 * the roll-forward scenario which can only be performed using the KCL Migration tool.
 * So when the state machine enters this state, a monitor is started to detect the
 * roll-forward scenario.
 */
@KinesisClientInternalApi
@RequiredArgsConstructor
@Slf4j
@ThreadSafe
public class MigrationClientVersion2xState implements MigrationClientVersionState {
    private final MigrationStateMachine stateMachine;
    private final CoordinatorStateDAO coordinatorStateDAO;
    private final ScheduledExecutorService stateMachineThreadPool;
    private final DynamicMigrationComponentsInitializer initializer;
    private final Random random;

    // Monitor watching DDB for the roll-forward trigger (client version changing away from 2x).
    private ClientVersionChangeMonitor rollForwardMonitor;
    // Lifecycle flags guarded by this instance's lock; the state is active iff entered && !left.
    private boolean entered = false;
    private boolean left = false;

    @Override
    public ClientVersion clientVersion() {
        return CLIENT_VERSION_2x;
    }

    /**
     * Initializes KCL components for 2.x functionality and starts the roll-forward monitor.
     * Idempotent: entering an already-entered state is a no-op.
     *
     * @param fromClientVersion the client version the state machine is transitioning from
     */
    @Override
    public synchronized void enter(final ClientVersion fromClientVersion) {
        if (!entered) {
            log.info("Entering {} from {}", this, fromClientVersion);
            initializer.initializeClientVersionFor2x(fromClientVersion);
            log.info("Starting roll-forward monitor");
            rollForwardMonitor = new ClientVersionChangeMonitor(
                    initializer.metricsFactory(),
                    coordinatorStateDAO,
                    stateMachineThreadPool,
                    this::onClientVersionChange,
                    clientVersion(),
                    random);
            rollForwardMonitor.startMonitor();
            entered = true;
        } else {
            log.info("Not entering {}", left ? "already exited state" : "already entered state");
        }
    }

    /**
     * Cancels the roll-forward monitor and marks the state as exited so that any
     * in-flight monitor callback is ignored by {@link #onClientVersionChange}.
     */
    @Override
    public synchronized void leave() {
        if (entered && !left) {
            log.info("Leaving {}", this);
            cancelRollForwardMonitor();
            // Fix: mark the state inactive (entered=false, left=true) like the sibling states do;
            // previously "left = false" was a no-op and the state was never considered exited.
            entered = false;
            left = true;
        } else {
            log.info("Cannot leave {}", entered ? "already exited state" : "because state is not active");
        }
    }

    @Override
    public String toString() {
        return getClass().getSimpleName();
    }

    /**
     * Callback handler to handle client version changes in MigrationState in DDB.
     * @param newState current MigrationState read from DDB where client version is not CLIENT_VERSION_2x
     * @throws InvalidStateException during transition to the next state based on the new ClientVersion
     *         or if the new state in DDB is unexpected.
     * @throws DependencyException if a dependency fails while transitioning to the next state
     */
    private synchronized void onClientVersionChange(@NonNull final MigrationState newState)
            throws InvalidStateException, DependencyException {
        if (!entered || left) {
            // A stale callback after leave(); ignore it.
            log.warn("Received client version change notification on inactive state {}", this);
            return;
        }
        final MetricsScope scope =
                MetricsUtil.createMetricsWithOperation(initializer.metricsFactory(), METRICS_OPERATION);
        try {
            if (newState.getClientVersion() == CLIENT_VERSION_UPGRADE_FROM_2x) {
                log.info(
                        "A roll-forward has been initiated for the application. Transition to {}",
                        CLIENT_VERSION_UPGRADE_FROM_2x);
                // If this succeeds, the monitor will cancel itself.
                stateMachine.transitionTo(CLIENT_VERSION_UPGRADE_FROM_2x, newState);
            } else {
                // This should not happen, so throw an exception that allows the monitor to continue monitoring
                // changes, this allows KCL to operate in the current state and keep monitoring until a valid
                // state transition is possible.
                // However, there could be a split brain here, new workers will use DDB value as source of truth,
                // so we could also write back CLIENT_VERSION_2x to DDB to ensure all workers have consistent
                // behavior.
                // Ideally we don't expect modifications to DDB table out of the KCL migration tool scope,
                // so keeping it simple and not writing back to DDB, the error log below would help capture
                // any strange behavior if this happens.
                log.error(
                        "Migration state has invalid client version {}. Transition from {} is not supported",
                        newState,
                        CLIENT_VERSION_2x);
                throw new InvalidStateException(String.format("Unexpected new state %s", newState));
            }
        } catch (final InvalidStateException | DependencyException e) {
            scope.addData(FAULT_METRIC, 1, StandardUnit.COUNT, MetricsLevel.SUMMARY);
            throw e;
        } finally {
            MetricsUtil.endScope(scope);
        }
    }

    // Cancels the monitor asynchronously: cancel() takes the monitor's lock, and calling it
    // from a context that already holds this state's lock could deadlock (see monitor docs).
    private void cancelRollForwardMonitor() {
        if (rollForwardMonitor != null) {
            final ClientVersionChangeMonitor localRollForwardMonitor = rollForwardMonitor;
            CompletableFuture.supplyAsync(() -> {
                log.info("Cancelling roll-forward monitor");
                localRollForwardMonitor.cancel();
                return null;
            });
            rollForwardMonitor = null;
        }
    }
}

View file

@ -0,0 +1,70 @@
/*
* Copyright 2024 Amazon.com, Inc. or its affiliates.
* Licensed under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package software.amazon.kinesis.coordinator.migration;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import software.amazon.awssdk.annotations.ThreadSafe;
import software.amazon.kinesis.annotations.KinesisClientInternalApi;
import software.amazon.kinesis.coordinator.DynamicMigrationComponentsInitializer;
import software.amazon.kinesis.leases.exceptions.DependencyException;
/**
 * State for CLIENT_VERSION_3x which enables KCL to run 3.x algorithms on new KCLv3.x application
 * or successfully upgraded application which upgraded from v2.x. This is a terminal state of the
 * state machine and no rollbacks are supported in this state.
 */
@KinesisClientInternalApi
@RequiredArgsConstructor
@Slf4j
@ThreadSafe
public class MigrationClientVersion3xState implements MigrationClientVersionState {
    private final MigrationStateMachine stateMachine;
    private final DynamicMigrationComponentsInitializer initializer;

    // Lifecycle flags guarded by this instance's lock; the state is active iff entered && !left.
    private boolean entered = false;
    private boolean left = false;

    @Override
    public ClientVersion clientVersion() {
        return ClientVersion.CLIENT_VERSION_3x;
    }

    /**
     * Initializes KCL components for 3.x functionality. Idempotent: entering an
     * already-entered state is a no-op. No monitor is started since this is a terminal state.
     *
     * @param fromClientVersion the client version the state machine is transitioning from
     * @throws DependencyException if initializing the 3.x components fails
     */
    @Override
    public synchronized void enter(final ClientVersion fromClientVersion) throws DependencyException {
        if (!entered) {
            log.info("Entering {} from {}", this, fromClientVersion);
            initializer.initializeClientVersionFor3x(fromClientVersion);
            entered = true;
        } else {
            log.info("Not entering {}", left ? "already exited state" : "already entered state");
        }
    }

    /**
     * Marks the state as exited. Fix: declared {@code synchronized} for consistency with
     * {@link #enter}, since both methods mutate the lifecycle flags of this
     * {@code @ThreadSafe} class.
     */
    @Override
    public synchronized void leave() {
        if (entered && !left) {
            log.info("Leaving {}", this);
            entered = false;
            left = true;
        } else {
            log.info("Cannot leave {}", entered ? "already exited state" : "because state is not active");
        }
    }

    @Override
    public String toString() {
        return getClass().getSimpleName();
    }
}

View file

@ -0,0 +1,156 @@
/*
* Copyright 2024 Amazon.com, Inc. or its affiliates.
* Licensed under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package software.amazon.kinesis.coordinator.migration;
import java.util.Random;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.ScheduledExecutorService;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import software.amazon.awssdk.annotations.ThreadSafe;
import software.amazon.awssdk.services.cloudwatch.model.StandardUnit;
import software.amazon.kinesis.annotations.KinesisClientInternalApi;
import software.amazon.kinesis.coordinator.CoordinatorStateDAO;
import software.amazon.kinesis.coordinator.DynamicMigrationComponentsInitializer;
import software.amazon.kinesis.leases.exceptions.DependencyException;
import software.amazon.kinesis.leases.exceptions.InvalidStateException;
import software.amazon.kinesis.metrics.MetricsLevel;
import software.amazon.kinesis.metrics.MetricsScope;
import software.amazon.kinesis.metrics.MetricsUtil;
import static software.amazon.kinesis.coordinator.migration.ClientVersion.CLIENT_VERSION_2x;
import static software.amazon.kinesis.coordinator.migration.ClientVersion.CLIENT_VERSION_3x;
import static software.amazon.kinesis.coordinator.migration.MigrationStateMachineImpl.FAULT_METRIC;
import static software.amazon.kinesis.coordinator.migration.MigrationStateMachineImpl.METRICS_OPERATION;
/**
 * State for CLIENT_VERSION_3x_WITH_ROLLBACK which enables KCL to run its 3.x compliant algorithms
 * during the upgrade process after all KCL workers in the fleet are 3.x complaint. Since this
 * is an instant switch from CLIENT_VERSION_UPGRADE_FROM_2x, it also supports rollback if customers
 * see regression to allow for instant rollbacks as well. This would be achieved by customers
 * running a KCL migration tool to update MigrationState in DDB. So this state monitors for
 * rollback triggers and performs state transitions accordingly.
 */
@Slf4j
@KinesisClientInternalApi
@RequiredArgsConstructor
@ThreadSafe
public class MigrationClientVersion3xWithRollbackState implements MigrationClientVersionState {
    private final MigrationStateMachine stateMachine;
    private final CoordinatorStateDAO coordinatorStateDAO;
    private final ScheduledExecutorService stateMachineThreadPool;
    private final DynamicMigrationComponentsInitializer initializer;
    private final Random random;

    // Monitor watching DDB for a rollback (-> 2x) or upgrade-finalize (-> 3x) trigger.
    private ClientVersionChangeMonitor rollbackMonitor;
    // Lifecycle flags guarded by this instance's lock; the state is active iff entered && !left.
    private boolean entered;
    private boolean left;

    @Override
    public ClientVersion clientVersion() {
        return ClientVersion.CLIENT_VERSION_3x_WITH_ROLLBACK;
    }

    /**
     * Initializes KCL components for 3.x-with-rollback functionality and starts the
     * rollback monitor. Idempotent: entering an already-entered state is a no-op.
     *
     * @param fromClientVersion the client version the state machine is transitioning from
     * @throws DependencyException if initializing the 3.x components fails
     */
    @Override
    public synchronized void enter(final ClientVersion fromClientVersion) throws DependencyException {
        if (!entered) {
            log.info("Entering {} from {}", this, fromClientVersion);
            initializer.initializeClientVersionFor3xWithRollback(fromClientVersion);
            // we need to run the rollback monitor
            log.info("Starting rollback monitor");
            rollbackMonitor = new ClientVersionChangeMonitor(
                    initializer.metricsFactory(),
                    coordinatorStateDAO,
                    stateMachineThreadPool,
                    this::onClientVersionChange,
                    clientVersion(),
                    random);
            rollbackMonitor.startMonitor();
            entered = true;
        } else {
            log.info("Not entering {}", left ? "already exited state" : "already entered state");
        }
    }

    /**
     * Cancels the rollback monitor and marks the state as exited. Fix: declared
     * {@code synchronized} for consistency with {@link #enter} and
     * {@link #onClientVersionChange}, which share the lifecycle flags.
     */
    @Override
    public synchronized void leave() {
        if (entered && !left) {
            log.info("Leaving {}", this);
            cancelRollbackMonitor();
            entered = false;
            left = true;
        } else {
            log.info("Cannot leave {}", entered ? "already exited state" : "because state is not active");
        }
    }

    /** Consistent with the sibling states so log statements print a readable name. */
    @Override
    public String toString() {
        return getClass().getSimpleName();
    }

    /**
     * Callback handler to handle client version changes in MigrationState in DDB.
     * @param newState current MigrationState read from DDB where client version is not
     *        CLIENT_VERSION_3x_WITH_ROLLBACK
     * @throws InvalidStateException during transition to the next state based on the new ClientVersion
     *         or if the new state in DDB is unexpected.
     * @throws DependencyException if a dependency fails while transitioning to the next state
     */
    private synchronized void onClientVersionChange(final MigrationState newState)
            throws InvalidStateException, DependencyException {
        if (!entered || left) {
            // A stale callback after leave(); ignore it.
            log.warn("Received client version change notification on inactive state {}", this);
            return;
        }
        final MetricsScope scope =
                MetricsUtil.createMetricsWithOperation(initializer.metricsFactory(), METRICS_OPERATION);
        try {
            switch (newState.getClientVersion()) {
                case CLIENT_VERSION_2x:
                    log.info("A rollback has been initiated for the application. Transition to {}", CLIENT_VERSION_2x);
                    stateMachine.transitionTo(ClientVersion.CLIENT_VERSION_2x, newState);
                    break;
                case CLIENT_VERSION_3x:
                    // Fix: added the missing space between the concatenated string literals
                    // ("...move to a" + "terminal state..." logged "aterminal").
                    log.info("Customer has switched to 3.x after successful upgrade, state machine will move to a "
                            + "terminal state and stop monitoring. Rollbacks will no longer be supported anymore");
                    stateMachine.transitionTo(CLIENT_VERSION_3x, newState);
                    // This worker will still be running the migrationAdaptive components in 3.x mode which will
                    // no longer dynamically switch back to 2.x mode, however to directly run 3.x component without
                    // adaption to migration (i.e. move to CLIENT_VERSION_3x state), it requires this worker to go
                    // through the current deployment which initiated the switch to 3.x mode.
                    break;
                default:
                    // This should not happen, so throw an exception that allows the monitor to continue monitoring
                    // changes, this allows KCL to operate in the current state and keep monitoring until a valid
                    // state transition is possible.
                    // However, there could be a split brain here, new workers will use DDB value as source of truth,
                    // so we could also write back CLIENT_VERSION_3x_WITH_ROLLBACK to DDB to ensure all workers have
                    // consistent behavior.
                    // Ideally we don't expect modifications to DDB table out of the KCL migration tool scope,
                    // so keeping it simple and not writing back to DDB, the error log below would help capture
                    // any strange behavior if this happens.
                    log.error("Migration state has invalid client version {}", newState);
                    throw new InvalidStateException(String.format("Unexpected new state %s", newState));
            }
        } catch (final InvalidStateException | DependencyException e) {
            scope.addData(FAULT_METRIC, 1, StandardUnit.COUNT, MetricsLevel.SUMMARY);
            throw e;
        } finally {
            MetricsUtil.endScope(scope);
        }
    }

    // Cancels the monitor asynchronously: cancel() takes the monitor's lock, and calling it
    // from a context that already holds this state's lock could deadlock (see monitor docs).
    private void cancelRollbackMonitor() {
        if (rollbackMonitor != null) {
            final ClientVersionChangeMonitor localRollbackMonitor = rollbackMonitor;
            CompletableFuture.supplyAsync(() -> {
                log.info("Cancelling rollback monitor");
                localRollbackMonitor.cancel();
                return null;
            });
            rollbackMonitor = null;
        }
    }
}

View file

@ -0,0 +1,47 @@
/*
* Copyright 2024 Amazon.com, Inc. or its affiliates.
* Licensed under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package software.amazon.kinesis.coordinator.migration;
import software.amazon.kinesis.leases.exceptions.DependencyException;
/**
 * Interface of a state implementation for the MigrationStateMachine.
 *
 * Implementations represent one {@link ClientVersion} each; the state machine calls
 * {@link #enter} when transitioning into the state and {@link #leave} after
 * transitioning out of it.
 */
public interface MigrationClientVersionState {
    /**
     * The associated clientVersion this state corresponds to
     * @return ClientVersion that this state implements the logic for.
     */
    ClientVersion clientVersion();

    /**
     * Enter the state and perform the business logic of being in this state
     * which includes performing any monitoring that allows the next state
     * transition and also initializing the KCL based on the ClientVersion.
     * @param fromClientVersion from previous state if any specific action must
     *                          be taken based on the state from which this state
     *                          is being entered from.
     * @throws DependencyException if DDB fails in unexpected ways for those states
     *                             that create the GSI
     */
    void enter(ClientVersion fromClientVersion) throws DependencyException;

    /**
     * Invoked after the transition to another state has occurred
     * to allow printing any helpful logs or performing cleanup.
     */
    void leave();
}

View file

@ -0,0 +1,263 @@
/*
* Copyright 2024 Amazon.com, Inc. or its affiliates.
* Licensed under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package software.amazon.kinesis.coordinator.migration;
import java.util.AbstractMap.SimpleEntry;
import java.util.Map;
import java.util.Random;
import java.util.concurrent.Callable;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import software.amazon.awssdk.annotations.ThreadSafe;
import software.amazon.awssdk.services.dynamodb.model.ExpectedAttributeValue;
import software.amazon.kinesis.annotations.KinesisClientInternalApi;
import software.amazon.kinesis.coordinator.CoordinatorConfig.ClientVersionConfig;
import software.amazon.kinesis.coordinator.CoordinatorState;
import software.amazon.kinesis.coordinator.CoordinatorStateDAO;
import software.amazon.kinesis.leases.exceptions.DependencyException;
import software.amazon.kinesis.leases.exceptions.InvalidStateException;
import software.amazon.kinesis.leases.exceptions.ProvisionedThroughputException;
import static software.amazon.kinesis.coordinator.migration.ClientVersion.CLIENT_VERSION_2x;
import static software.amazon.kinesis.coordinator.migration.ClientVersion.CLIENT_VERSION_3x;
import static software.amazon.kinesis.coordinator.migration.ClientVersion.CLIENT_VERSION_3x_WITH_ROLLBACK;
import static software.amazon.kinesis.coordinator.migration.ClientVersion.CLIENT_VERSION_UPGRADE_FROM_2x;
import static software.amazon.kinesis.coordinator.migration.MigrationState.MIGRATION_HASH_KEY;
/**
* Initializer to determine start state of the state machine which identifies the
* state to initialize KCL when it is starting up. The initial state is determined based on the
* customer configured {@link ClientVersionConfig} and the current {@link MigrationState} in DDB,
* as follows
* ClientVersionConfig | MigrationState (DDB) | initial client version
* --------------------+---------------------------------+--------------------------------
* COMPATIBLE_WITH_2x | Does not exist | CLIENT_VERSION_UPGRADE_FROM_2x
* 3x | Does not exist | CLIENT_VERSION_3x
* COMPATIBLE_WITH_2x | CLIENT_VERSION_3x_WITH_ROLLBACK | CLIENT_VERSION_3x_WITH_ROLLBACK
* 3x | CLIENT_VERSION_3x_WITH_ROLLBACK | CLIENT_VERSION_3x
* any | CLIENT_VERSION_2x | CLIENT_VERSION_2x
* any | CLIENT_VERSION_UPGRADE_FROM_2x | CLIENT_VERSION_UPGRADE_FROM_2x
* any | CLIENT_VERSION_3x | CLIENT_VERSION_3x
*/
@KinesisClientInternalApi
@RequiredArgsConstructor
@Slf4j
@ThreadSafe
public class MigrationClientVersionStateInitializer {
private static final int MAX_INITIALIZATION_RETRY = 10;
private static final long INITIALIZATION_RETRY_DELAY_MILLIS = 1000L;
/**
* A jitter factor of 10% to stagger the retries.
*/
private static final double JITTER_FACTOR = 0.1;
private final Callable<Long> timeProvider;
private final CoordinatorStateDAO coordinatorStateDAO;
private final ClientVersionConfig clientVersionConfig;
private final Random random;
private final String workerIdentifier;
/**
 * Determines (and, if needed, persists) the client version the migration state machine
 * should start in, retrying the conditional DDB update a bounded number of times when
 * another worker races this one.
 *
 * @return the start client version paired with the corresponding migration state
 * @throws DependencyException if the initial state could not be determined within the
 *         retry budget, or if reading/creating the state failed in unexpected ways
 */
public SimpleEntry<ClientVersion, MigrationState> getInitialState() throws DependencyException {
    log.info("Initializing migration state machine starting state, configured version {}", clientVersionConfig);
    try {
        MigrationState migrationState = getMigrationStateFromDynamo();
        for (int attempt = 0; attempt < MAX_INITIALIZATION_RETRY; attempt++) {
            final ClientVersion initialClientVersion = getClientVersionForInitialization(migrationState);
            if (migrationState.getClientVersion() == initialClientVersion) {
                // DDB already reflects the version to start in; nothing to persist.
                return new SimpleEntry<>(initialClientVersion, migrationState);
            }
            // On a lost race or transient failure, the returned value reflects the current state in dynamo.
            migrationState = updateMigrationStateInDynamo(migrationState, initialClientVersion);
            if (migrationState.getClientVersion() == initialClientVersion) {
                // Update succeeded; transition to the state.
                return new SimpleEntry<>(initialClientVersion, migrationState);
            }
            final long delay = getInitializationRetryDelay();
            log.warn(
                    "Failed to update migration state with {}, retry after delay {}",
                    initialClientVersion,
                    delay);
            safeSleep(delay);
        }
    } catch (final InvalidStateException e) {
        log.error("Unable to initialize state machine", e);
    }
    throw new DependencyException(
            new RuntimeException("Unable to determine initial state for migration state machine"));
}
/**
 * Computes the client version KCL should initialize in, given the migration state
 * currently persisted in DDB and the customer-configured client version.
 *
 * @param migrationState current migration state read from DDB
 *        (CLIENT_VERSION_INIT when no record exists yet)
 * @return the client version to start the state machine in
 * @throws IllegalStateException if the persisted version is unknown
 */
public ClientVersion getClientVersionForInitialization(final MigrationState migrationState) {
    switch (migrationState.getClientVersion()) {
        case CLIENT_VERSION_INIT: {
            // There is no state in DDB, set state to config version and transition to configured version.
            final ClientVersion configuredVersion = getNextClientVersionBasedOnConfigVersion();
            log.info("Application is starting in {}", configuredVersion);
            return configuredVersion;
        }
        case CLIENT_VERSION_3x_WITH_ROLLBACK:
            if (clientVersionConfig == ClientVersionConfig.CLIENT_VERSION_CONFIG_3x) {
                // upgrade successful, allow transition to 3x.
                log.info("Application has successfully upgraded, transitioning to {}", CLIENT_VERSION_3x);
                return CLIENT_VERSION_3x;
            }
            log.info("Initialize with {}", CLIENT_VERSION_3x_WITH_ROLLBACK);
            return migrationState.getClientVersion();
        case CLIENT_VERSION_2x:
            log.info("Application has rolled-back, initialize with {}", CLIENT_VERSION_2x);
            return migrationState.getClientVersion();
        case CLIENT_VERSION_UPGRADE_FROM_2x:
            log.info("Application is upgrading, initialize with {}", CLIENT_VERSION_UPGRADE_FROM_2x);
            return migrationState.getClientVersion();
        case CLIENT_VERSION_3x:
            log.info("Initialize with {}", CLIENT_VERSION_3x);
            return migrationState.getClientVersion();
        default:
            throw new IllegalStateException(String.format("Unknown version in DDB %s", migrationState));
    }
}
/**
 * Update the migration state's client version in dynamo conditional on the current client version
 * in dynamo. So that if another worker updates the value first, the update fails. If the update fails,
 * the method will read the latest value and return so that initialization can be retried.
 * If the value does not exist in dynamo, it will create it.
 *
 * @param migrationState the state previously read from dynamo; mutated in place to carry
 *                       nextClientVersion and this worker's identifier
 * @param nextClientVersion the client version to persist
 * @return the persisted state on success; the latest value read from dynamo when the
 *         conditional write lost a race; or the (already mutated) input state on transient
 *         DDB errors so the caller can retry
 * @throws InvalidStateException if re-reading the state fails in a non-retryable way
 *         (e.g. the coordinator state table does not exist)
 */
private MigrationState updateMigrationStateInDynamo(
        final MigrationState migrationState, final ClientVersion nextClientVersion) throws InvalidStateException {
    try {
        if (migrationState.getClientVersion() == ClientVersion.CLIENT_VERSION_INIT) {
            // No record exists yet: create it conditionally so only one racing worker wins.
            migrationState.update(nextClientVersion, workerIdentifier);
            log.info("Creating {}", migrationState);
            final boolean created = coordinatorStateDAO.createCoordinatorStateIfNotExists(migrationState);
            if (!created) {
                log.debug("Create {} did not succeed", migrationState);
                return getMigrationStateFromDynamo();
            }
        } else {
            log.info("Updating {} with {}", migrationState, nextClientVersion);
            // Capture the expectation on the *current* client version before mutating the state,
            // so the conditional write fails if another worker changed it first.
            final Map<String, ExpectedAttributeValue> expectations =
                    migrationState.getDynamoClientVersionExpectation();
            migrationState.update(nextClientVersion, workerIdentifier);
            final boolean updated =
                    coordinatorStateDAO.updateCoordinatorStateWithExpectation(migrationState, expectations);
            if (!updated) {
                log.debug("Update {} did not succeed", migrationState);
                return getMigrationStateFromDynamo();
            }
        }
        return migrationState;
    } catch (final ProvisionedThroughputException | DependencyException e) {
        // Transient failure: hand back the previous value so getInitialState() retries.
        log.debug(
                "Failed to update migration state {} with {}, return previous value to trigger a retry",
                migrationState,
                nextClientVersion,
                e);
        return migrationState;
    }
}
/**
 * Map the configured {@link ClientVersionConfig} to the client version a fresh
 * deployment (no state in DDB) should start in.
 * @return CLIENT_VERSION_UPGRADE_FROM_2x for 2.x-compatible config, CLIENT_VERSION_3x otherwise
 */
private ClientVersion getNextClientVersionBasedOnConfigVersion() {
    if (clientVersionConfig == ClientVersionConfig.CLIENT_VERSION_CONFIG_COMPATIBLE_WITH_2x) {
        return CLIENT_VERSION_UPGRADE_FROM_2x;
    }
    if (clientVersionConfig == ClientVersionConfig.CLIENT_VERSION_CONFIG_3x) {
        return CLIENT_VERSION_3x;
    }
    throw new IllegalStateException(String.format("Unknown configured Client version %s", clientVersionConfig));
}
/**
 * Read the current {@link MigrationState} from DDB with retries.
 * @return current Migration state from DDB; if none exists, an initial Migration State with
 *         CLIENT_VERSION_INIT will be returned
 * @throws InvalidStateException when the dynamo table does not exist or the stored item does
 *         not conform to the MigrationState schema, in which case retrying is not useful
 */
private MigrationState getMigrationStateFromDynamo() throws InvalidStateException {
    return executeCallableWithRetryAndJitter(
            () -> {
                final CoordinatorState state = coordinatorStateDAO.getCoordinatorState(MIGRATION_HASH_KEY);
                if (state == null) {
                    log.info("No Migration state available in DDB");
                    return new MigrationState(MIGRATION_HASH_KEY, workerIdentifier);
                }
                if (state instanceof MigrationState) {
                    log.info("Current migration state in DDB {}", state);
                    return (MigrationState) state;
                }
                // Fixed message typo: "confirming" -> "conforming". This is non-retryable,
                // so executeCallableWithRetryAndJitter rethrows it immediately.
                throw new InvalidStateException(
                        String.format("Unexpected state found not conforming to MigrationState schema %s", state));
            },
            "get MigrationState from DDB");
}
/**
 * Helper method to retry a given callable up to MAX_INITIALIZATION_RETRY times for all retryable
 * exceptions. It considers InvalidStateException as a non-retryable exception. During retry, it
 * will compute a delay with jitter before retrying.
 * @param callable callable to invoke either until it succeeds or max retry attempts exceed.
 * @param description a meaningful description to log exceptions
 * @return the value returned by the callable
 * @param <T> Return type of the callable
 * @throws InvalidStateException If the callable throws InvalidStateException, it will not be retried and will
 *      be thrown back.
 */
private <T> T executeCallableWithRetryAndJitter(final Callable<T> callable, final String description)
        throws InvalidStateException {
    int retryCount = 0;
    Exception lastException = null;
    while (retryCount++ < MAX_INITIALIZATION_RETRY) {
        try {
            return callable.call();
        } catch (final Exception e) {
            if (e instanceof InvalidStateException) {
                // throw the non-retryable exception
                throw (InvalidStateException) e;
            }
            lastException = e;
            final long delay = getInitializationRetryDelay();
            log.warn("Failed to {}, retry after delay {}", description, delay, e);
            safeSleep(delay);
        }
    }
    // Preserve the last failure as the cause so callers retain the diagnostics
    // (previously the underlying exception was dropped here).
    throw new RuntimeException(
            String.format("Failed to %s after %d retries, giving up", description, MAX_INITIALIZATION_RETRY),
            lastException);
}
/**
 * Sleep for the given delay without propagating InterruptedException; the thread's
 * interrupt status is restored so callers and subsequent blocking calls can observe it.
 * @param delay sleep duration in milliseconds
 */
private void safeSleep(final long delay) {
    try {
        Thread.sleep(delay);
    } catch (final InterruptedException ie) {
        log.debug("Interrupted sleep during state machine initialization retry");
        // Re-assert the interrupt flag instead of swallowing it, so an in-progress
        // shutdown is not hidden from the rest of the initialization path.
        Thread.currentThread().interrupt();
    }
}
/**
 * Generate a retry delay with jitter that is a factor of the base interval.
 * @return base delay plus a random jitter in [0, JITTER_FACTOR * base)
 */
private long getInitializationRetryDelay() {
    final double jitterFraction = random.nextDouble() * JITTER_FACTOR;
    final long jitter = (long) (jitterFraction * INITIALIZATION_RETRY_DELAY_MILLIS);
    return INITIALIZATION_RETRY_DELAY_MILLIS + jitter;
}
}

View file

@ -0,0 +1,241 @@
/*
* Copyright 2024 Amazon.com, Inc. or its affiliates.
* Licensed under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package software.amazon.kinesis.coordinator.migration;
import java.util.Random;
import java.util.concurrent.Callable;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.ScheduledExecutorService;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import software.amazon.awssdk.annotations.ThreadSafe;
import software.amazon.awssdk.services.cloudwatch.model.StandardUnit;
import software.amazon.kinesis.annotations.KinesisClientInternalApi;
import software.amazon.kinesis.coordinator.CoordinatorStateDAO;
import software.amazon.kinesis.coordinator.DynamicMigrationComponentsInitializer;
import software.amazon.kinesis.leases.exceptions.DependencyException;
import software.amazon.kinesis.leases.exceptions.InvalidStateException;
import software.amazon.kinesis.metrics.MetricsLevel;
import software.amazon.kinesis.metrics.MetricsScope;
import software.amazon.kinesis.metrics.MetricsUtil;
import static software.amazon.kinesis.coordinator.migration.ClientVersion.CLIENT_VERSION_2x;
import static software.amazon.kinesis.coordinator.migration.ClientVersion.CLIENT_VERSION_3x_WITH_ROLLBACK;
import static software.amazon.kinesis.coordinator.migration.MigrationStateMachineImpl.FAULT_METRIC;
import static software.amazon.kinesis.coordinator.migration.MigrationStateMachineImpl.METRICS_OPERATION;
/**
* State for CLIENT_VERSION_UPGRADE_FROM_2x. When state machine enters this state,
* KCL is initialized to operate in dual mode for Lease assignment and Leader decider algorithms
* which initially start in 2.x compatible mode and when all the KCL workers are 3.x compliant,
* it dynamically switches to the 3.x algorithms. It also monitors for rollback
* initiated from customer via the KCL migration tool and instantly switches back to the 2.x
 * compliant algorithms.
 * The allowed state transitions are to CLIENT_VERSION_3x_WITH_ROLLBACK when KCL workers are
 * 3.x compliant, and to CLIENT_VERSION_2x when customer has initiated a rollback.
* Only the leader KCL worker performs migration ready monitor and notifies all workers (including
* itself) via a MigrationState update. When all worker's monitor notice the MigrationState change
* (including itself), it will transition to CLIENT_VERSION_3x_WITH_ROLLBACK.
*/
@KinesisClientInternalApi
@RequiredArgsConstructor
@Slf4j
@ThreadSafe
public class MigrationClientVersionUpgradeFrom2xState implements MigrationClientVersionState {
    // Collaborators injected via @RequiredArgsConstructor.
    private final MigrationStateMachine stateMachine;
    private final Callable<Long> timeProvider;
    private final CoordinatorStateDAO coordinatorStateDAO;
    private final ScheduledExecutorService stateMachineThreadPool;
    private final DynamicMigrationComponentsInitializer initializer;
    private final Random random;
    // Snapshot of the MigrationState this state was entered with; used as the
    // conditional-write expectation when flipping to 3.x-with-rollback.
    private final MigrationState currentMigrationState;
    // Duration the readiness trigger must hold before the flip is committed.
    private final long flipTo3XStabilizerTimeInSeconds;

    // Monitors created on enter() and cancelled on leave()/transition; null when inactive.
    private MigrationReadyMonitor migrationMonitor;
    private ClientVersionChangeMonitor clientVersionChangeMonitor;
    // Lifecycle guards: enter/leave are idempotent and a left state stays left.
    private boolean entered = false;
    private boolean left = false;

    @Override
    public ClientVersion clientVersion() {
        return ClientVersion.CLIENT_VERSION_UPGRADE_FROM_2x;
    }

    /**
     * Initialize KCL components for dual (2.x-compatible) mode and start both the
     * migration-ready monitor and the client-version-change monitor. Idempotent:
     * re-entering an already-entered (or already-left) state is a no-op.
     */
    @Override
    public synchronized void enter(final ClientVersion fromClientVersion) throws DependencyException {
        if (!entered) {
            log.info("Entering state {} from {}", this, fromClientVersion);
            initializer.initializeClientVersionForUpgradeFrom2x(fromClientVersion);
            log.info("Starting migration ready monitor to monitor 3.x compliance of the KCL workers");
            migrationMonitor = new MigrationReadyMonitor(
                    initializer.metricsFactory(),
                    timeProvider,
                    initializer.leaderDecider(),
                    initializer.workerIdentifier(),
                    initializer.workerMetricsDAO(),
                    initializer.workerMetricsExpirySeconds(),
                    initializer.leaseRefresher(),
                    stateMachineThreadPool,
                    this::onMigrationReady,
                    flipTo3XStabilizerTimeInSeconds);
            migrationMonitor.startMonitor();

            log.info("Starting monitor for rollback and flip to 3.x");
            clientVersionChangeMonitor = new ClientVersionChangeMonitor(
                    initializer.metricsFactory(),
                    coordinatorStateDAO,
                    stateMachineThreadPool,
                    this::onClientVersionChange,
                    clientVersion(),
                    random);
            clientVersionChangeMonitor.startMonitor();
            entered = true;
        } else {
            log.info("Not entering {}", left ? "already exited state" : "already entered state");
        }
    }

    /**
     * Cancel both monitors (asynchronously) and mark the state inactive.
     * Only valid when the state has been entered and not yet left.
     */
    @Override
    public synchronized void leave() {
        if (entered && !left) {
            log.info("Leaving {}", this);
            cancelMigrationReadyMonitor();
            cancelClientChangeVersionMonitor();
            entered = false;
        } else {
            log.info("Cannot leave {}", entered ? "already exited state" : "because state is not active");
        }
    }

    @Override
    public String toString() {
        return getClass().getSimpleName();
    }

    /**
     * Invoked (on the leader worker only) when the fleet has been 3.x-ready for the
     * stabilization period. Writes the flip to DDB rather than transitioning locally,
     * so that every worker (leader included) transitions via the DDB change notification.
     */
    private synchronized void onMigrationReady() {
        // this is invoked on the leader worker only
        if (!entered || left || migrationMonitor == null) {
            log.info("Ignoring migration ready monitor, state already transitioned");
            return;
        }
        // update dynamo with the state to toggle to 3.x
        // and let the clientVersionChange kick in to do state transition
        // this way both leader and non-leader worker all transition when
        // it discovers the update from ddb.
        if (updateDynamoStateForTransition()) {
            // successfully toggled the state, now we can cancel the monitor
            cancelMigrationReadyMonitor();
        }
        // else - either migration ready monitor will retry or
        // client Version change callback will initiate the next state transition.
    }

    // Cancels asynchronously: cancel() can block on the monitor's lock, and this method is
    // called from synchronized callbacks that may run on the monitor's own thread pool.
    private void cancelMigrationReadyMonitor() {
        if (migrationMonitor != null) {
            final MigrationReadyMonitor localMigrationMonitor = migrationMonitor;
            CompletableFuture.supplyAsync(() -> {
                log.info("Cancelling migration ready monitor");
                localMigrationMonitor.cancel();
                return null;
            });
            migrationMonitor = null;
        }
    }

    // Same asynchronous-cancel pattern as cancelMigrationReadyMonitor().
    private void cancelClientChangeVersionMonitor() {
        if (clientVersionChangeMonitor != null) {
            final ClientVersionChangeMonitor localClientVersionChangeMonitor = clientVersionChangeMonitor;
            CompletableFuture.supplyAsync(() -> {
                log.info("Cancelling client change version monitor");
                localClientVersionChangeMonitor.cancel();
                return null;
            });
            clientVersionChangeMonitor = null;
        }
    }

    /**
     * Callback handler to handle client version changes in MigrationState in DDB.
     * @param newState current MigrationState read from DDB where client version is not CLIENT_VERSION_UPGRADE_FROM_2x
     * @throws InvalidStateException during transition to the next state based on the new ClientVersion
     *     or if the new state in DDB is unexpected.
     */
    private synchronized void onClientVersionChange(final MigrationState newState)
            throws InvalidStateException, DependencyException {
        if (!entered || left) {
            log.warn("Received client version change notification on inactive state {}", this);
            return;
        }
        final MetricsScope scope =
                MetricsUtil.createMetricsWithOperation(initializer.metricsFactory(), METRICS_OPERATION);
        try {
            switch (newState.getClientVersion()) {
                case CLIENT_VERSION_2x:
                    log.info("A rollback has been initiated for the application. Transition to {}", CLIENT_VERSION_2x);
                    // cancel monitor asynchronously
                    cancelMigrationReadyMonitor();
                    stateMachine.transitionTo(CLIENT_VERSION_2x, newState);
                    break;
                case CLIENT_VERSION_3x_WITH_ROLLBACK:
                    log.info("KCL workers are v3.x compliant, transition to {}", CLIENT_VERSION_3x_WITH_ROLLBACK);
                    cancelMigrationReadyMonitor();
                    stateMachine.transitionTo(CLIENT_VERSION_3x_WITH_ROLLBACK, newState);
                    break;
                default:
                    // This should not happen, so throw an exception that allows the monitor to continue monitoring
                    // changes, this allows KCL to operate in the current state and keep monitoring until a valid
                    // state transition is possible.
                    // However, there could be a split brain here, new workers will use DDB value as source of truth,
                    // so we could also write back CLIENT_VERSION_UPGRADE_FROM_2x to DDB to ensure all workers have
                    // consistent behavior.
                    // Ideally we don't expect modifications to DDB table out of the KCL migration tool scope,
                    // so keeping it simple and not writing back to DDB, the error log below would help capture
                    // any strange behavior if this happens.
                    log.error("Migration state has invalid client version {}", newState);
                    throw new InvalidStateException(String.format("Unexpected new state %s", newState));
            }
        } catch (final DependencyException | InvalidStateException e) {
            scope.addData(FAULT_METRIC, 1, StandardUnit.COUNT, MetricsLevel.SUMMARY);
            throw e;
        } finally {
            MetricsUtil.endScope(scope);
        }
    }

    /**
     * Conditionally write CLIENT_VERSION_3x_WITH_ROLLBACK to DDB, expecting the client
     * version captured when this state was entered. Returns false (after emitting a fault
     * metric) on any failure so the readiness monitor can retry.
     */
    private boolean updateDynamoStateForTransition() {
        final MetricsScope scope =
                MetricsUtil.createMetricsWithOperation(initializer.metricsFactory(), METRICS_OPERATION);
        try {
            final MigrationState newMigrationState = currentMigrationState
                    .copy()
                    .update(CLIENT_VERSION_3x_WITH_ROLLBACK, initializer.workerIdentifier());
            log.info("Updating Migration State in DDB with {} prev state {}", newMigrationState, currentMigrationState);
            return coordinatorStateDAO.updateCoordinatorStateWithExpectation(
                    newMigrationState, currentMigrationState.getDynamoClientVersionExpectation());
        } catch (final Exception e) {
            log.warn(
                    "Exception occurred when toggling to {}, upgradeReadyMonitor will retry the update"
                            + " if upgrade condition is still true",
                    CLIENT_VERSION_3x_WITH_ROLLBACK,
                    e);
            scope.addData(FAULT_METRIC, 1, StandardUnit.COUNT, MetricsLevel.SUMMARY);
            return false;
        } finally {
            MetricsUtil.endScope(scope);
        }
    }
}

View file

@ -0,0 +1,352 @@
/*
* Copyright 2024 Amazon.com, Inc. or its affiliates.
* Licensed under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package software.amazon.kinesis.coordinator.migration;
import java.time.Duration;
import java.util.HashSet;
import java.util.List;
import java.util.Objects;
import java.util.Set;
import java.util.concurrent.Callable;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.CompletionException;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.ScheduledFuture;
import java.util.concurrent.TimeUnit;
import java.util.stream.Collectors;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import software.amazon.awssdk.annotations.ThreadSafe;
import software.amazon.awssdk.services.cloudwatch.model.StandardUnit;
import software.amazon.kinesis.coordinator.LeaderDecider;
import software.amazon.kinesis.leases.Lease;
import software.amazon.kinesis.leases.LeaseRefresher;
import software.amazon.kinesis.leases.exceptions.DependencyException;
import software.amazon.kinesis.metrics.MetricsFactory;
import software.amazon.kinesis.metrics.MetricsLevel;
import software.amazon.kinesis.metrics.MetricsScope;
import software.amazon.kinesis.metrics.MetricsUtil;
import software.amazon.kinesis.worker.metricstats.WorkerMetricStats;
import software.amazon.kinesis.worker.metricstats.WorkerMetricStatsDAO;
import static software.amazon.kinesis.coordinator.migration.MigrationStateMachineImpl.METRICS_OPERATION;
/**
* Monitor for KCL workers 3.x readiness. This monitor is started on all workers but only
* executed on the leader of the fleet. The leader determines 3.x readiness if GSI of the lease
* table is active and all lease owners are emitting WorkerMetricStats. The monitor performs this
* check periodically and will invoke callback if the readiness conditions are true. Monitor
* needs to be explicitly cancelled after the readiness trigger has successfully been handled.
*
 * Thread safety - public methods and the internal runnable are synchronized to guard shared state.
*/
@Slf4j
@ThreadSafe
public class MigrationReadyMonitor implements Runnable {
    private static final long MONITOR_INTERVAL_MILLIS = Duration.ofMinutes(1).toMillis();
    private static final long LOG_INTERVAL_NANOS = Duration.ofMinutes(5).toNanos();

    /**
     * Default retry attempt for loading leases and workers before giving up.
     */
    private static final int DDB_LOAD_RETRY_ATTEMPT = 1;

    private final MetricsFactory metricsFactory;
    private final Callable<Long> timeProvider;
    private final LeaderDecider leaderDecider;
    private final String currentWorkerId;
    private final WorkerMetricStatsDAO workerMetricStatsDAO;
    // WorkerMetricStats older than this many seconds are considered inactive.
    private final long workerMetricStatsExpirySeconds;
    private final LeaseRefresher leaseRefresher;
    private final ScheduledExecutorService stateMachineThreadPool;
    // Requires the readiness condition to hold continuously before firing the callback.
    private final MonitorTriggerStabilizer triggerStabilizer;
    private final LogRateLimiter rateLimitedStatusLogger = new LogRateLimiter(LOG_INTERVAL_NANOS);

    // Non-null while the monitor is scheduled; guarded by this.
    private ScheduledFuture<?> scheduledFuture;
    // Last observed readiness of the lease-table GSI and of worker-metrics coverage;
    // kept as fields so status changes (not every check) are logged.
    private boolean gsiStatusReady;
    private boolean workerMetricsReady;
    private Set<String> lastKnownUniqueLeaseOwners = new HashSet<>();
    private Set<String> lastKnownWorkersWithActiveWorkerMetrics = new HashSet<>();

    public MigrationReadyMonitor(
            final MetricsFactory metricsFactory,
            final Callable<Long> timeProvider,
            final LeaderDecider leaderDecider,
            final String currentWorkerId,
            final WorkerMetricStatsDAO workerMetricStatsDAO,
            final long workerMetricsExpirySeconds,
            final LeaseRefresher leaseRefresher,
            final ScheduledExecutorService stateMachineThreadPool,
            final Runnable callback,
            final long callbackStabilizationInSeconds) {
        this.metricsFactory = metricsFactory;
        this.timeProvider = timeProvider;
        this.leaderDecider = leaderDecider;
        this.currentWorkerId = currentWorkerId;
        this.workerMetricStatsDAO = workerMetricStatsDAO;
        this.workerMetricStatsExpirySeconds = workerMetricsExpirySeconds;
        this.leaseRefresher = leaseRefresher;
        this.stateMachineThreadPool = stateMachineThreadPool;
        this.triggerStabilizer =
                new MonitorTriggerStabilizer(timeProvider, callbackStabilizationInSeconds, callback, currentWorkerId);
    }

    /** Schedule the periodic readiness check; no-op if already started. */
    public synchronized void startMonitor() {
        if (Objects.isNull(scheduledFuture)) {
            log.info("Starting migration ready monitor");
            scheduledFuture = stateMachineThreadPool.scheduleWithFixedDelay(
                    this, MONITOR_INTERVAL_MILLIS, MONITOR_INTERVAL_MILLIS, TimeUnit.MILLISECONDS);
        } else {
            log.info("Ignoring monitor request, since it is already started");
        }
    }

    /**
     * Cancel the monitor. Once the method returns callback will not be invoked,
     * but callback can be invoked reentrantly before this method returns.
     */
    public synchronized void cancel() {
        if (Objects.nonNull(scheduledFuture)) {
            log.info("Cancelled migration ready monitor");
            scheduledFuture.cancel(true);
            scheduledFuture = null;
        } else {
            log.info("{} is currently not active", this);
        }
    }

    /**
     * Periodic check: on the leader, evaluate readiness and feed it into the stabilizer;
     * on non-leaders, reset all cached state. Exceptions are swallowed so the scheduled
     * task keeps running.
     */
    @Override
    public synchronized void run() {
        try {
            if (Thread.currentThread().isInterrupted()) {
                log.info("{} cancelled, exiting...", this);
                return;
            }
            if (!leaderDecider.isLeader(currentWorkerId)) {
                log.debug("Not the leader, not performing migration ready check {}", this);
                triggerStabilizer.reset();
                lastKnownUniqueLeaseOwners.clear();
                lastKnownWorkersWithActiveWorkerMetrics.clear();
                return;
            }
            triggerStabilizer.call(isReadyForUpgradeTo3x());
            rateLimitedStatusLogger.log(() -> log.info("Monitor ran successfully {}", this));
        } catch (final Throwable t) {
            log.warn("{} failed, will retry after {}", this, MONITOR_INTERVAL_MILLIS, t);
        }
    }

    @Override
    public String toString() {
        // Fixed stale label: previously said "UpgradeReadyMonitor" which no longer
        // matches the class name and was confusing in log output.
        return new StringBuilder("MigrationReadyMonitor[")
                .append("G=")
                .append(gsiStatusReady)
                .append(",W=")
                .append(workerMetricsReady)
                .append("]")
                .toString();
    }

    /**
     * Readiness = lease-table LeaseOwnerToLeaseKey GSI is active AND every lease owner
     * is emitting active WorkerMetricStats. Emits both statuses as metrics.
     */
    private boolean isReadyForUpgradeTo3x() throws DependencyException {
        final MetricsScope scope = MetricsUtil.createMetricsWithOperation(metricsFactory, METRICS_OPERATION);
        try {
            // If GSI is not ready, optimize to not check if worker metrics are being emitted
            final boolean localGsiReadyStatus = leaseRefresher.isLeaseOwnerToLeaseKeyIndexActive();
            if (localGsiReadyStatus != gsiStatusReady) {
                gsiStatusReady = localGsiReadyStatus;
                log.info("Gsi ready status changed to {}", gsiStatusReady);
            } else {
                log.debug("GsiReady status {}", gsiStatusReady);
            }
            return gsiStatusReady && areLeaseOwnersEmittingWorkerMetrics();
        } finally {
            scope.addData("GsiReadyStatus", gsiStatusReady ? 1 : 0, StandardUnit.COUNT, MetricsLevel.SUMMARY);
            scope.addData(
                    "WorkerMetricsReadyStatus", workerMetricsReady ? 1 : 0, StandardUnit.COUNT, MetricsLevel.SUMMARY);
            MetricsUtil.endScope(scope);
        }
    }

    /**
     * Load leases and worker metric stats concurrently and check that the set of lease
     * owners exactly equals the set of workers with active WorkerMetricStats.
     */
    private boolean areLeaseOwnersEmittingWorkerMetrics() {
        final CompletableFuture<List<Lease>> leaseListFuture = loadLeaseListAsync();
        final CompletableFuture<List<WorkerMetricStats>> workerMetricsFuture = loadWorkerMetricStats();
        final List<Lease> leaseList = leaseListFuture.join();
        final Set<String> leaseOwners = getUniqueLeaseOwnersFromLeaseTable(leaseList);
        final List<WorkerMetricStats> workerMetricStatsList = workerMetricsFuture.join();
        final Set<String> workersWithActiveWorkerMetrics = getWorkersWithActiveWorkerMetricStats(workerMetricStatsList);

        // Leases are not checked for expired condition because:
        // If some worker has gone down and is not active, but has lease assigned to it, those leases
        // maybe expired. Since the worker is down, it may not have worker-metrics, or worker-metrics may not be active,
        // In that case, the migration condition is not considered to be met.
        // However, those leases should be assigned to another worker and so the check in the next
        // iteration could succeed. This is intentional to make sure all leases owners are accounted for
        // and the old owner does not come back up without worker metrics and reacquires the lease.
        final boolean localWorkerMetricsReady = leaseOwners.equals(workersWithActiveWorkerMetrics);
        if (localWorkerMetricsReady != workerMetricsReady) {
            workerMetricsReady = localWorkerMetricsReady;
            log.info("WorkerMetricStats status changed to {}", workerMetricsReady);
            log.info("Lease List {}", leaseList);
            log.info("WorkerMetricStats {}", workerMetricStatsList);
        } else {
            log.debug("WorkerMetricStats ready status {}", workerMetricsReady);
        }

        // NOTE(review): lastKnownUniqueLeaseOwners is initialized to an empty set and never
        // assigned null, so this null branch is unreachable; kept for defensiveness.
        if (lastKnownUniqueLeaseOwners == null) {
            log.info("Unique lease owners {}", leaseOwners);
        } else if (!lastKnownUniqueLeaseOwners.equals(leaseOwners)) {
            log.info("Unique lease owners changed to {}", leaseOwners);
        }
        lastKnownUniqueLeaseOwners = leaseOwners;

        // NOTE(review): same as above — this null branch is unreachable.
        if (lastKnownWorkersWithActiveWorkerMetrics == null) {
            log.info("Workers with active worker metric stats {}", workersWithActiveWorkerMetrics);
        } else if (!lastKnownWorkersWithActiveWorkerMetrics.equals(workersWithActiveWorkerMetrics)) {
            log.info("Workers with active worker metric stats changed {}", workersWithActiveWorkerMetrics);
        }
        lastKnownWorkersWithActiveWorkerMetrics = workersWithActiveWorkerMetrics;
        return workerMetricsReady;
    }

    private Set<String> getUniqueLeaseOwnersFromLeaseTable(final List<Lease> leaseList) {
        return leaseList.stream().map(Lease::leaseOwner).collect(Collectors.toSet());
    }

    // A worker counts as active if its stats were updated within the expiry window.
    private Set<String> getWorkersWithActiveWorkerMetricStats(final List<WorkerMetricStats> workerMetricStats) {
        final long nowInSeconds = Duration.ofMillis(now(timeProvider)).getSeconds();
        return workerMetricStats.stream()
                .filter(metricStats -> isWorkerMetricStatsActive(metricStats, nowInSeconds))
                .map(WorkerMetricStats::getWorkerId)
                .collect(Collectors.toSet());
    }

    private boolean isWorkerMetricStatsActive(final WorkerMetricStats metricStats, final long nowInSeconds) {
        return (metricStats.getLastUpdateTime() + workerMetricStatsExpirySeconds) > nowInSeconds;
    }

    private CompletableFuture<List<WorkerMetricStats>> loadWorkerMetricStats() {
        return CompletableFuture.supplyAsync(() -> loadWithRetry(workerMetricStatsDAO::getAllWorkerMetricStats));
    }

    private CompletableFuture<List<Lease>> loadLeaseListAsync() {
        return CompletableFuture.supplyAsync(() -> loadWithRetry(leaseRefresher::listLeases));
    }

    /**
     * Invoke the loader, retrying up to DDB_LOAD_RETRY_ATTEMPT times before wrapping the
     * failure in a CompletionException for the CompletableFuture pipeline.
     */
    private <T> T loadWithRetry(final Callable<T> loadFunction) {
        int retryAttempt = 0;
        while (true) {
            try {
                return loadFunction.call();
            } catch (final Exception e) {
                if (retryAttempt < DDB_LOAD_RETRY_ATTEMPT) {
                    log.warn(
                            "Failed to load : {}, retrying",
                            loadFunction.getClass().getName(),
                            e);
                    retryAttempt++;
                } else {
                    throw new CompletionException(e);
                }
            }
        }
    }

    // Falls back to wall-clock time if the injected time provider fails.
    private static long now(final Callable<Long> timeProvider) {
        try {
            return timeProvider.call();
        } catch (final Exception e) {
            log.debug("Time provider threw exception, using System.currentTimeMillis", e);
            return System.currentTimeMillis();
        }
    }

    /**
     * Stabilize the monitor trigger before invoking the callback
     * to ensure we are consistently seeing the trigger for a configured
     * stabilizationDurationInMillis
     */
    private static class MonitorTriggerStabilizer {
        private final Callable<Long> timeProvider;
        private final long stabilizationDurationInSeconds;
        private final Runnable callback;
        private final String currentWorkerId;
        private final LogRateLimiter rateLimitedTriggerStatusLogger;

        // Time of the most recent true<->false flip; the stabilization window is
        // measured from here.
        private long lastToggleTimeInMillis;
        private boolean currentTriggerStatus;

        public MonitorTriggerStabilizer(
                final Callable<Long> timeProvider,
                final long stabilizationDurationInSeconds,
                final Runnable callback,
                final String currentWorkerId) {
            this.timeProvider = timeProvider;
            this.stabilizationDurationInSeconds = stabilizationDurationInSeconds;
            this.callback = callback;
            this.currentWorkerId = currentWorkerId;
            this.rateLimitedTriggerStatusLogger = new LogRateLimiter(LOG_INTERVAL_NANOS);
        }

        /** Record the latest trigger status and fire the callback once it has been
         *  continuously true for the stabilization duration. */
        public void call(final boolean isMonitorTriggered) {
            final long now = now(timeProvider);
            if (currentTriggerStatus != isMonitorTriggered) {
                log.info("Trigger status has changed to {}", isMonitorTriggered);
                currentTriggerStatus = isMonitorTriggered;
                lastToggleTimeInMillis = now;
            }
            if (currentTriggerStatus) {
                final long deltaSeconds =
                        Duration.ofMillis(now - lastToggleTimeInMillis).getSeconds();
                if (deltaSeconds >= stabilizationDurationInSeconds) {
                    log.info("Trigger has been consistently true for {}s, invoking callback", deltaSeconds);
                    callback.run();
                } else {
                    rateLimitedTriggerStatusLogger.log(() -> log.info(
                            "Trigger has been true for {}s, waiting for stabilization time of {}s",
                            deltaSeconds,
                            stabilizationDurationInSeconds));
                }
            }
        }

        /** Clear the trigger, e.g. when this worker loses leadership. */
        public void reset() {
            if (currentTriggerStatus) {
                log.info("This worker {} is no longer the leader, reset current status", currentWorkerId);
            }
            currentTriggerStatus = false;
        }
    }

    /** Runs the supplied log statement at most once per logIntervalInNanos. */
    @RequiredArgsConstructor
    private static class LogRateLimiter {
        private final long logIntervalInNanos;

        private long nextLogTime = System.nanoTime();

        public void log(final Runnable logger) {
            final long now = System.nanoTime();
            if (now >= nextLogTime) {
                logger.run();
                nextLogTime = now + logIntervalInNanos;
            }
        }
    }
}

View file

@ -0,0 +1,231 @@
package software.amazon.kinesis.coordinator.migration;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;
import lombok.Getter;
import lombok.RequiredArgsConstructor;
import lombok.ToString;
import lombok.extern.slf4j.Slf4j;
import software.amazon.awssdk.services.dynamodb.model.AttributeAction;
import software.amazon.awssdk.services.dynamodb.model.AttributeValue;
import software.amazon.awssdk.services.dynamodb.model.AttributeValueUpdate;
import software.amazon.awssdk.services.dynamodb.model.ExpectedAttributeValue;
import software.amazon.kinesis.common.StackTraceUtils;
import software.amazon.kinesis.coordinator.CoordinatorState;
/**
* Data model of the Migration state. This is used to track the state related to migration
* from KCLv2.x to KCLv3.x.
*/
@Getter
@ToString(callSuper = true)
@Slf4j
public class MigrationState extends CoordinatorState {
/**
* Key value for the item in the CoordinatorState table
*/
public static final String MIGRATION_HASH_KEY = "Migration3.0";
/**
* Attribute name in migration state item, whose value is used during
* the KCL v3.x migration process to know whether the workers need to
* perform KCL v2.x compatible operations or can perform native KCL v3.x
* operations.
*/
public static final String CLIENT_VERSION_ATTRIBUTE_NAME = "cv";
public static final String MODIFIED_BY_ATTRIBUTE_NAME = "mb";
public static final String MODIFIED_TIMESTAMP_ATTRIBUTE_NAME = "mts";
public static final String HISTORY_ATTRIBUTE_NAME = "h";
private static final int MAX_HISTORY_ENTRIES = 10;
private ClientVersion clientVersion;
private String modifiedBy;
private long modifiedTimestamp;
private final List<HistoryEntry> history;
/**
 * Internal constructor used by deserialization and {@link #copy()}.
 * @param key hash key of the item in the coordinator state table
 * @param clientVersion client version recorded in the item
 * @param modifiedBy worker that last modified the item
 * @param modifiedTimestamp epoch-millis time of the last modification
 * @param historyEntries prior client-version transitions, newest first
 * @param others attributes not recognized by this schema, preserved as-is
 */
private MigrationState(
        final String key,
        final ClientVersion clientVersion,
        final String modifiedBy,
        final long modifiedTimestamp,
        final List<HistoryEntry> historyEntries,
        final Map<String, AttributeValue> others) {
    this.clientVersion = clientVersion;
    this.modifiedBy = modifiedBy;
    this.modifiedTimestamp = modifiedTimestamp;
    this.history = historyEntries;
    setKey(key);
    setAttributes(others);
}
/**
 * Create a fresh migration state in {@code CLIENT_VERSION_INIT}, i.e. when no
 * migration item exists in DDB yet.
 * @param key hash key of the item in the coordinator state table
 * @param modifiedBy identifier of the worker creating this state
 */
public MigrationState(final String key, final String modifiedBy) {
    this(
            key,
            ClientVersion.CLIENT_VERSION_INIT,
            modifiedBy,
            System.currentTimeMillis(),
            new ArrayList<>(),
            new HashMap<>());
}
/**
 * Serialize this state into a DDB attribute map. The history attribute is
 * omitted entirely when there are no history entries.
 * @return attribute map representing this state
 */
public HashMap<String, AttributeValue> serialize() {
    final HashMap<String, AttributeValue> result = new HashMap<>();
    result.put(CLIENT_VERSION_ATTRIBUTE_NAME, AttributeValue.fromS(clientVersion.name()));
    result.put(MODIFIED_BY_ATTRIBUTE_NAME, AttributeValue.fromS(modifiedBy));
    result.put(MODIFIED_TIMESTAMP_ATTRIBUTE_NAME, AttributeValue.fromN(String.valueOf(modifiedTimestamp)));
    if (!history.isEmpty()) {
        final List<AttributeValue> serializedHistory = history.stream()
                .map(entry -> AttributeValue.builder().m(entry.serialize()).build())
                .collect(Collectors.toList());
        result.put(
                HISTORY_ATTRIBUTE_NAME,
                AttributeValue.builder().l(serializedHistory).build());
    }
    return result;
}
/**
 * Deserialize a MigrationState from the raw DDB attribute map.
 * @param key hash key of the item being deserialized; must equal {@link #MIGRATION_HASH_KEY}
 * @param attributes raw attribute map read from DDB
 * @return the deserialized state, or null when the key does not match or the item
 *         does not conform to the MigrationState schema
 */
public static MigrationState deserialize(final String key, final HashMap<String, AttributeValue> attributes) {
    if (!MIGRATION_HASH_KEY.equals(key)) {
        return null;
    }
    try {
        // Work on a copy so recognized attributes can be removed and whatever
        // remains is preserved as opaque attributes on the state object.
        final HashMap<String, AttributeValue> mutableAttributes = new HashMap<>(attributes);
        final ClientVersion clientVersion = ClientVersion.valueOf(
                mutableAttributes.remove(CLIENT_VERSION_ATTRIBUTE_NAME).s());
        final String modifiedBy =
                mutableAttributes.remove(MODIFIED_BY_ATTRIBUTE_NAME).s();
        final long modifiedTimestamp = Long.parseLong(
                mutableAttributes.remove(MODIFIED_TIMESTAMP_ATTRIBUTE_NAME).n());

        final List<HistoryEntry> historyList = new ArrayList<>();
        // Consistency fix: consult the same mutable copy we remove from (the original
        // checked the source map while removing from the copy — equivalent but confusing).
        if (mutableAttributes.containsKey(HISTORY_ATTRIBUTE_NAME)) {
            mutableAttributes.remove(HISTORY_ATTRIBUTE_NAME).l().stream()
                    .map(historyEntry -> HistoryEntry.deserialize(historyEntry.m()))
                    .forEach(historyList::add);
        }
        final MigrationState migrationState = new MigrationState(
                MIGRATION_HASH_KEY, clientVersion, modifiedBy, modifiedTimestamp, historyList, mutableAttributes);
        if (!mutableAttributes.isEmpty()) {
            log.info("Unknown attributes {} for state {}", mutableAttributes, migrationState);
        }
        return migrationState;
    } catch (final Exception e) {
        // Best-effort: schema violations are logged and treated as "no state".
        log.warn("Unable to deserialize state with key {} and attributes {}", key, attributes, e);
    }
    return null;
}
/**
 * Build the conditional-write expectation asserting that the client version stored
 * in DDB still equals this state's client version.
 * <p>Replaced the double-brace initialization (anonymous HashMap subclass) with a
 * plain map: the anonymous subclass captured a reference to the enclosing
 * MigrationState instance and is a well-known memory/serialization pitfall.
 * @return expectation map keyed by the client version attribute name
 */
public Map<String, ExpectedAttributeValue> getDynamoClientVersionExpectation() {
    final Map<String, ExpectedAttributeValue> expectation = new HashMap<>();
    expectation.put(
            CLIENT_VERSION_ATTRIBUTE_NAME,
            ExpectedAttributeValue.builder()
                    .value(AttributeValue.fromS(clientVersion.name()))
                    .build());
    return expectation;
}
/**
 * Creates a deep-enough copy of this state: the history list and attribute map are
 * duplicated so mutations of the copy do not affect this instance.
 *
 * @return an independent copy of this MigrationState
 */
public MigrationState copy() {
    final ArrayList<HistoryEntry> historySnapshot = new ArrayList<>(getHistory());
    final HashMap<String, AttributeValue> attributeSnapshot = new HashMap<>(getAttributes());
    return new MigrationState(
            getKey(), getClientVersion(), getModifiedBy(), getModifiedTimestamp(), historySnapshot, attributeSnapshot);
}
/**
 * Transitions this state to the given client version, first pushing the previous
 * (clientVersion, modifiedBy, modifiedTimestamp) triple onto the history.
 *
 * @param clientVersion the new client version to record
 * @param modifiedBy identity of the caller performing the update
 * @return this instance, to allow call chaining
 */
public MigrationState update(final ClientVersion clientVersion, final String modifiedBy) {
    // The stack trace is logged deliberately: version flips are rare and high-impact,
    // so knowing the caller is worth the verbosity.
    log.info(
            "Migration state is being updated to {} current state {} caller {}",
            clientVersion,
            this,
            StackTraceUtils.getPrintableStackTrace(Thread.currentThread().getStackTrace()));
    // Record the outgoing values BEFORE overwriting the fields.
    addHistoryEntry(this.clientVersion, this.modifiedBy, this.modifiedTimestamp);
    this.clientVersion = clientVersion;
    this.modifiedBy = modifiedBy;
    this.modifiedTimestamp = System.currentTimeMillis();
    return this;
}
/**
 * Prepends a history entry for the given previous values, evicting the oldest entry
 * when the history exceeds {@code MAX_HISTORY_ENTRIES}.
 *
 * @param lastClientVersion client version being replaced
 * @param lastModifiedBy previous modifier identity
 * @param lastModifiedTimestamp previous modification time (epoch millis)
 */
public void addHistoryEntry(
        final ClientVersion lastClientVersion, final String lastModifiedBy, final long lastModifiedTimestamp) {
    history.add(0, new HistoryEntry(lastClientVersion, lastModifiedBy, lastModifiedTimestamp));
    if (history.size() > MAX_HISTORY_ENTRIES) {
        // Perform the eviction as its own statement rather than inside the logging call:
        // state mutation must not be hidden in a log argument.
        final HistoryEntry dropped = history.remove(history.size() - 1);
        log.info("Limit {} reached, dropping history {}", MAX_HISTORY_ENTRIES, dropped);
    }
}
/**
 * Builds the DDB {@code PUT}-action update map for persisting this state: client version,
 * modifier, modification timestamp, and (when non-empty) the serialized history.
 *
 * @return attribute-name to update mapping for a DDB UpdateItem call
 */
public Map<String, AttributeValueUpdate> getDynamoUpdate() {
    final HashMap<String, AttributeValueUpdate> updates = new HashMap<>();
    updates.put(CLIENT_VERSION_ATTRIBUTE_NAME, putAction(AttributeValue.fromS(clientVersion.name())));
    updates.put(MODIFIED_BY_ATTRIBUTE_NAME, putAction(AttributeValue.fromS(modifiedBy)));
    updates.put(MODIFIED_TIMESTAMP_ATTRIBUTE_NAME, putAction(AttributeValue.fromN(String.valueOf(modifiedTimestamp))));
    if (!history.isEmpty()) {
        final List<AttributeValue> serializedHistory =
                history.stream().map(HistoryEntry::toAv).collect(Collectors.toList());
        updates.put(HISTORY_ATTRIBUTE_NAME, putAction(AttributeValue.fromL(serializedHistory)));
    }
    return updates;
}

/** Wraps a value in an {@link AttributeValueUpdate} with the PUT action. */
private static AttributeValueUpdate putAction(final AttributeValue value) {
    return AttributeValueUpdate.builder()
            .value(value)
            .action(AttributeAction.PUT)
            .build();
}
/**
 * Immutable record of a previous migration state: the client version, who changed it,
 * and when. Serialized as a DDB map attribute inside the history list.
 */
@RequiredArgsConstructor
@ToString
public static class HistoryEntry {
    private final ClientVersion lastClientVersion;
    private final String lastModifiedBy;
    private final long lastModifiedTimestamp;

    /** @return this entry wrapped as a DDB map-typed AttributeValue. */
    public AttributeValue toAv() {
        return AttributeValue.fromM(serialize());
    }

    /**
     * Serializes this entry into a DDB attribute map.
     * Uses a plain HashMap rather than double-brace initialization, which would create an
     * anonymous subclass capturing a reference to this HistoryEntry instance.
     *
     * @return attribute map with version, modifier, and timestamp
     */
    public Map<String, AttributeValue> serialize() {
        final Map<String, AttributeValue> serialized = new HashMap<>();
        serialized.put(CLIENT_VERSION_ATTRIBUTE_NAME, AttributeValue.fromS(lastClientVersion.name()));
        serialized.put(MODIFIED_BY_ATTRIBUTE_NAME, AttributeValue.fromS(lastModifiedBy));
        serialized.put(MODIFIED_TIMESTAMP_ATTRIBUTE_NAME, AttributeValue.fromN(String.valueOf(lastModifiedTimestamp)));
        return serialized;
    }

    /**
     * Inverse of {@link #serialize()}.
     *
     * @param map attribute map previously produced by {@link #serialize()}
     * @return the reconstructed HistoryEntry
     */
    public static HistoryEntry deserialize(final Map<String, AttributeValue> map) {
        return new HistoryEntry(
                ClientVersion.valueOf(map.get(CLIENT_VERSION_ATTRIBUTE_NAME).s()),
                map.get(MODIFIED_BY_ATTRIBUTE_NAME).s(),
                Long.parseLong(map.get(MODIFIED_TIMESTAMP_ATTRIBUTE_NAME).n()));
    }
}
}

View file

@ -0,0 +1,66 @@
/*
* Copyright 2024 Amazon.com, Inc. or its affiliates.
* Licensed under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package software.amazon.kinesis.coordinator.migration;
import software.amazon.kinesis.leases.exceptions.DependencyException;
import software.amazon.kinesis.leases.exceptions.InvalidStateException;
/**
 * State machine that provides:
 * 1. Seamless upgrade from 2.x to 3.x - 3.x has introduced new algorithms that are not compatible with 2.x
 *    workers, so the state machine allows the worker to seamlessly run the 2.x functionality to be compliant
 *    with any 2.x worker in the fleet, and also seamlessly switch to 3.x functionality when all KCL workers
 *    are 3.x compliant.
 * 2. Instant rollbacks - Rollbacks are supported using the KCL Migration tool to revert back to 2.x functionality
 *    if the customer finds regressions in 3.x functionality.
 * 3. Instant roll-forwards - Once any issue has been mitigated, roll-forwards are supported instantly
 *    with the KCL Migration tool.
 */
public interface MigrationStateMachine {
    /**
     * Initialize the state machine by identifying the initial state when the KCL worker comes up for the first time.
     *
     * @throws DependencyException When unable to identify the initial state.
     */
    void initialize() throws DependencyException;

    /**
     * Shutdown the state machine and perform the cleanup necessary for the worker to gracefully shutdown.
     */
    void shutdown();

    /**
     * Terminate the state machine when it reaches a terminal state, which is a successful upgrade
     * to v3.x.
     */
    void terminate();

    /**
     * Perform a transition from the current state to the given new ClientVersion.
     *
     * @param nextClientVersion clientVersion of the new state the state machine must transition to
     * @param state the current MigrationState in dynamo
     * @throws InvalidStateException when the transition fails; this allows the state machine to stay
     *         in the current state until a valid transition is possible
     * @throws DependencyException when the transition fails due to a dependency on DDB failing in
     *         unexpected ways.
     */
    void transitionTo(final ClientVersion nextClientVersion, final MigrationState state)
            throws InvalidStateException, DependencyException;

    /**
     * Get the ClientVersion of the current state machine state.
     *
     * @return ClientVersion of current state machine state
     */
    ClientVersion getCurrentClientVersion();
}

View file

@ -0,0 +1,254 @@
/*
* Copyright 2024 Amazon.com, Inc. or its affiliates.
* Licensed under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package software.amazon.kinesis.coordinator.migration;
import java.util.AbstractMap.SimpleEntry;
import java.util.Random;
import java.util.concurrent.Callable;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.TimeUnit;
import lombok.Getter;
import lombok.extern.slf4j.Slf4j;
import software.amazon.awssdk.annotations.ThreadSafe;
import software.amazon.awssdk.services.cloudwatch.model.StandardUnit;
import software.amazon.kinesis.annotations.KinesisClientInternalApi;
import software.amazon.kinesis.coordinator.CoordinatorConfig.ClientVersionConfig;
import software.amazon.kinesis.coordinator.CoordinatorStateDAO;
import software.amazon.kinesis.coordinator.DynamicMigrationComponentsInitializer;
import software.amazon.kinesis.leases.exceptions.DependencyException;
import software.amazon.kinesis.metrics.MetricsFactory;
import software.amazon.kinesis.metrics.MetricsLevel;
import software.amazon.kinesis.metrics.MetricsScope;
import software.amazon.kinesis.metrics.MetricsUtil;
/**
 * Implementation of {@link MigrationStateMachine}.
 *
 * <p>Tracks the current {@link MigrationClientVersionState} and performs transitions between
 * client-version states. Transition, termination, and current-version queries are synchronized
 * so state changes are atomic with respect to each other.
 */
@KinesisClientInternalApi
@Getter
@Slf4j
@ThreadSafe
public class MigrationStateMachineImpl implements MigrationStateMachine {
    public static final String FAULT_METRIC = "Fault";
    public static final String METRICS_OPERATION = "Migration";

    private static final long THREAD_POOL_SHUTDOWN_TIMEOUT_SECONDS = 5L;

    private final MetricsFactory metricsFactory;
    private final Callable<Long> timeProvider;
    private final CoordinatorStateDAO coordinatorStateDAO;
    private final ScheduledExecutorService stateMachineThreadPool;
    private DynamicMigrationComponentsInitializer initializer;
    private final ClientVersionConfig clientVersionConfig;
    private final Random random;
    private final String workerId;
    private final long flipTo3XStabilizerTimeInSeconds;

    // Captured during the first successful initialize(); later calls become no-ops.
    private MigrationState startingMigrationState;

    @Getter
    private ClientVersion startingClientVersion;

    // Sentinel pre-initialization state; its enter/leave hooks only log.
    private MigrationClientVersionState currentMigrationClientVersionState = new MigrationClientVersionState() {
        @Override
        public ClientVersion clientVersion() {
            return ClientVersion.CLIENT_VERSION_INIT;
        }

        @Override
        public void enter(final ClientVersion fromClientVersion) {
            log.info("Entered {}...", clientVersion());
        }

        @Override
        public void leave() {
            log.info("Left {}...", clientVersion());
        }
    };

    private boolean terminated = false;

    public MigrationStateMachineImpl(
            final MetricsFactory metricsFactory,
            final Callable<Long> timeProvider,
            final CoordinatorStateDAO coordinatorStateDAO,
            final ScheduledExecutorService stateMachineThreadPool,
            final ClientVersionConfig clientVersionConfig,
            final Random random,
            final DynamicMigrationComponentsInitializer initializer,
            final String workerId,
            final long flipTo3XStabilizerTimeInSeconds) {
        this.metricsFactory = metricsFactory;
        this.timeProvider = timeProvider;
        this.coordinatorStateDAO = coordinatorStateDAO;
        this.stateMachineThreadPool = stateMachineThreadPool;
        this.clientVersionConfig = clientVersionConfig;
        this.random = random;
        this.initializer = initializer;
        this.workerId = workerId;
        this.flipTo3XStabilizerTimeInSeconds = flipTo3XStabilizerTimeInSeconds;
    }

    /**
     * Determines the starting client version from DDB and transitions into the matching state.
     * Idempotent: repeated calls after a successful initialization only log.
     */
    @Override
    public void initialize() throws DependencyException {
        if (startingClientVersion == null) {
            log.info("Initializing MigrationStateMachine");
            coordinatorStateDAO.initialize();
            final MigrationClientVersionStateInitializer startingStateInitializer =
                    new MigrationClientVersionStateInitializer(
                            timeProvider, coordinatorStateDAO, clientVersionConfig, random, workerId);
            final SimpleEntry<ClientVersion, MigrationState> dataForInitialization =
                    startingStateInitializer.getInitialState();
            initializer.initialize(dataForInitialization.getKey());
            transitionTo(dataForInitialization.getKey(), dataForInitialization.getValue());
            startingClientVersion = dataForInitialization.getKey();
            startingMigrationState = dataForInitialization.getValue();
            log.info("MigrationStateMachine initial clientVersion {}", startingClientVersion);
        } else {
            log.info("MigrationStateMachine already initialized with clientVersion {}", startingClientVersion);
        }
    }

    @Override
    public void shutdown() {
        terminate();
        if (!stateMachineThreadPool.isShutdown()) {
            stateMachineThreadPool.shutdown();
            try {
                // BUGFIX: awaitTermination returns FALSE on timeout. The original inverted this
                // check and forcefully shut down (with a misleading log) exactly when the pool
                // had already terminated cleanly, while letting a hung pool linger.
                if (!stateMachineThreadPool.awaitTermination(THREAD_POOL_SHUTDOWN_TIMEOUT_SECONDS, TimeUnit.SECONDS)) {
                    log.info(
                            "StateMachineThreadPool did not shutdown within {} seconds, forcefully shutting down",
                            THREAD_POOL_SHUTDOWN_TIMEOUT_SECONDS);
                    stateMachineThreadPool.shutdownNow();
                }
            } catch (final InterruptedException e) {
                log.info("Interrupted when shutting down StateMachineThreadPool, forcefully shutting down");
                stateMachineThreadPool.shutdownNow();
                // Preserve the interrupt status for the caller.
                Thread.currentThread().interrupt();
            }
        }
        log.info("Shutdown successfully");
    }

    @Override
    public synchronized void terminate() {
        if (!terminated && currentMigrationClientVersionState != null) {
            log.info("State machine is about to terminate");
            currentMigrationClientVersionState.leave();
            currentMigrationClientVersionState = null;
            log.info("State machine reached a terminal state.");
            terminated = true;
        }
    }

    @Override
    public synchronized void transitionTo(final ClientVersion nextClientVersion, final MigrationState migrationState)
            throws DependencyException {
        if (terminated) {
            throw new IllegalStateException(String.format(
                    "Cannot transition to %s after state machine is terminated, %s",
                    nextClientVersion.name(), migrationState));
        }

        final MigrationClientVersionState nextMigrationClientVersionState =
                createMigrationClientVersionState(nextClientVersion, migrationState);
        log.info(
                "Attempting to transition from {} to {}",
                currentMigrationClientVersionState.clientVersion(),
                nextClientVersion);
        currentMigrationClientVersionState.leave();

        enter(nextMigrationClientVersionState);
    }

    /**
     * Enter with retry. When entering the state machine for the first time, the caller has retry so exceptions
     * will be re-thrown. Once the state machine has initialized all transitions will be an indefinite retry.
     * It is possible the DDB state has changed by the time enter succeeds but that will occur as a new
     * state transition after entering the state. Usually the failures are due to unexpected issues with
     * DDB which will be transitional and will recover on a retry.
     * @param nextMigrationClientVersionState the state to transition to
     * @throws DependencyException If entering fails during state machine initialization.
     */
    private void enter(final MigrationClientVersionState nextMigrationClientVersionState) throws DependencyException {
        boolean success = false;
        while (!success) {
            try {
                // Enter should never fail unless it is the starting state and fails to create the GSI,
                // in which case it is an unrecoverable error that is bubbled up and KCL start up will fail.
                nextMigrationClientVersionState.enter(currentMigrationClientVersionState.clientVersion());

                currentMigrationClientVersionState = nextMigrationClientVersionState;
                log.info("Successfully transitioned to {}", nextMigrationClientVersionState.clientVersion());
                if (currentMigrationClientVersionState.clientVersion() == ClientVersion.CLIENT_VERSION_3x) {
                    terminate();
                }
                success = true;
            } catch (final DependencyException e) {
                if (currentMigrationClientVersionState.clientVersion() == ClientVersion.CLIENT_VERSION_INIT) {
                    throw e;
                }
                // Log message now matches the actual 1-second backoff below (it used to say "a minute").
                log.info(
                        "Transitioning from {} to {} failed, retrying after a second",
                        currentMigrationClientVersionState.clientVersion(),
                        nextMigrationClientVersionState.clientVersion(),
                        e);

                final MetricsScope scope = MetricsUtil.createMetricsWithOperation(metricsFactory, METRICS_OPERATION);
                scope.addData(FAULT_METRIC, 1, StandardUnit.COUNT, MetricsLevel.SUMMARY);
                MetricsUtil.endScope(scope);

                try {
                    Thread.sleep(1000);
                } catch (final InterruptedException ie) {
                    // Deliberately keep retrying; interrupt here only shortens the backoff.
                    log.info("Interrupted while sleeping before retrying state machine transition", ie);
                }
            }
        }
    }

    /** Factory for the state object matching a given client version. */
    private MigrationClientVersionState createMigrationClientVersionState(
            final ClientVersion clientVersion, final MigrationState migrationState) {
        switch (clientVersion) {
            case CLIENT_VERSION_2x:
                return new MigrationClientVersion2xState(
                        this, coordinatorStateDAO, stateMachineThreadPool, initializer, random);
            case CLIENT_VERSION_UPGRADE_FROM_2x:
                return new MigrationClientVersionUpgradeFrom2xState(
                        this,
                        timeProvider,
                        coordinatorStateDAO,
                        stateMachineThreadPool,
                        initializer,
                        random,
                        migrationState,
                        flipTo3XStabilizerTimeInSeconds);
            case CLIENT_VERSION_3x_WITH_ROLLBACK:
                return new MigrationClientVersion3xWithRollbackState(
                        this, coordinatorStateDAO, stateMachineThreadPool, initializer, random);
            case CLIENT_VERSION_3x:
                return new MigrationClientVersion3xState(this, initializer);
        }
        throw new IllegalStateException(String.format("Unknown client version %s", clientVersion));
    }

    @Override
    public ClientVersion getCurrentClientVersion() {
        if (currentMigrationClientVersionState != null) {
            return currentMigrationClientVersionState.clientVersion();
        } else if (terminated) {
            // A terminated machine means the upgrade to 3.x completed.
            return ClientVersion.CLIENT_VERSION_3x;
        }
        throw new UnsupportedOperationException(
                "No current state when state machine is either not initialized" + " or already terminated");
    }
}

View file

@ -0,0 +1,270 @@
package software.amazon.kinesis.leader;
import java.time.Duration;
import java.time.Instant;
import java.util.AbstractMap;
import java.util.Optional;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicBoolean;
import com.amazonaws.services.dynamodbv2.AcquireLockOptions;
import com.amazonaws.services.dynamodbv2.AmazonDynamoDBLockClient;
import com.amazonaws.services.dynamodbv2.GetLockOptions;
import com.amazonaws.services.dynamodbv2.LockItem;
import com.amazonaws.services.dynamodbv2.model.LockCurrentlyUnavailableException;
import com.google.common.annotations.VisibleForTesting;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import software.amazon.awssdk.services.cloudwatch.model.StandardUnit;
import software.amazon.awssdk.services.dynamodb.model.ResourceNotFoundException;
import software.amazon.kinesis.coordinator.CoordinatorStateDAO;
import software.amazon.kinesis.coordinator.LeaderDecider;
import software.amazon.kinesis.metrics.MetricsFactory;
import software.amazon.kinesis.metrics.MetricsLevel;
import software.amazon.kinesis.metrics.MetricsScope;
import software.amazon.kinesis.metrics.MetricsUtil;
import static java.util.Objects.isNull;
import static software.amazon.kinesis.coordinator.CoordinatorState.LEADER_HASH_KEY;
/**
 * Implementation of LeaderDecider that elects the leader via a lock on a DynamoDB table. This class
 * uses the AmazonDynamoDBLockClient library to perform the leader election.
 */
@RequiredArgsConstructor
@Slf4j
public class DynamoDBLockBasedLeaderDecider implements LeaderDecider {
    private static final Long DEFAULT_LEASE_DURATION_MILLIS =
            Duration.ofMinutes(2).toMillis();
    // Heartbeat period must be at least 3x shorter than the lease duration per LockClient documentation.
    private static final Long DEFAULT_HEARTBEAT_PERIOD_MILLIS =
            Duration.ofSeconds(30).toMillis();

    private final CoordinatorStateDAO coordinatorStateDao;
    private final AmazonDynamoDBLockClient dynamoDBLockClient;
    private final Long heartbeatPeriodMillis;
    private final String workerId;
    private final MetricsFactory metricsFactory;

    // Time of the last isLeader DDB check and its result, used to throttle negative re-checks.
    private long lastCheckTimeInMillis = 0L;
    private boolean lastIsLeaderResult = false;
    private final AtomicBoolean isShutdown = new AtomicBoolean(false);

    private long lastIsAnyLeaderElectedDDBReadTimeMillis = 0L;
    private boolean lastIsAnyLeaderElectedResult = false;
    /**
     * Key value pair of LockItem to the time when it was first discovered.
     * If a new LockItem fetched from ddb has a different recordVersionNumber than the one in memory,
     * it is considered a new LockItem, and the time when it was fetched is stored in memory to identify
     * lockItem expiry. This is used only in the context of the isAnyLeaderElected method.
     */
    private AbstractMap.SimpleEntry<LockItem, Long> lastIsAnyLeaderCheckLockItemToFirstEncounterTime = null;

    @VisibleForTesting
    static DynamoDBLockBasedLeaderDecider create(
            final CoordinatorStateDAO coordinatorStateDao,
            final String workerId,
            final Long leaseDuration,
            final Long heartbeatPeriod,
            final MetricsFactory metricsFactory) {
        final AmazonDynamoDBLockClient dynamoDBLockClient = new AmazonDynamoDBLockClient(coordinatorStateDao
                .getDDBLockClientOptionsBuilder()
                .withTimeUnit(TimeUnit.MILLISECONDS)
                .withLeaseDuration(leaseDuration)
                .withHeartbeatPeriod(heartbeatPeriod)
                .withCreateHeartbeatBackgroundThread(true)
                .withOwnerName(workerId)
                .build());

        return new DynamoDBLockBasedLeaderDecider(
                coordinatorStateDao, dynamoDBLockClient, heartbeatPeriod, workerId, metricsFactory);
    }

    /** Factory using the default lease duration (2 min) and heartbeat period (30 s). */
    public static DynamoDBLockBasedLeaderDecider create(
            final CoordinatorStateDAO coordinatorStateDao, final String workerId, final MetricsFactory metricsFactory) {
        return create(
                coordinatorStateDao,
                workerId,
                DEFAULT_LEASE_DURATION_MILLIS,
                DEFAULT_HEARTBEAT_PERIOD_MILLIS,
                metricsFactory);
    }

    @Override
    public void initialize() {
        log.info("Initializing DDB Lock based leader decider");
    }

    /**
     * Check the lockItem in storage and if the current worker is not leader worker, then tries to acquire lock and
     * returns true if it was able to acquire lock else false.
     * @param workerId ID of the worker
     * @return true if current worker is leader else false.
     */
    @Override
    public synchronized Boolean isLeader(final String workerId) {
        // if the decider has shutdown, then return false and don't try acquireLock anymore.
        if (isShutdown.get()) {
            publishIsLeaderMetrics(false);
            return false;
        }
        // If the last time we tried to take lock and didnt get lock, don't try to take again for heartbeatPeriodMillis
        // this is to avoid unnecessary calls to dynamoDB.
        // Different modules in KCL can request for isLeader check within heartbeatPeriodMillis, and this optimization
        // will help in those cases.
        // In case the last call returned true, we want to check the source always to ensure the correctness of leader.
        if (!lastIsLeaderResult && lastCheckTimeInMillis + heartbeatPeriodMillis > System.currentTimeMillis()) {
            publishIsLeaderMetrics(lastIsLeaderResult);
            return lastIsLeaderResult;
        }
        boolean response;
        // Get the lockItem from storage (if present).
        final Optional<LockItem> lockItem = dynamoDBLockClient.getLock(LEADER_HASH_KEY, Optional.empty());
        lockItem.ifPresent(item -> log.info("Worker : {} is the current leader.", item.getOwnerName()));

        // If the lockItem is absent or expired, the current worker is not the leader yet and may
        // attempt to acquire the lock.
        if (!lockItem.isPresent() || lockItem.get().isExpired()) {
            try {
                // Current worker does not hold the lock, try to acquireOne.
                final Optional<LockItem> leaderLockItem =
                        dynamoDBLockClient.tryAcquireLock(AcquireLockOptions.builder(LEADER_HASH_KEY)
                                .withRefreshPeriod(heartbeatPeriodMillis)
                                .withTimeUnit(TimeUnit.MILLISECONDS)
                                .withShouldSkipBlockingWait(true)
                                .build());
                leaderLockItem.ifPresent(item -> log.info("Worker : {} is new leader", item.getOwnerName()));
                // if leaderLockItem optional is empty, that means the lock is not acquired by this worker.
                response = leaderLockItem.isPresent();
            } catch (final InterruptedException e) {
                // Something bad happened, don't assume leadership and also release lock just in case the
                // lock was granted and still interrupt happened.
                releaseLeadershipIfHeld();
                log.error("Acquiring lock was interrupted in between", e);
                // BUGFIX: restore the interrupt status instead of swallowing it, so callers can
                // observe the interruption.
                Thread.currentThread().interrupt();
                response = false;
            } catch (final LockCurrentlyUnavailableException e) {
                // Another worker holds an active lock; this worker is simply not the leader.
                response = false;
            }
        } else {
            response = lockItem.get().getOwnerName().equals(workerId);
        }

        lastCheckTimeInMillis = System.currentTimeMillis();
        lastIsLeaderResult = response;
        publishIsLeaderMetrics(response);
        return response;
    }

    private void publishIsLeaderMetrics(final boolean response) {
        final MetricsScope metricsScope =
                MetricsUtil.createMetricsWithOperation(metricsFactory, METRIC_OPERATION_LEADER_DECIDER);
        metricsScope.addData(
                METRIC_OPERATION_LEADER_DECIDER_IS_LEADER, response ? 1 : 0, StandardUnit.COUNT, MetricsLevel.DETAILED);
        MetricsUtil.endScope(metricsScope);
    }

    /**
     * Releases the lock if held by current worker when this method is invoked.
     */
    @Override
    public void shutdown() {
        if (!isShutdown.getAndSet(true)) {
            releaseLeadershipIfHeld();
        }
    }

    @Override
    public void releaseLeadershipIfHeld() {
        try {
            final Optional<LockItem> lockItem = dynamoDBLockClient.getLock(LEADER_HASH_KEY, Optional.empty());
            if (lockItem.isPresent()
                    && !lockItem.get().isExpired()
                    && lockItem.get().getOwnerName().equals(workerId)) {
                log.info(
                        "Current worker : {} holds the lock, releasing it.",
                        lockItem.get().getOwnerName());
                // LockItem.close() will release the lock if current worker owns it else this call is no op.
                lockItem.get().close();
            }
        } catch (final Exception e) {
            // Best-effort release; failure here must not propagate to shutdown paths.
            log.error("Failed to complete releaseLeadershipIfHeld call.", e);
        }
    }

    /**
     * Returns if any ACTIVE leader exists that is elected by the current implementation which can be outside the
     * scope of this worker. That is leader elected by this implementation in any worker in fleet.
     * DynamoDBLockClient does not provide an interface which can tell if an active lock exists or not, thus
     * we need to put custom implementation.
     * The implementation performs DDB get every heartbeatPeriodMillis to have low RCU consumption, which means that
     * the leader could have been elected from the last time the check happened and before check happens again.
     * The information returned from this method has eventual consistency (up to heartbeatPeriodMillis interval).
     *
     * @return true, if any leader is elected else false.
     */
    @Override
    public synchronized boolean isAnyLeaderElected() {
        // Avoid going to ddb for every call and do it once every heartbeatPeriod to have low RCU usage.
        if (Duration.between(
                                Instant.ofEpochMilli(lastIsAnyLeaderElectedDDBReadTimeMillis),
                                Instant.ofEpochMilli(System.currentTimeMillis()))
                        .toMillis()
                > heartbeatPeriodMillis) {
            final MetricsScope metricsScope = MetricsUtil.createMetricsWithOperation(
                    metricsFactory, this.getClass().getSimpleName() + ":isAnyLeaderElected");
            final long startTime = System.currentTimeMillis();
            try {
                lastIsAnyLeaderElectedDDBReadTimeMillis = System.currentTimeMillis();
                final Optional<LockItem> lockItem = dynamoDBLockClient.getLockFromDynamoDB(
                        GetLockOptions.builder(LEADER_HASH_KEY).build());

                if (!lockItem.isPresent()) {
                    // There is no LockItem in the ddb table, that means no one is holding lock.
                    lastIsAnyLeaderElectedResult = false;
                    log.info("LockItem present : {}", false);
                } else {
                    final LockItem ddbLockItem = lockItem.get();
                    if (isNull(lastIsAnyLeaderCheckLockItemToFirstEncounterTime)
                            || !ddbLockItem
                                    .getRecordVersionNumber()
                                    .equals(lastIsAnyLeaderCheckLockItemToFirstEncounterTime
                                            .getKey()
                                            .getRecordVersionNumber())) {
                        // This is the first isAnyLeaderElected call, so we can't evaluate if the LockItem has expired
                        // or not yet so consider LOCK as ACTIVE.
                        // OR LockItem in ddb and in-memory LockItem have different RecordVersionNumber
                        // and thus the LOCK is still ACTIVE
                        lastIsAnyLeaderElectedResult = true;
                        lastIsAnyLeaderCheckLockItemToFirstEncounterTime =
                                new AbstractMap.SimpleEntry<>(ddbLockItem, lastIsAnyLeaderElectedDDBReadTimeMillis);
                        log.info(
                                "LockItem present : {}, and this is either first call OR lockItem has had "
                                        + "a heartbeat",
                                true);
                    } else {
                        // There is no change in the ddb lock item, so if the last update time is more than
                        // lease duration, the lock is expired else it is still ACTIVE,
                        lastIsAnyLeaderElectedResult = lastIsAnyLeaderCheckLockItemToFirstEncounterTime.getValue()
                                        + ddbLockItem.getLeaseDuration()
                                > lastIsAnyLeaderElectedDDBReadTimeMillis;
                        log.info("LockItem present : {}, and lease expiry: {}", true, lastIsAnyLeaderElectedResult);
                    }
                }
            } catch (final ResourceNotFoundException exception) {
                log.info("Lock table does not exists...");
                // If the table itself doesn't exist, there is no elected leader.
                lastIsAnyLeaderElectedResult = false;
            } finally {
                metricsScope.addData(
                        "Latency",
                        System.currentTimeMillis() - startTime,
                        StandardUnit.MILLISECONDS,
                        MetricsLevel.DETAILED);
                MetricsUtil.endScope(metricsScope);
            }
        }
        return lastIsAnyLeaderElectedResult;
    }
}

View file

@ -0,0 +1,79 @@
package software.amazon.kinesis.leader;
import lombok.extern.slf4j.Slf4j;
import software.amazon.awssdk.annotations.ThreadSafe;
import software.amazon.awssdk.services.cloudwatch.model.StandardUnit;
import software.amazon.kinesis.annotations.KinesisClientInternalApi;
import software.amazon.kinesis.coordinator.LeaderDecider;
import software.amazon.kinesis.metrics.MetricsFactory;
import software.amazon.kinesis.metrics.MetricsLevel;
import software.amazon.kinesis.metrics.MetricsScope;
import software.amazon.kinesis.metrics.MetricsUtil;
import static java.util.Objects.nonNull;
/**
 * MigrationAdaptiveLeaderDecider that wraps around the actual LeaderDecider which can dynamically
 * change based on the MigrationStateMachine.
 */
@Slf4j
@KinesisClientInternalApi
@ThreadSafe
public class MigrationAdaptiveLeaderDecider implements LeaderDecider {
    private final MetricsFactory metricsFactory;

    // Delegate chosen by the migration state machine; null until updateLeaderDecider is called.
    private LeaderDecider currentLeaderDecider;

    public MigrationAdaptiveLeaderDecider(final MetricsFactory metricsFactory) {
        this.metricsFactory = metricsFactory;
    }

    /**
     * Delegates the leadership check to the currently-installed LeaderDecider.
     *
     * @param workerId ID of the worker
     * @return true if the current worker is the leader
     * @throws IllegalStateException if no delegate has been installed yet
     */
    @Override
    public synchronized Boolean isLeader(final String workerId) {
        if (currentLeaderDecider == null) {
            throw new IllegalStateException("LeaderDecider uninitialized");
        }

        final MetricsScope scope =
                MetricsUtil.createMetricsWithOperation(metricsFactory, METRIC_OPERATION_LEADER_DECIDER);
        try {
            publishSelectedLeaderDeciderMetrics(scope, currentLeaderDecider);
            return currentLeaderDecider.isLeader(workerId);
        } finally {
            MetricsUtil.endScope(scope);
        }
    }

    /** Emits a datapoint named after the active delegate's class, to show which decider is in use. */
    private static void publishSelectedLeaderDeciderMetrics(
            final MetricsScope scope, final LeaderDecider leaderDecider) {
        // Removed the redundant no-arg String.format wrapper around the class name.
        scope.addData(leaderDecider.getClass().getSimpleName(), 1D, StandardUnit.COUNT, MetricsLevel.DETAILED);
    }

    /**
     * Atomically replaces the current delegate: shuts down the old one (if any),
     * installs the new one, and initializes it.
     *
     * @param leaderDecider the new delegate to use for leadership decisions
     */
    public synchronized void updateLeaderDecider(final LeaderDecider leaderDecider) {
        if (currentLeaderDecider != null) {
            currentLeaderDecider.shutdown();
            log.info(
                    "Updating leader decider dynamically from {} to {}",
                    this.currentLeaderDecider.getClass().getSimpleName(),
                    leaderDecider.getClass().getSimpleName());
        } else {
            log.info(
                    "Initializing dynamic leader decider with {}",
                    leaderDecider.getClass().getSimpleName());
        }
        currentLeaderDecider = leaderDecider;
        currentLeaderDecider.initialize();
    }

    @Override
    public void shutdown() {
        if (nonNull(currentLeaderDecider)) {
            log.info("Shutting down current {}", currentLeaderDecider.getClass().getSimpleName());
            currentLeaderDecider.shutdown();
            currentLeaderDecider = null;
        } else {
            log.info("LeaderDecider has already been shutdown");
        }
    }
}

View file

@ -81,8 +81,20 @@ public class DynamoUtils {
}
}
/**
 * Wraps a non-null Double in a DDB numeric AttributeValue.
 *
 * @param doubleValue value to wrap; must not be null
 * @return numeric AttributeValue holding the value's decimal representation
 * @throws IllegalArgumentException if doubleValue is null
 */
public static AttributeValue createAttributeValue(Double doubleValue) {
    if (null == doubleValue) {
        throw new IllegalArgumentException("Double attributeValues cannot be null.");
    }

    final String numericText = doubleValue.toString();
    return AttributeValue.builder().n(numericText).build();
}
/**
 * Null-safe lookup of a string attribute from a DDB record.
 *
 * @param dynamoRecord record to read from
 * @param key attribute name
 * @return the attribute's string value, or null when the attribute is absent
 */
public static String safeGetString(Map<String, AttributeValue> dynamoRecord, String key) {
    return safeGetString(dynamoRecord.get(key));
}
public static String safeGetString(AttributeValue av) {
if (av == null) {
return null;
} else {
@ -99,4 +111,13 @@ public class DynamoUtils {
return av.ss();
}
}
/**
 * Null-safe lookup of a numeric attribute from a DDB record.
 *
 * @param dynamoRecord record to read from
 * @param key attribute name
 * @return the attribute's value as a Double, or null when the attribute is absent
 * @throws NumberFormatException if the attribute's numeric string is malformed
 */
public static Double safeGetDouble(Map<String, AttributeValue> dynamoRecord, String key) {
    AttributeValue av = dynamoRecord.get(key);
    if (av == null) {
        return null;
    }
    // Double.valueOf instead of the deprecated boxing constructor `new Double(String)`.
    return Double.valueOf(av.n());
}
}

View file

@ -103,26 +103,6 @@ public class KinesisShardDetector implements ShardDetector {
private static final Boolean THROW_RESOURCE_NOT_FOUND_EXCEPTION = true;
/**
 * Single-stream convenience constructor.
 *
 * @deprecated use the {@link StreamIdentifier}-based constructor; this overload delegates to it
 *             with {@code LeaseManagementConfig.DEFAULT_REQUEST_TIMEOUT}.
 */
@Deprecated
public KinesisShardDetector(
        KinesisAsyncClient kinesisClient,
        String streamName,
        long listShardsBackoffTimeInMillis,
        int maxListShardsRetryAttempts,
        long listShardsCacheAllowedAgeInSeconds,
        int maxCacheMissesBeforeReload,
        int cacheMissWarningModulus) {
    this(
            kinesisClient,
            StreamIdentifier.singleStreamInstance(streamName),
            listShardsBackoffTimeInMillis,
            maxListShardsRetryAttempts,
            listShardsCacheAllowedAgeInSeconds,
            maxCacheMissesBeforeReload,
            cacheMissWarningModulus,
            LeaseManagementConfig.DEFAULT_REQUEST_TIMEOUT);
}
public KinesisShardDetector(
KinesisAsyncClient kinesisClient,
StreamIdentifier streamIdentifier,

View file

@ -46,7 +46,11 @@ import software.amazon.kinesis.retrieval.kpl.ExtendedSequenceNumber;
"lastCounterIncrementNanos",
"childShardIds",
"pendingCheckpointState",
"isMarkedForLeaseSteal"
"isMarkedForLeaseSteal",
"throughputKBps",
"checkpointOwner",
"checkpointOwnerTimeoutTimestampMillis",
"isExpiredOrUnassigned"
})
@ToString
public class Lease {
@ -104,6 +108,33 @@ public class Lease {
@Setter
private boolean isMarkedForLeaseSteal;
/**
* If true, this indicates that lease is ready to be immediately reassigned.
*/
@Setter
private boolean isExpiredOrUnassigned;
/**
* Throughput in Kbps for the lease.
*/
private Double throughputKBps;
/**
* Owner of the checkpoint. The attribute is used for graceful shutdowns to indicate the owner that
* is allowed to write the checkpoint.
*/
@Setter
private String checkpointOwner;
/**
* This field is used for tracking when the shutdown was requested on the lease so we can expire it. This is
* deliberately not persisted in DynamoDB because leaseOwner are expected to transfer lease from itself to the
* next owner during shutdown. If the worker dies before shutdown the lease will just become expired then we can
* pick it up. If for some reason worker is not able to shut down and continues holding onto the lease
* this timeout will kick in and force a lease transfer.
*/
@Setter
private Long checkpointOwnerTimeoutTimestampMillis;
/**
* Count of distinct lease holders between checkpoints.
*/
@ -242,6 +273,54 @@ public class Lease {
}
}
/**
 * @return true if checkpoint owner is set, indicating that a graceful shutdown
 *         (checkpoint hand-off to the next owner) has been requested for this lease.
 */
public boolean shutdownRequested() {
    return checkpointOwner != null;
}
/**
 * Check whether lease should be blocked on pending checkpoint. We DON'T block if
 * - lease is expired (Expired lease should be assigned right away) OR
 * ----- at this point we know lease is assigned -----
 * - lease is shardEnd (No more processing possible) OR
 * - lease is NOT requested for shutdown OR
 * - lease shutdown expired
 *
 * @param currentTimeMillis current time in milliseconds
 * @return true if lease is blocked on pending checkpoint
 */
public boolean blockedOnPendingCheckpoint(long currentTimeMillis) {
    if (isExpiredOrUnassigned) {
        // expired/unassigned leases must be reassigned right away, never blocked
        return false;
    }
    if (ExtendedSequenceNumber.SHARD_END.equals(checkpoint)) {
        // shard has ended; no further processing is possible
        return false;
    }
    if (!shutdownRequested()) {
        return false;
    }
    // if shutdown is requested then checkpointOwnerTimeoutTimestampMillis should be present;
    // block only while the shutdown timeout has not elapsed yet
    return currentTimeMillis - checkpointOwnerTimeoutTimestampMillis < 0;
}
/**
 * Check whether lease is eligible for graceful shutdown. It's eligible if
 * - lease is still assigned (not expired) AND
 * - lease is NOT shardEnd (no more processing possible) AND
 * - lease is NOT requested for shutdown
 *
 * @return true if lease is eligible for graceful shutdown
 */
public boolean isEligibleForGracefulShutdown() {
    final boolean stillAssigned = !isExpiredOrUnassigned;
    final boolean reachedShardEnd = ExtendedSequenceNumber.SHARD_END.equals(checkpoint);
    return stillAssigned && !reachedShardEnd && !shutdownRequested();
}
/**
 * Need to handle the case during graceful shutdown where leaseOwner isn't the current owner.
 *
 * @return the checkpoint owner when a graceful handoff is in flight, otherwise the lease owner
 */
public String actualOwner() {
    if (checkpointOwner != null) {
        return checkpointOwner;
    }
    return leaseOwner;
}
/**
* @return true if lease is not currently owned
*/
@ -343,6 +422,15 @@ public class Lease {
this.childShardIds.addAll(childShardIds);
}
/**
 * Sets throughputKBps.
 *
 * @param throughputKBps throughput of this lease in KB per second
 */
public void throughputKBps(double throughputKBps) {
    this.throughputKBps = throughputKBps;
}
/**
* Set the hash range key for this shard.
* @param hashKeyRangeForLease
@ -370,6 +458,8 @@ public class Lease {
* @return A deep copy of this object.
*/
public Lease copy() {
    // NOTE(review): checkpointOwner is restored explicitly after copying — presumably the
    // copy constructor does not carry it over; confirm against Lease(Lease) before changing.
    final Lease lease = new Lease(this);
    lease.checkpointOwner(this.checkpointOwner);
    return lease;
}
}

View file

@ -20,6 +20,7 @@ import java.util.Collections;
import java.util.List;
import java.util.UUID;
import software.amazon.kinesis.coordinator.MigrationAdaptiveLeaseAssignmentModeProvider;
import software.amazon.kinesis.leases.dynamodb.DynamoDBLeaseCoordinator;
import software.amazon.kinesis.leases.exceptions.DependencyException;
import software.amazon.kinesis.leases.exceptions.InvalidStateException;
@ -38,11 +39,14 @@ public interface LeaseCoordinator {
/**
 * Start background LeaseHolder and LeaseTaker threads.
 * @param leaseAssignmentModeProvider provider of Lease Assignment mode to determine whether to start components
 * for both V2 and V3 functionality or only V3 functionality
 * @throws ProvisionedThroughputException If we can't talk to DynamoDB due to insufficient capacity.
 * @throws InvalidStateException If the lease table doesn't exist
 * @throws DependencyException If we encountered exception talking to DynamoDB
 */
void start(final MigrationAdaptiveLeaseAssignmentModeProvider leaseAssignmentModeProvider)
        throws DependencyException, InvalidStateException, ProvisionedThroughputException;
/**
* Runs a single iteration of the lease taker - used by integration tests.
@ -152,4 +156,9 @@ public interface LeaseCoordinator {
* @return LeaseCoordinator
*/
DynamoDBLeaseCoordinator initialLeaseTableReadCapacity(long readCapacity);
/**
* @return instance of {@link LeaseStatsRecorder}
*/
LeaseStatsRecorder leaseStatsRecorder();
}

View file

@ -0,0 +1,20 @@
package software.amazon.kinesis.leases;
import java.util.List;
import software.amazon.kinesis.leases.exceptions.DependencyException;
import software.amazon.kinesis.leases.exceptions.InvalidStateException;
import software.amazon.kinesis.leases.exceptions.ProvisionedThroughputException;
public interface LeaseDiscoverer {
    /**
     * Identifies the leases that are assigned to the current worker but are not yet being tracked and
     * processed by the current worker.
     *
     * @return list of leases assigned to this worker that the worker is not already holding
     * @throws DependencyException if DynamoDB scan fails in an unexpected way
     * @throws InvalidStateException if lease table does not exist
     * @throws ProvisionedThroughputException if DynamoDB scan fails due to lack of capacity
     */
    List<Lease> discoverNewLeases() throws ProvisionedThroughputException, InvalidStateException, DependencyException;
}

View file

@ -16,7 +16,9 @@
package software.amazon.kinesis.leases;
import java.time.Duration;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.SynchronousQueue;
import java.util.concurrent.ThreadFactory;
@ -25,7 +27,9 @@ import java.util.concurrent.TimeUnit;
import java.util.function.Function;
import com.google.common.util.concurrent.ThreadFactoryBuilder;
import lombok.Builder;
import lombok.Data;
import lombok.Getter;
import lombok.NonNull;
import lombok.experimental.Accessors;
import org.apache.commons.lang3.Validate;
@ -34,6 +38,7 @@ import software.amazon.awssdk.services.dynamodb.DynamoDbAsyncClient;
import software.amazon.awssdk.services.dynamodb.model.BillingMode;
import software.amazon.awssdk.services.dynamodb.model.Tag;
import software.amazon.awssdk.services.kinesis.KinesisAsyncClient;
import software.amazon.kinesis.common.DdbTableConfig;
import software.amazon.kinesis.common.InitialPositionInStream;
import software.amazon.kinesis.common.InitialPositionInStreamExtended;
import software.amazon.kinesis.common.LeaseCleanupConfig;
@ -42,6 +47,7 @@ import software.amazon.kinesis.leases.dynamodb.DynamoDBLeaseManagementFactory;
import software.amazon.kinesis.leases.dynamodb.TableCreatorCallback;
import software.amazon.kinesis.metrics.MetricsFactory;
import software.amazon.kinesis.metrics.NullMetricsFactory;
import software.amazon.kinesis.worker.metric.WorkerMetric;
/**
* Used by the KCL to configure lease management.
@ -209,6 +215,9 @@ public class LeaseManagementConfig {
private BillingMode billingMode = BillingMode.PAY_PER_REQUEST;
private WorkerUtilizationAwareAssignmentConfig workerUtilizationAwareAssignmentConfig =
new WorkerUtilizationAwareAssignmentConfig();
/**
* Whether to enable deletion protection on the DynamoDB lease table created by KCL. This does not update
* already existing tables.
@ -276,14 +285,17 @@ public class LeaseManagementConfig {
}
/**
 * Constructor.
 *
 * @param tableName name of the DynamoDB lease table
 * @param applicationName name of the KCL application; used to derive the WorkerMetricStats table config
 * @param dynamoDBClient DynamoDB client used for lease management
 * @param kinesisClient Kinesis client
 * @param workerIdentifier identifies this worker (e.g. useful to track lease ownership)
 */
public LeaseManagementConfig(
        final String tableName,
        final String applicationName,
        final DynamoDbAsyncClient dynamoDBClient,
        final KinesisAsyncClient kinesisClient,
        final String workerIdentifier) {
    this.tableName = tableName;
    this.dynamoDBClient = dynamoDBClient;
    this.kinesisClient = kinesisClient;
    this.workerIdentifier = workerIdentifier;
    this.workerUtilizationAwareAssignmentConfig.workerMetricsTableConfig =
            new WorkerMetricsTableConfig(applicationName);
}
/**
@ -361,6 +373,53 @@ public class LeaseManagementConfig {
return hierarchicalShardSyncer;
}
/**
 * Configuration class for controlling the graceful handoff of leases.
 * This configuration allows tuning of the shutdown behavior during lease transfers.
 * <p>
 * It provides settings to control the timeout period for waiting on the record processor
 * to shut down and an option to enable or disable graceful lease handoff.
 * </p>
 */
@Builder
@Getter
@Accessors(fluent = true)
public static class GracefulLeaseHandoffConfig {
    /**
     * The minimum amount of time (in milliseconds) to wait for the current shard's RecordProcessor
     * to gracefully shut down before forcefully transferring the lease to the next owner.
     * <p>
     * If each call to {@code processRecords} is expected to run longer than the default value,
     * it makes sense to set this to a higher value to ensure the RecordProcessor has enough
     * time to complete its processing.
     * </p>
     * <p>
     * Default value is 30,000 milliseconds (30 seconds).
     * </p>
     */
    @Builder.Default
    private long gracefulLeaseHandoffTimeoutMillis = 30_000L;
    /**
     * Flag to enable or disable the graceful lease handoff mechanism.
     * <p>
     * When set to {@code true}, the KCL will attempt to gracefully transfer leases by
     * allowing the shard's RecordProcessor sufficient time to complete processing before
     * handing off the lease to another worker. When {@code false}, the lease will be
     * handed off without waiting for the RecordProcessor to shut down gracefully. Note
     * that checkpointing is expected to be implemented inside {@code shutdownRequested}
     * for this feature to work end to end.
     * </p>
     * <p>
     * Default value is {@code true}.
     * </p>
     */
    @Builder.Default
    private boolean isGracefulLeaseHandoffEnabled = true;
}
private GracefulLeaseHandoffConfig gracefulLeaseHandoffConfig =
GracefulLeaseHandoffConfig.builder().build();
@Deprecated
public LeaseManagementFactory leaseManagementFactory() {
if (leaseManagementFactory == null) {
@ -440,7 +499,9 @@ public class LeaseManagementConfig {
leaseSerializer,
customShardDetectorProvider(),
isMultiStreamingMode,
leaseCleanupConfig());
leaseCleanupConfig(),
workerUtilizationAwareAssignmentConfig(),
gracefulLeaseHandoffConfig);
}
return leaseManagementFactory;
}
@ -454,4 +515,89 @@ public class LeaseManagementConfig {
this.leaseManagementFactory = leaseManagementFactory;
return this;
}
@Data
@Accessors(fluent = true)
public static class WorkerUtilizationAwareAssignmentConfig {
    /**
     * This defines the frequency of capturing worker metric stats in memory. Default is 1s
     */
    private long inMemoryWorkerMetricsCaptureFrequencyMillis =
            Duration.ofSeconds(1L).toMillis();
    /**
     * This defines the frequency of reporting worker metric stats to storage. Default is 30s
     */
    private long workerMetricsReporterFreqInMillis = Duration.ofSeconds(30).toMillis();
    /**
     * Number of metric values that are persisted per WorkerMetric in the WorkerMetricStats ddb table.
     */
    private int noOfPersistedMetricsPerWorkerMetrics = 10;
    /**
     * Option to disable the use of worker metrics in lease balancing.
     */
    private boolean disableWorkerMetrics = false;
    /**
     * List of workerMetrics for the application.
     */
    private List<WorkerMetric> workerMetricList = new ArrayList<>();
    /**
     * Max throughput per host KBps, default is unlimited.
     */
    private double maxThroughputPerHostKBps = Double.MAX_VALUE;
    /**
     * Percentage of value to achieve critical dampening during this case
     */
    private int dampeningPercentage = 60;
    /**
     * Percentage deviation from the fleet-level average used to trigger reBalance. If the fleet has workers
     * whose metrics value is more or less than this percentage of the fleet-level average then reBalance is
     * triggered.
     * Leases are taken from workers with metrics value more than fleet level average. The load to take from these
     * workers is determined by evaluating how far they are with respect to fleet level average.
     */
    private int reBalanceThresholdPercentage = 10;
    /**
     * The allowThroughputOvershoot flag determines whether leases should still be taken even if
     * it causes the total assigned throughput to exceed the desired throughput to take for re-balance.
     * Enabling this flag provides more flexibility for the LeaseAssignmentManager to explore additional
     * assignment possibilities, which can lead to faster throughput convergence.
     */
    private boolean allowThroughputOvershoot = true;
    /**
     * Duration after which workerMetrics entry from WorkerMetricStats table will be cleaned up. When an entry's
     * lastUpdateTime is older than staleWorkerMetricsEntryCleanupDuration from current time, entry will be removed
     * from the table.
     */
    private Duration staleWorkerMetricsEntryCleanupDuration = Duration.ofDays(1);
    /**
     * Configuration for how to create the WorkerMetricStats table, such as table name,
     * billing mode, provisioned capacity. If no table name is specified, the table name will
     * default to applicationName-WorkerMetricStats. If no billing mode is chosen, default is
     * On-Demand.
     */
    private WorkerMetricsTableConfig workerMetricsTableConfig;
    /**
     * Frequency to perform worker variance balancing. This value is used with respect to the LAM freq,
     * that is every third (as default) iteration of LAM the worker variance balancing will be performed.
     * Setting it to 1 will make varianceBalancing run on every iteration of LAM and 2 on every 2nd iteration
     * and so on.
     */
    private int varianceBalancingFrequency = 3;
    /**
     * Alpha value used for calculating exponential moving average of worker's metrics values. Selecting a
     * higher alpha value gives more weightage to the recent value and thus low smoothing effect on the computed
     * average, and selecting smaller alpha values gives more weightage to past values and high smoothing effect.
     */
    private double workerMetricsEMAAlpha = 0.5;
}
/**
 * Table configuration for the WorkerMetricStats DynamoDB table. Passes the application name and the
 * "WorkerMetricStats" suffix to {@link DdbTableConfig} — presumably the table name defaults to
 * applicationName-WorkerMetricStats; confirm against DdbTableConfig.
 */
public static class WorkerMetricsTableConfig extends DdbTableConfig {
    public WorkerMetricsTableConfig(final String applicationName) {
        super(applicationName, "WorkerMetricStats");
    }
}
}

View file

@ -15,9 +15,12 @@
package software.amazon.kinesis.leases;
import java.util.concurrent.ConcurrentMap;
import software.amazon.kinesis.common.StreamConfig;
import software.amazon.kinesis.coordinator.DeletedStreamListProvider;
import software.amazon.kinesis.leases.dynamodb.DynamoDBLeaseRefresher;
import software.amazon.kinesis.lifecycle.ShardConsumer;
import software.amazon.kinesis.metrics.MetricsFactory;
/**
@ -26,6 +29,11 @@ import software.amazon.kinesis.metrics.MetricsFactory;
public interface LeaseManagementFactory {
LeaseCoordinator createLeaseCoordinator(MetricsFactory metricsFactory);
default LeaseCoordinator createLeaseCoordinator(
MetricsFactory metricsFactory, ConcurrentMap<ShardInfo, ShardConsumer> shardInfoShardConsumerMap) {
throw new UnsupportedOperationException();
}
ShardSyncTaskManager createShardSyncTaskManager(MetricsFactory metricsFactory);
default ShardSyncTaskManager createShardSyncTaskManager(MetricsFactory metricsFactory, StreamConfig streamConfig) {

View file

@ -15,6 +15,9 @@
package software.amazon.kinesis.leases;
import java.util.List;
import java.util.Map;
import java.util.concurrent.ExecutorService;
import java.util.stream.Collectors;
import software.amazon.kinesis.common.StreamIdentifier;
import software.amazon.kinesis.leases.exceptions.DependencyException;
@ -75,6 +78,37 @@ public interface LeaseRefresher {
*/
boolean waitUntilLeaseTableExists(long secondsBetweenPolls, long timeoutSeconds) throws DependencyException;
/**
 * Creates the LeaseOwnerToLeaseKey index on the lease table if it doesn't exist and returns the status of index.
 *
 * @return indexStatus status of the index; this default implementation performs no action and returns null
 * @throws DependencyException if storage's describe API fails in an unexpected way
 */
default String createLeaseOwnerToLeaseKeyIndexIfNotExists() throws DependencyException {
    return null;
}
/**
 * Blocks until the index exists by polling storage till either the index is ACTIVE or else timeout has
 * happened.
 *
 * @param secondsBetweenPolls time to wait between polls in seconds
 * @param timeoutSeconds total time to wait in seconds
 *
 * @return true if index on the table exists and is ACTIVE, false if timeout was reached; this default
 *         implementation performs no polling and always returns false
 */
default boolean waitUntilLeaseOwnerToLeaseKeyIndexExists(
        final long secondsBetweenPolls, final long timeoutSeconds) {
    return false;
}
/**
* Check if leaseOwner GSI is ACTIVE
* @return true if index is active, false otherwise
* @throws DependencyException if storage's describe API fails in an unexpected way
*/
boolean isLeaseOwnerToLeaseKeyIndexActive() throws DependencyException;
/**
* List all leases for a given stream synchronously.
*
@ -87,6 +121,24 @@ public interface LeaseRefresher {
List<Lease> listLeasesForStream(StreamIdentifier streamIdentifier)
throws DependencyException, InvalidStateException, ProvisionedThroughputException;
/**
 * List all lease keys currently owned by the given workerIdentifier synchronously.
 * Default implementation calls listLeases() and filters the results.
 *
 * @param workerIdentifier worker whose lease keys should be returned
 * @throws DependencyException if DynamoDB scan fails in an unexpected way
 * @throws InvalidStateException if lease table does not exist
 * @throws ProvisionedThroughputException if DynamoDB scan fails due to lack of capacity
 *
 * @return list of lease keys
 */
default List<String> listLeaseKeysForWorker(final String workerIdentifier)
        throws DependencyException, InvalidStateException, ProvisionedThroughputException {
    return listLeases().stream()
            // compare with workerIdentifier on the left: unassigned leases may carry a null
            // owner, and lease.leaseOwner().equals(...) would throw NPE for those entries
            .filter(lease -> workerIdentifier.equals(lease.leaseOwner()))
            .map(Lease::leaseKey)
            .collect(Collectors.toList());
}
/**
* List all objects in table synchronously.
*
@ -98,6 +150,23 @@ public interface LeaseRefresher {
*/
List<Lease> listLeases() throws DependencyException, InvalidStateException, ProvisionedThroughputException;
/**
 * List all leases from the storage in parallel and deserialize into Lease objects. Returns the list of
 * leaseKeys that failed to deserialize separately.
 *
 * @param threadPool threadpool to use for parallel scan
 * @param parallelismFactor number of parallel scans
 * @return Pair of List of leases from the storage and List of items that failed to deserialize;
 *         this default implementation is unsupported and always throws
 * @throws DependencyException if DynamoDB scan fails in an unexpected way
 * @throws InvalidStateException if lease table does not exist
 * @throws ProvisionedThroughputException if DynamoDB scan fails due to lack of capacity
 */
default Map.Entry<List<Lease>, List<String>> listLeasesParallely(
        final ExecutorService threadPool, final int parallelismFactor)
        throws DependencyException, InvalidStateException, ProvisionedThroughputException {
    throw new UnsupportedOperationException("listLeasesParallely is not implemented");
}
/**
* Create a new lease. Conditional on a lease not already existing with this shardId.
*
@ -154,6 +223,47 @@ public interface LeaseRefresher {
boolean takeLease(Lease lease, String owner)
throws DependencyException, InvalidStateException, ProvisionedThroughputException;
/**
 * Assigns given lease to newOwner owner by incrementing its leaseCounter and setting its owner field. Conditional
 * on the leaseOwner in DynamoDB matching the leaseOwner of the input lease. Mutates the leaseCounter and owner of
 * the passed-in lease object after updating DynamoDB.
 *
 * @param lease the lease to be assigned
 * @param newOwner the new owner
 *
 * @return true if lease was successfully assigned, false otherwise; this default implementation is
 *         unsupported and always throws
 *
 * @throws InvalidStateException if lease table does not exist
 * @throws ProvisionedThroughputException if DynamoDB update fails due to lack of capacity
 * @throws DependencyException if DynamoDB update fails in an unexpected way
 */
default boolean assignLease(final Lease lease, final String newOwner)
        throws DependencyException, InvalidStateException, ProvisionedThroughputException {
    throw new UnsupportedOperationException("assignLease is not implemented");
}
/**
 * Initiates a graceful handoff of the given lease to the specified new owner, allowing the current owner
 * to complete its processing before transferring ownership.
 * <p>
 * This method updates the lease with the new owner information but ensures that the current owner
 * is given time to gracefully finish its work (e.g., processing records) before the lease is reassigned.
 * </p>
 *
 * @param lease the lease to be assigned
 * @param newOwner the new owner
 * @return true if a graceful handoff was successfully initiated
 * @throws InvalidStateException if lease table does not exist
 * @throws ProvisionedThroughputException if DynamoDB update fails due to lack of capacity
 * @throws DependencyException if DynamoDB update fails in an unexpected way
 */
default boolean initiateGracefulLeaseHandoff(final Lease lease, final String newOwner)
        throws DependencyException, InvalidStateException, ProvisionedThroughputException {
    // Message must name this method (not "assignLeaseWithWait") so a caller debugging the
    // UnsupportedOperationException is pointed at the API they actually invoked.
    throw new UnsupportedOperationException("initiateGracefulLeaseHandoff is not implemented");
}
/**
* Evict the current owner of lease by setting owner to null. Conditional on the owner in DynamoDB matching the owner of
* the input. Mutates the lease counter and owner of the passed-in lease object after updating the record in DynamoDB.

View file

@ -15,6 +15,7 @@
package software.amazon.kinesis.leases;
import java.util.Collection;
import java.util.Collections;
import java.util.Map;
import software.amazon.awssdk.services.dynamodb.model.AttributeDefinition;
@ -100,6 +101,15 @@ public interface LeaseSerializer {
*/
Map<String, AttributeValueUpdate> getDynamoTakeLeaseUpdate(Lease lease, String newOwner);
/**
 * @param lease lease that needs to be assigned
 * @param newOwner newLeaseOwner
 * @return the attribute value map that takes a lease for a new owner; this default implementation is
 *         unsupported and always throws {@link UnsupportedOperationException}
 */
default Map<String, AttributeValueUpdate> getDynamoAssignLeaseUpdate(Lease lease, String newOwner) {
    throw new UnsupportedOperationException("getDynamoAssignLeaseUpdate is not implemented");
}
/**
* @param lease
* @return the attribute value map that voids a lease
@ -127,8 +137,22 @@ public interface LeaseSerializer {
*/
Collection<KeySchemaElement> getKeySchema();
/**
 * @return key schema for the worker-id-to-lease-key (LeaseOwnerToLeaseKey) index; empty by default.
 *         Uses {@code Collections.emptyList()} instead of the raw-typed {@code EMPTY_LIST} constant
 *         so the returned collection is correctly typed without an unchecked conversion.
 */
default Collection<KeySchemaElement> getWorkerIdToLeaseKeyIndexKeySchema() {
    return Collections.emptyList();
}

/**
 * @return attribute definitions for the worker-id-to-lease-key index; empty by default
 */
default Collection<AttributeDefinition> getWorkerIdToLeaseKeyIndexAttributeDefinitions() {
    return Collections.emptyList();
}
/**
 * @return attribute definitions for creating a DynamoDB table to store leases
 */
Collection<AttributeDefinition> getAttributeDefinitions();

/**
 * @param lease lease whose throughput value should be written
 * @return the attribute value map that includes lease throughput
 */
Map<String, AttributeValueUpdate> getDynamoLeaseThroughputKbpsUpdate(Lease lease);
}

View file

@ -0,0 +1,158 @@
package software.amazon.kinesis.leases;
import java.util.LinkedList;
import java.util.Map;
import java.util.Queue;
import java.util.concurrent.Callable;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ConcurrentLinkedQueue;
import lombok.Builder;
import lombok.Getter;
import lombok.NonNull;
import lombok.RequiredArgsConstructor;
import lombok.ToString;
import software.amazon.awssdk.annotations.ThreadSafe;
import software.amazon.kinesis.annotations.KinesisClientInternalApi;
import software.amazon.kinesis.utils.ExponentialMovingAverage;
import static java.util.Objects.isNull;
/**
 * This class records the stats for the leases.
 * The stats are recorded in a thread safe queue, and the throughput is calculated by summing up the bytes and dividing
 * by interval in seconds.
 * This class is thread safe and backed by thread safe data structures.
 */
@RequiredArgsConstructor
@KinesisClientInternalApi
@ThreadSafe
public class LeaseStatsRecorder {

    /**
     * This default alpha is chosen based on the testing so far between simple average and moving average with 0.5.
     * In the future, if one value does not fit all use cases, inject this via config.
     */
    private static final double DEFAULT_ALPHA = 0.5;

    public static final int BYTES_PER_KB = 1024;

    // Interval over which stats are retained; entries older than this are expired.
    private final Long renewerFrequencyInMillis;
    private final Map<String, Queue<LeaseStats>> leaseStatsMap = new ConcurrentHashMap<>();
    private final Map<String, ExponentialMovingAverage> leaseKeyToExponentialMovingAverageMap =
            new ConcurrentHashMap<>();
    // Injectable clock returning epoch millis, so expiry logic is testable.
    private final Callable<Long> timeProviderInMillis;

    /**
     * This method provides happens-before semantics (i.e., the action (access or removal) from a thread happens
     * before the action from subsequent thread) for the stats recording in multithreaded environment.
     */
    public void recordStats(@NonNull final LeaseStats leaseStats) {
        final Queue<LeaseStats> leaseStatsQueue =
                leaseStatsMap.computeIfAbsent(leaseStats.getLeaseKey(), lease -> new ConcurrentLinkedQueue<>());
        leaseStatsQueue.add(leaseStats);
    }

    /**
     * Calculates the throughput in KBps for the given leaseKey.
     * Method first clears the items that are older than {@link #renewerFrequencyInMillis} from the queue and then
     * calculates the throughput per second during {@link #renewerFrequencyInMillis} interval and then returns the
     * ExponentialMovingAverage of the throughput. If method is called in quick succession with or without new stats
     * the result can be different as ExponentialMovingAverage decays old values on every new call.
     * This method is thread safe.
     * @param leaseKey leaseKey for which stats are required
     * @return throughput in KBps, returns null if there is no stats available for the leaseKey.
     */
    public Double getThroughputKBps(final String leaseKey) {
        final Queue<LeaseStats> leaseStatsQueue = leaseStatsMap.get(leaseKey);
        if (isNull(leaseStatsQueue)) {
            // This means there is no entry for this leaseKey yet
            return null;
        }
        filterExpiredEntries(leaseStatsQueue);
        // Convert bytes into KB and divide by interval in second to get throughput per second.
        final ExponentialMovingAverage exponentialMovingAverage = leaseKeyToExponentialMovingAverageMap.computeIfAbsent(
                leaseKey, leaseId -> new ExponentialMovingAverage(DEFAULT_ALPHA));
        // Specifically dividing by 1000.0 rather than using Duration class to get seconds, because Duration class
        // implementation rounds off to seconds and precision is lost.
        final double frequency = renewerFrequencyInMillis / 1000.0;
        final double throughput = readQueue(leaseStatsQueue).stream()
                        .mapToDouble(LeaseStats::getBytes)
                        .sum()
                / BYTES_PER_KB
                / frequency;
        exponentialMovingAverage.add(throughput);
        return exponentialMovingAverage.getValue();
    }

    /**
     * Gets the currentTimeMillis and then iterates over the queue to get the stats with creation time less than
     * currentTimeMillis.
     * This is specifically done to avoid potential race between with high-frequency put thread blocking get thread.
     */
    private Queue<LeaseStats> readQueue(final Queue<LeaseStats> leaseStatsQueue) {
        final long currentTimeMillis = getCurrentTimeInMillis();
        final Queue<LeaseStats> response = new LinkedList<>();
        for (LeaseStats leaseStats : leaseStatsQueue) {
            if (leaseStats.creationTimeMillis > currentTimeMillis) {
                break;
            }
            response.add(leaseStats);
        }
        return response;
    }

    // Renamed from the misspelled "getCurrenTimeInMillis"; private, so no callers outside this class.
    private long getCurrentTimeInMillis() {
        try {
            return timeProviderInMillis.call();
        } catch (final Exception e) {
            // Fallback to using the System.currentTimeMillis if failed.
            return System.currentTimeMillis();
        }
    }

    // Drops queue entries whose age exceeds renewerFrequencyInMillis; stops at the first fresh entry
    // (queue is in insertion order, so older entries are at the head).
    private void filterExpiredEntries(final Queue<LeaseStats> leaseStatsQueue) {
        final long currentTime = getCurrentTimeInMillis();
        while (!leaseStatsQueue.isEmpty()) {
            final LeaseStats leaseStats = leaseStatsQueue.peek();
            if (isNull(leaseStats) || currentTime - leaseStats.getCreationTimeMillis() < renewerFrequencyInMillis) {
                break;
            }
            leaseStatsQueue.poll();
        }
    }

    /**
     * Clear the in-memory stats for the lease when a lease is reassigned (due to shut down or lease stealing)
     * @param leaseKey leaseKey, for which stats are supposed to be cleared.
     */
    public void dropLeaseStats(final String leaseKey) {
        leaseStatsMap.remove(leaseKey);
        leaseKeyToExponentialMovingAverageMap.remove(leaseKey);
    }

    @Builder
    @Getter
    @ToString
    @KinesisClientInternalApi
    public static final class LeaseStats {
        /**
         * Lease key for which this leaseStats object is created.
         */
        private final String leaseKey;
        /**
         * Bytes that are processed for a lease
         */
        private final long bytes;
        /**
         * Wall time in epoch millis at which this leaseStats object was created. This time is used to determine the
         * expiry of the lease stats.
         */
        @Builder.Default
        private final long creationTimeMillis = System.currentTimeMillis();
    }
}

View file

@ -19,6 +19,7 @@ import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.UUID;
import java.util.concurrent.ConcurrentMap;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.LinkedTransferQueue;
@ -30,13 +31,17 @@ import java.util.concurrent.TimeUnit;
import java.util.stream.Collectors;
import com.google.common.util.concurrent.ThreadFactoryBuilder;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import software.amazon.kinesis.annotations.KinesisClientInternalApi;
import software.amazon.kinesis.coordinator.MigrationAdaptiveLeaseAssignmentModeProvider;
import software.amazon.kinesis.leases.Lease;
import software.amazon.kinesis.leases.LeaseCoordinator;
import software.amazon.kinesis.leases.LeaseDiscoverer;
import software.amazon.kinesis.leases.LeaseManagementConfig;
import software.amazon.kinesis.leases.LeaseRefresher;
import software.amazon.kinesis.leases.LeaseRenewer;
import software.amazon.kinesis.leases.LeaseStatsRecorder;
import software.amazon.kinesis.leases.LeaseTaker;
import software.amazon.kinesis.leases.MultiStreamLease;
import software.amazon.kinesis.leases.ShardInfo;
@ -44,6 +49,8 @@ import software.amazon.kinesis.leases.exceptions.DependencyException;
import software.amazon.kinesis.leases.exceptions.InvalidStateException;
import software.amazon.kinesis.leases.exceptions.LeasingException;
import software.amazon.kinesis.leases.exceptions.ProvisionedThroughputException;
import software.amazon.kinesis.lifecycle.LeaseGracefulShutdownHandler;
import software.amazon.kinesis.lifecycle.ShardConsumer;
import software.amazon.kinesis.metrics.MetricsFactory;
import software.amazon.kinesis.metrics.MetricsLevel;
import software.amazon.kinesis.metrics.MetricsScope;
@ -70,115 +77,34 @@ public class DynamoDBLeaseCoordinator implements LeaseCoordinator {
.setNameFormat("LeaseRenewer-%04d")
.setDaemon(true)
.build();
private static final ThreadFactory LEASE_DISCOVERY_THREAD_FACTORY = new ThreadFactoryBuilder()
.setNameFormat("LeaseDiscovery-%04d")
.setDaemon(true)
.build();
private final LeaseRenewer leaseRenewer;
private final LeaseTaker leaseTaker;
private final LeaseDiscoverer leaseDiscoverer;
private final long renewerIntervalMillis;
private final long takerIntervalMillis;
private final long leaseDiscovererIntervalMillis;
private final ExecutorService leaseRenewalThreadpool;
private final ExecutorService leaseDiscoveryThreadPool;
private final LeaseRefresher leaseRefresher;
private final LeaseStatsRecorder leaseStatsRecorder;
private final LeaseGracefulShutdownHandler leaseGracefulShutdownHandler;
private long initialLeaseTableReadCapacity;
private long initialLeaseTableWriteCapacity;
protected final MetricsFactory metricsFactory;
private final Object shutdownLock = new Object();
private final LeaseManagementConfig.WorkerUtilizationAwareAssignmentConfig workerUtilizationAwareAssignmentConfig;
private ScheduledExecutorService leaseCoordinatorThreadPool;
private ScheduledFuture<?> leaseDiscoveryFuture;
private ScheduledFuture<?> takerFuture;
private volatile boolean running = false;
/**
* Constructor.
*
* <p>NOTE: This constructor is deprecated and will be removed in a future release.</p>
*
* @param leaseRefresher
* LeaseRefresher instance to use
* @param workerIdentifier
* Identifies the worker (e.g. useful to track lease ownership)
* @param leaseDurationMillis
* Duration of a lease
* @param epsilonMillis
* Allow for some variance when calculating lease expirations
* @param maxLeasesForWorker
* Max leases this Worker can handle at a time
* @param maxLeasesToStealAtOneTime
* Steal up to these many leases at a time (for load balancing)
* @param metricsFactory
* Used to publish metrics about lease operations
*/
@Deprecated
public DynamoDBLeaseCoordinator(
final LeaseRefresher leaseRefresher,
final String workerIdentifier,
final long leaseDurationMillis,
final long epsilonMillis,
final int maxLeasesForWorker,
final int maxLeasesToStealAtOneTime,
final int maxLeaseRenewerThreadCount,
final MetricsFactory metricsFactory) {
this(
leaseRefresher,
workerIdentifier,
leaseDurationMillis,
epsilonMillis,
maxLeasesForWorker,
maxLeasesToStealAtOneTime,
maxLeaseRenewerThreadCount,
TableConstants.DEFAULT_INITIAL_LEASE_TABLE_READ_CAPACITY,
TableConstants.DEFAULT_INITIAL_LEASE_TABLE_WRITE_CAPACITY,
metricsFactory);
}
/**
* Constructor.
*
* @param leaseRefresher
* LeaseRefresher instance to use
* @param workerIdentifier
* Identifies the worker (e.g. useful to track lease ownership)
* @param leaseDurationMillis
* Duration of a lease
* @param epsilonMillis
* Allow for some variance when calculating lease expirations
* @param maxLeasesForWorker
* Max leases this Worker can handle at a time
* @param maxLeasesToStealAtOneTime
* Steal up to these many leases at a time (for load balancing)
* @param initialLeaseTableReadCapacity
* Initial dynamodb lease table read iops if creating the lease table
* @param initialLeaseTableWriteCapacity
* Initial dynamodb lease table write iops if creating the lease table
* @param metricsFactory
* Used to publish metrics about lease operations
*/
@Deprecated
public DynamoDBLeaseCoordinator(
final LeaseRefresher leaseRefresher,
final String workerIdentifier,
final long leaseDurationMillis,
final long epsilonMillis,
final int maxLeasesForWorker,
final int maxLeasesToStealAtOneTime,
final int maxLeaseRenewerThreadCount,
final long initialLeaseTableReadCapacity,
final long initialLeaseTableWriteCapacity,
final MetricsFactory metricsFactory) {
this(
leaseRefresher,
workerIdentifier,
leaseDurationMillis,
LeaseManagementConfig.DEFAULT_ENABLE_PRIORITY_LEASE_ASSIGNMENT,
epsilonMillis,
maxLeasesForWorker,
maxLeasesToStealAtOneTime,
maxLeaseRenewerThreadCount,
TableConstants.DEFAULT_INITIAL_LEASE_TABLE_READ_CAPACITY,
TableConstants.DEFAULT_INITIAL_LEASE_TABLE_WRITE_CAPACITY,
metricsFactory);
}
/**
* Constructor.
*
@ -214,17 +140,35 @@ public class DynamoDBLeaseCoordinator implements LeaseCoordinator {
final int maxLeaseRenewerThreadCount,
final long initialLeaseTableReadCapacity,
final long initialLeaseTableWriteCapacity,
final MetricsFactory metricsFactory) {
final MetricsFactory metricsFactory,
final LeaseManagementConfig.WorkerUtilizationAwareAssignmentConfig workerUtilizationAwareAssignmentConfig,
final LeaseManagementConfig.GracefulLeaseHandoffConfig gracefulLeaseHandoffConfig,
final ConcurrentMap<ShardInfo, ShardConsumer> shardInfoShardConsumerMap) {
this.leaseRefresher = leaseRefresher;
this.leaseRenewalThreadpool = getLeaseRenewalExecutorService(maxLeaseRenewerThreadCount);
this.leaseRenewalThreadpool = createExecutorService(maxLeaseRenewerThreadCount, LEASE_RENEWAL_THREAD_FACTORY);
this.leaseTaker = new DynamoDBLeaseTaker(leaseRefresher, workerIdentifier, leaseDurationMillis, metricsFactory)
.withMaxLeasesForWorker(maxLeasesForWorker)
.withMaxLeasesToStealAtOneTime(maxLeasesToStealAtOneTime)
.withEnablePriorityLeaseAssignment(enablePriorityLeaseAssignment);
this.leaseRenewer = new DynamoDBLeaseRenewer(
leaseRefresher, workerIdentifier, leaseDurationMillis, leaseRenewalThreadpool, metricsFactory);
this.renewerIntervalMillis = getRenewerTakerIntervalMillis(leaseDurationMillis, epsilonMillis);
this.takerIntervalMillis = (leaseDurationMillis + epsilonMillis) * 2;
// Should run once every leaseDurationMillis to identify new leases before expiry.
this.leaseDiscovererIntervalMillis = leaseDurationMillis - epsilonMillis;
this.leaseStatsRecorder = new LeaseStatsRecorder(renewerIntervalMillis, System::currentTimeMillis);
this.leaseGracefulShutdownHandler = LeaseGracefulShutdownHandler.create(
gracefulLeaseHandoffConfig.gracefulLeaseHandoffTimeoutMillis(), shardInfoShardConsumerMap, this);
this.leaseRenewer = new DynamoDBLeaseRenewer(
leaseRefresher,
workerIdentifier,
leaseDurationMillis,
leaseRenewalThreadpool,
metricsFactory,
leaseStatsRecorder,
leaseGracefulShutdownHandler::enqueueShutdown);
this.leaseDiscoveryThreadPool =
createExecutorService(maxLeaseRenewerThreadCount, LEASE_DISCOVERY_THREAD_FACTORY);
this.leaseDiscoverer = new DynamoDBLeaseDiscoverer(
this.leaseRefresher, this.leaseRenewer, metricsFactory, workerIdentifier, leaseDiscoveryThreadPool);
if (initialLeaseTableReadCapacity <= 0) {
throw new IllegalArgumentException("readCapacity should be >= 1");
}
@ -234,6 +178,7 @@ public class DynamoDBLeaseCoordinator implements LeaseCoordinator {
}
this.initialLeaseTableWriteCapacity = initialLeaseTableWriteCapacity;
this.metricsFactory = metricsFactory;
this.workerUtilizationAwareAssignmentConfig = workerUtilizationAwareAssignmentConfig;
log.info(
"With failover time {} ms and epsilon {} ms, LeaseCoordinator will renew leases every {} ms, take"
@ -246,11 +191,49 @@ public class DynamoDBLeaseCoordinator implements LeaseCoordinator {
maxLeasesToStealAtOneTime);
}
private class TakerRunnable implements Runnable {
    /**
     * Periodic task that pulls newly assigned leases from {@code leaseDiscoverer} and hands them to the
     * {@code leaseRenewer}. It is a no-op unless the coordinator is currently operating in
     * WORKER_UTILIZATION_AWARE_ASSIGNMENT mode.
     */
    @RequiredArgsConstructor
    private class LeaseDiscoveryRunnable implements Runnable {
        // Supplies the current lease assignment mode; the mode can change dynamically during migration.
        private final MigrationAdaptiveLeaseAssignmentModeProvider leaseAssignmentModeProvider;
        @Override
        public void run() {
            try {
                // LeaseDiscoverer is run in WORKER_UTILIZATION_AWARE_ASSIGNMENT mode only
                // The mode check and the discovery itself run under shutdownLock so discovery cannot
                // race with coordinator shutdown (which clears held leases and flips `running`).
                synchronized (shutdownLock) {
                    if (!leaseAssignmentModeProvider
                            .getLeaseAssignmentMode()
                            .equals(
                                    MigrationAdaptiveLeaseAssignmentModeProvider.LeaseAssignmentMode
                                            .WORKER_UTILIZATION_AWARE_ASSIGNMENT)) {
                        return;
                    }
                    // Skip discovery once the coordinator has been stopped.
                    if (running) {
                        leaseRenewer.addLeasesToRenew(leaseDiscoverer.discoverNewLeases());
                    }
                }
            } catch (Exception e) {
                // Best-effort: log and let the scheduler trigger the next periodic run.
                log.error("Failed to execute lease discovery", e);
            }
        }
    }
@RequiredArgsConstructor
private class TakerRunnable implements Runnable {
private final MigrationAdaptiveLeaseAssignmentModeProvider leaseAssignmentModeProvider;
@Override
public void run() {
try {
// LeaseTaker is run in DEFAULT_LEASE_COUNT_BASED_ASSIGNMENT mode only
synchronized (shutdownLock) {
if (!leaseAssignmentModeProvider
.getLeaseAssignmentMode()
.equals(
MigrationAdaptiveLeaseAssignmentModeProvider.LeaseAssignmentMode
.DEFAULT_LEASE_COUNT_BASED_ASSIGNMENT)) {
return;
}
}
runLeaseTaker();
} catch (LeasingException e) {
log.error("LeasingException encountered in lease taking thread", e);
@ -290,18 +273,35 @@ public class DynamoDBLeaseCoordinator implements LeaseCoordinator {
}
@Override
public void start() throws DependencyException, InvalidStateException, ProvisionedThroughputException {
public void start(final MigrationAdaptiveLeaseAssignmentModeProvider leaseAssignmentModeProvider)
throws DependencyException, InvalidStateException, ProvisionedThroughputException {
leaseRenewer.initialize();
// At max, we need 3 threads - lease renewer, lease taker, lease discoverer - to run without contention.
leaseCoordinatorThreadPool = Executors.newScheduledThreadPool(3, LEASE_COORDINATOR_THREAD_FACTORY);
// 2 because we know we'll have at most 2 concurrent tasks at a time.
leaseCoordinatorThreadPool = Executors.newScheduledThreadPool(2, LEASE_COORDINATOR_THREAD_FACTORY);
// During migration to KCLv3.x from KCLv2.x, lease assignment mode can change dynamically, so
// both lease assignment algorithms will be started but only one will execute based on
// leaseAssignmentModeProvider.getLeaseAssignmentMode(). However for new applications starting in
// KCLv3.x or applications successfully migrated to KCLv3.x, lease assignment mode will not
// change dynamically and will always be WORKER_UTILIZATION_AWARE_ASSIGNMENT, therefore
// don't initialize KCLv2.x lease assignment algorithm components that are not needed.
if (leaseAssignmentModeProvider.dynamicModeChangeSupportNeeded()) {
// Taker runs with fixed DELAY because we want it to run slower in the event of performance degradation.
takerFuture = leaseCoordinatorThreadPool.scheduleWithFixedDelay(
new TakerRunnable(leaseAssignmentModeProvider), 0L, takerIntervalMillis, TimeUnit.MILLISECONDS);
}
// Taker runs with fixed DELAY because we want it to run slower in the event of performance degredation.
takerFuture = leaseCoordinatorThreadPool.scheduleWithFixedDelay(
new TakerRunnable(), 0L, takerIntervalMillis, TimeUnit.MILLISECONDS);
// Renewer runs at fixed INTERVAL because we want it to run at the same rate in the event of degredation.
leaseDiscoveryFuture = leaseCoordinatorThreadPool.scheduleAtFixedRate(
new LeaseDiscoveryRunnable(leaseAssignmentModeProvider),
0L,
leaseDiscovererIntervalMillis,
TimeUnit.MILLISECONDS);
// Renewer runs at fixed INTERVAL because we want it to run at the same rate in the event of degradation.
leaseCoordinatorThreadPool.scheduleAtFixedRate(
new RenewerRunnable(), 0L, renewerIntervalMillis, TimeUnit.MILLISECONDS);
leaseGracefulShutdownHandler.start();
running = true;
}
@ -383,6 +383,8 @@ public class DynamoDBLeaseCoordinator implements LeaseCoordinator {
}
leaseRenewalThreadpool.shutdownNow();
leaseCoordinatorThreadPool.shutdownNow();
leaseGracefulShutdownHandler.stop();
synchronized (shutdownLock) {
leaseRenewer.clearCurrentlyHeldLeases();
running = false;
@ -393,6 +395,10 @@ public class DynamoDBLeaseCoordinator implements LeaseCoordinator {
public void stopLeaseTaker() {
if (takerFuture != null) {
takerFuture.cancel(false);
leaseDiscoveryFuture.cancel(false);
// the method is called in worker graceful shutdown. We want to stop any further lease shutdown
// so we don't interrupt worker shutdown.
leaseGracefulShutdownHandler.stop();
}
}
@ -418,20 +424,15 @@ public class DynamoDBLeaseCoordinator implements LeaseCoordinator {
}
/**
* Returns executor service that should be used for lease renewal.
* Returns executor service for given ThreadFactory.
* @param maximumPoolSize Maximum allowed thread pool size
* @return Executor service that should be used for lease renewal.
* @return Executor service
*/
private static ExecutorService getLeaseRenewalExecutorService(int maximumPoolSize) {
private static ExecutorService createExecutorService(final int maximumPoolSize, final ThreadFactory threadFactory) {
int coreLeaseCount = Math.max(maximumPoolSize / 4, 2);
return new ThreadPoolExecutor(
coreLeaseCount,
maximumPoolSize,
60,
TimeUnit.SECONDS,
new LinkedTransferQueue<>(),
LEASE_RENEWAL_THREAD_FACTORY);
coreLeaseCount, maximumPoolSize, 60, TimeUnit.SECONDS, new LinkedTransferQueue<>(), threadFactory);
}
@Override
@ -472,6 +473,8 @@ public class DynamoDBLeaseCoordinator implements LeaseCoordinator {
* {@inheritDoc}
*
* <p>NOTE: This method is deprecated. Please set the initial capacity through the constructor.</p>
*
* This is a method of the public lease coordinator interface.
*/
@Override
@Deprecated
@ -487,6 +490,8 @@ public class DynamoDBLeaseCoordinator implements LeaseCoordinator {
* {@inheritDoc}
*
* <p>NOTE: This method is deprecated. Please set the initial capacity through the constructor.</p>
*
* This is a method of the public lease coordinator interface.
*/
@Override
@Deprecated
@ -497,4 +502,9 @@ public class DynamoDBLeaseCoordinator implements LeaseCoordinator {
initialLeaseTableWriteCapacity = writeCapacity;
return this;
}
    /**
     * {@inheritDoc}
     *
     * <p>Returns the {@link LeaseStatsRecorder} created by this coordinator and shared with its
     * lease renewer.</p>
     */
    @Override
    public LeaseStatsRecorder leaseStatsRecorder() {
        return leaseStatsRecorder;
    }
}

View file

@ -0,0 +1,120 @@
package software.amazon.kinesis.leases.dynamodb;

import java.util.List;
import java.util.Objects;
import java.util.Set;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.ExecutorService;
import java.util.stream.Collectors;

import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import software.amazon.kinesis.leases.Lease;
import software.amazon.kinesis.leases.LeaseDiscoverer;
import software.amazon.kinesis.leases.LeaseRefresher;
import software.amazon.kinesis.leases.LeaseRenewer;
import software.amazon.kinesis.leases.exceptions.DependencyException;
import software.amazon.kinesis.leases.exceptions.InvalidStateException;
import software.amazon.kinesis.leases.exceptions.ProvisionedThroughputException;
import software.amazon.kinesis.metrics.MetricsFactory;
import software.amazon.kinesis.metrics.MetricsLevel;
import software.amazon.kinesis.metrics.MetricsScope;
import software.amazon.kinesis.metrics.MetricsUtil;

import static java.util.Objects.isNull;

/**
 * An implementation of {@link LeaseDiscoverer}, it uses {@link LeaseRefresher} to query
 * {@link DynamoDBLeaseRefresher#LEASE_OWNER_TO_LEASE_KEY_INDEX_NAME } and find the leases assigned
 * to current worker and then filter and returns the leases that have not started processing (looks at
 * {@link LeaseRenewer#getCurrentlyHeldLeases()} to find out which leases are currently held leases).
 */
@Slf4j
@RequiredArgsConstructor
public class DynamoDBLeaseDiscoverer implements LeaseDiscoverer {

    private final LeaseRefresher leaseRefresher;
    private final LeaseRenewer leaseRenewer;
    private final MetricsFactory metricsFactory;
    private final String workerIdentifier;
    private final ExecutorService executorService;

    /**
     * {@inheritDoc}
     *
     * <p>Lists the lease keys assigned to {@code workerIdentifier} via the owner GSI, drops the keys the
     * {@link LeaseRenewer} already holds, then fetches the remaining leases in parallel on
     * {@code executorService}. Leases that fail to fetch, are no longer owned by this worker, or are
     * still pending handoff from a previous owner are omitted from the result for this run.</p>
     *
     * @return newly discovered leases assigned to this worker and not yet held by the renewer
     */
    @Override
    public List<Lease> discoverNewLeases()
            throws ProvisionedThroughputException, InvalidStateException, DependencyException {
        final MetricsScope metricsScope = MetricsUtil.createMetricsWithOperation(metricsFactory, "LeaseDiscovery");
        long startTime = System.currentTimeMillis();
        boolean success = false;
        try {
            final Set<String> currentHeldLeaseKeys =
                    leaseRenewer.getCurrentlyHeldLeases().keySet();

            final long listLeaseKeysForWorkerStartTime = System.currentTimeMillis();
            final List<String> leaseKeys = leaseRefresher.listLeaseKeysForWorker(workerIdentifier);
            MetricsUtil.addLatency(
                    metricsScope, "ListLeaseKeysForWorker", listLeaseKeysForWorkerStartTime, MetricsLevel.DETAILED);

            // Only leases not already tracked by the renewer are "new".
            final List<String> newLeaseKeys = leaseKeys.stream()
                    .filter(leaseKey -> !currentHeldLeaseKeys.contains(leaseKey))
                    .collect(Collectors.toList());

            final long fetchNewLeasesStartTime = System.currentTimeMillis();
            final List<CompletableFuture<Lease>> completableFutures = newLeaseKeys.stream()
                    .map(leaseKey ->
                            CompletableFuture.supplyAsync(() -> fetchLease(leaseKey, metricsScope), executorService))
                    .collect(Collectors.toList());

            // fetchLease never throws (it catches internally and returns null), so join() here
            // cannot raise a CompletionException from the fetch itself.
            final List<Lease> newLeases = completableFutures.stream()
                    .map(CompletableFuture::join)
                    .filter(Objects::nonNull)
                    .collect(Collectors.toList());

            log.info(
                    "New leases assigned to worker : {}, count : {}, leases : {}",
                    workerIdentifier,
                    newLeases.size(),
                    newLeases.stream().map(Lease::leaseKey).collect(Collectors.toList()));

            MetricsUtil.addLatency(metricsScope, "FetchNewLeases", fetchNewLeasesStartTime, MetricsLevel.DETAILED);
            success = true;
            MetricsUtil.addCount(metricsScope, "NewLeasesDiscovered", newLeases.size(), MetricsLevel.DETAILED);
            return newLeases;
        } finally {
            MetricsUtil.addWorkerIdentifier(metricsScope, workerIdentifier);
            MetricsUtil.addSuccessAndLatency(metricsScope, success, startTime, MetricsLevel.SUMMARY);
            MetricsUtil.endScope(metricsScope);
        }
    }

    /**
     * Fetches and validates a single lease. Returns {@code null} when the lease no longer exists, is no
     * longer owned by this worker, is still pending shutdown by its previous owner, or the read fails.
     */
    private Lease fetchLease(final String leaseKey, final MetricsScope metricsScope) {
        try {
            final Lease lease = leaseRefresher.getLease(leaseKey);
            if (isNull(lease)) {
                return null;
            }

            // GSI is eventually consistent thus, validate that the fetched lease is indeed assigned to this
            // worker, if not just pass in this run.
            // BUGFIX: compare with workerIdentifier on the left so a lease whose owner has been cleared
            // (null leaseOwner) since the GSI read is skipped cleanly rather than throwing an NPE that
            // would be logged and counted as a GetLease error.
            if (!workerIdentifier.equals(lease.leaseOwner())) {
                MetricsUtil.addCount(metricsScope, "OwnerMismatch", 1, MetricsLevel.DETAILED);
                return null;
            }

            // if checkpointOwner is not null, it means that the lease is still pending shutdown for the last owner.
            // Don't add the lease to the in-memory map yet.
            if (lease.checkpointOwner() != null) {
                return null;
            }

            // when a new lease is discovered, set the lastCounterIncrementNanos to current time as the time
            // when it has become visible, on next renewer interval this will be updated by LeaseRenewer to
            // correct time.
            lease.lastCounterIncrementNanos(System.nanoTime());
            return lease;
        } catch (final Exception e) {
            // if getLease on some lease key fail, continue and fetch other leases, the one failed will
            // be fetched in the next iteration or will be reassigned if stayed idle for long.
            MetricsUtil.addCount(metricsScope, "GetLease:Error", 1, MetricsLevel.SUMMARY);
            log.error("GetLease failed for leaseKey : {}", leaseKey, e);
            return null;
        }
    }
}

View file

@ -17,18 +17,21 @@ package software.amazon.kinesis.leases.dynamodb;
import java.time.Duration;
import java.util.Collection;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ConcurrentMap;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.function.Function;
import lombok.Data;
import lombok.NonNull;
import software.amazon.awssdk.core.util.DefaultSdkAutoConstructList;
import lombok.extern.slf4j.Slf4j;
import software.amazon.awssdk.services.dynamodb.DynamoDbAsyncClient;
import software.amazon.awssdk.services.dynamodb.model.BillingMode;
import software.amazon.awssdk.services.dynamodb.model.Tag;
import software.amazon.awssdk.services.kinesis.KinesisAsyncClient;
import software.amazon.kinesis.annotations.KinesisClientInternalApi;
import software.amazon.kinesis.common.DdbTableConfig;
import software.amazon.kinesis.common.InitialPositionInStreamExtended;
import software.amazon.kinesis.common.LeaseCleanupConfig;
import software.amazon.kinesis.common.StreamConfig;
@ -42,12 +45,15 @@ import software.amazon.kinesis.leases.LeaseManagementConfig;
import software.amazon.kinesis.leases.LeaseManagementFactory;
import software.amazon.kinesis.leases.LeaseSerializer;
import software.amazon.kinesis.leases.ShardDetector;
import software.amazon.kinesis.leases.ShardInfo;
import software.amazon.kinesis.leases.ShardSyncTaskManager;
import software.amazon.kinesis.lifecycle.ShardConsumer;
import software.amazon.kinesis.metrics.MetricsFactory;
/**
*
*/
@Slf4j
@Data
@KinesisClientInternalApi
public class DynamoDBLeaseManagementFactory implements LeaseManagementFactory {
@ -73,6 +79,8 @@ public class DynamoDBLeaseManagementFactory implements LeaseManagementFactory {
@NonNull
private final LeaseSerializer leaseSerializer;
private final LeaseManagementConfig.WorkerUtilizationAwareAssignmentConfig workerUtilizationAwareAssignmentConfig;
@NonNull
private StreamConfig streamConfig;
@ -103,434 +111,11 @@ public class DynamoDBLeaseManagementFactory implements LeaseManagementFactory {
private final Collection<Tag> tags;
private final boolean isMultiStreamMode;
private final LeaseCleanupConfig leaseCleanupConfig;
private final LeaseManagementConfig.GracefulLeaseHandoffConfig gracefulLeaseHandoffConfig;
    /**
     * Constructor.
     *
     * <p>NOTE: This constructor is deprecated and will be removed in a future release.</p>
     *
     * <p>Delegates to the overload that also accepts initial lease table capacities, supplying
     * {@link TableConstants#DEFAULT_INITIAL_LEASE_TABLE_READ_CAPACITY} and
     * {@link TableConstants#DEFAULT_INITIAL_LEASE_TABLE_WRITE_CAPACITY}.</p>
     *
     * @param kinesisClient
     * @param streamName
     * @param dynamoDBClient
     * @param tableName
     * @param workerIdentifier
     * @param executorService
     * @param initialPositionInStream
     * @param failoverTimeMillis
     * @param epsilonMillis
     * @param maxLeasesForWorker
     * @param maxLeasesToStealAtOneTime
     * @param maxLeaseRenewalThreads
     * @param cleanupLeasesUponShardCompletion
     * @param ignoreUnexpectedChildShards
     * @param shardSyncIntervalMillis
     * @param consistentReads
     * @param listShardsBackoffTimeMillis
     * @param maxListShardsRetryAttempts
     * @param maxCacheMissesBeforeReload
     * @param listShardsCacheAllowedAgeInSeconds
     * @param cacheMissWarningModulus
     */
    @Deprecated
    public DynamoDBLeaseManagementFactory(
            final KinesisAsyncClient kinesisClient,
            final String streamName,
            final DynamoDbAsyncClient dynamoDBClient,
            final String tableName,
            final String workerIdentifier,
            final ExecutorService executorService,
            final InitialPositionInStreamExtended initialPositionInStream,
            final long failoverTimeMillis,
            final long epsilonMillis,
            final int maxLeasesForWorker,
            final int maxLeasesToStealAtOneTime,
            final int maxLeaseRenewalThreads,
            final boolean cleanupLeasesUponShardCompletion,
            final boolean ignoreUnexpectedChildShards,
            final long shardSyncIntervalMillis,
            final boolean consistentReads,
            final long listShardsBackoffTimeMillis,
            final int maxListShardsRetryAttempts,
            final int maxCacheMissesBeforeReload,
            final long listShardsCacheAllowedAgeInSeconds,
            final int cacheMissWarningModulus) {
        this(
                kinesisClient,
                streamName,
                dynamoDBClient,
                tableName,
                workerIdentifier,
                executorService,
                initialPositionInStream,
                failoverTimeMillis,
                epsilonMillis,
                maxLeasesForWorker,
                maxLeasesToStealAtOneTime,
                maxLeaseRenewalThreads,
                cleanupLeasesUponShardCompletion,
                ignoreUnexpectedChildShards,
                shardSyncIntervalMillis,
                consistentReads,
                listShardsBackoffTimeMillis,
                maxListShardsRetryAttempts,
                maxCacheMissesBeforeReload,
                listShardsCacheAllowedAgeInSeconds,
                cacheMissWarningModulus,
                TableConstants.DEFAULT_INITIAL_LEASE_TABLE_READ_CAPACITY,
                TableConstants.DEFAULT_INITIAL_LEASE_TABLE_WRITE_CAPACITY);
    }
    /**
     * Constructor.
     *
     * <p>
     * NOTE: This constructor is deprecated and will be removed in a future release.
     * </p>
     *
     * <p>Delegates to the overload that also accepts a shard syncer, table-creator callback, and request
     * timeout, supplying {@code new HierarchicalShardSyncer()},
     * {@link TableCreatorCallback#NOOP_TABLE_CREATOR_CALLBACK}, and
     * {@link LeaseManagementConfig#DEFAULT_REQUEST_TIMEOUT}.</p>
     *
     * @param kinesisClient
     * @param streamName
     * @param dynamoDBClient
     * @param tableName
     * @param workerIdentifier
     * @param executorService
     * @param initialPositionInStream
     * @param failoverTimeMillis
     * @param epsilonMillis
     * @param maxLeasesForWorker
     * @param maxLeasesToStealAtOneTime
     * @param maxLeaseRenewalThreads
     * @param cleanupLeasesUponShardCompletion
     * @param ignoreUnexpectedChildShards
     * @param shardSyncIntervalMillis
     * @param consistentReads
     * @param listShardsBackoffTimeMillis
     * @param maxListShardsRetryAttempts
     * @param maxCacheMissesBeforeReload
     * @param listShardsCacheAllowedAgeInSeconds
     * @param cacheMissWarningModulus
     * @param initialLeaseTableReadCapacity
     * @param initialLeaseTableWriteCapacity
     */
    @Deprecated
    public DynamoDBLeaseManagementFactory(
            final KinesisAsyncClient kinesisClient,
            final String streamName,
            final DynamoDbAsyncClient dynamoDBClient,
            final String tableName,
            final String workerIdentifier,
            final ExecutorService executorService,
            final InitialPositionInStreamExtended initialPositionInStream,
            final long failoverTimeMillis,
            final long epsilonMillis,
            final int maxLeasesForWorker,
            final int maxLeasesToStealAtOneTime,
            final int maxLeaseRenewalThreads,
            final boolean cleanupLeasesUponShardCompletion,
            final boolean ignoreUnexpectedChildShards,
            final long shardSyncIntervalMillis,
            final boolean consistentReads,
            final long listShardsBackoffTimeMillis,
            final int maxListShardsRetryAttempts,
            final int maxCacheMissesBeforeReload,
            final long listShardsCacheAllowedAgeInSeconds,
            final int cacheMissWarningModulus,
            final long initialLeaseTableReadCapacity,
            final long initialLeaseTableWriteCapacity) {
        this(
                kinesisClient,
                streamName,
                dynamoDBClient,
                tableName,
                workerIdentifier,
                executorService,
                initialPositionInStream,
                failoverTimeMillis,
                epsilonMillis,
                maxLeasesForWorker,
                maxLeasesToStealAtOneTime,
                maxLeaseRenewalThreads,
                cleanupLeasesUponShardCompletion,
                ignoreUnexpectedChildShards,
                shardSyncIntervalMillis,
                consistentReads,
                listShardsBackoffTimeMillis,
                maxListShardsRetryAttempts,
                maxCacheMissesBeforeReload,
                listShardsCacheAllowedAgeInSeconds,
                cacheMissWarningModulus,
                initialLeaseTableReadCapacity,
                initialLeaseTableWriteCapacity,
                new HierarchicalShardSyncer(),
                TableCreatorCallback.NOOP_TABLE_CREATOR_CALLBACK,
                LeaseManagementConfig.DEFAULT_REQUEST_TIMEOUT);
    }
    /**
     * Constructor.
     *
     * <p>NOTE: This constructor is deprecated (see {@code @Deprecated} below) and delegates to the overload
     * that also accepts a DynamoDB request timeout, supplying
     * {@link LeaseManagementConfig#DEFAULT_REQUEST_TIMEOUT}.</p>
     *
     * @param kinesisClient
     * @param streamName
     * @param dynamoDBClient
     * @param tableName
     * @param workerIdentifier
     * @param executorService
     * @param initialPositionInStream
     * @param failoverTimeMillis
     * @param epsilonMillis
     * @param maxLeasesForWorker
     * @param maxLeasesToStealAtOneTime
     * @param maxLeaseRenewalThreads
     * @param cleanupLeasesUponShardCompletion
     * @param ignoreUnexpectedChildShards
     * @param shardSyncIntervalMillis
     * @param consistentReads
     * @param listShardsBackoffTimeMillis
     * @param maxListShardsRetryAttempts
     * @param maxCacheMissesBeforeReload
     * @param listShardsCacheAllowedAgeInSeconds
     * @param cacheMissWarningModulus
     * @param initialLeaseTableReadCapacity
     * @param initialLeaseTableWriteCapacity
     * @param hierarchicalShardSyncer
     * @param tableCreatorCallback
     */
    @Deprecated
    public DynamoDBLeaseManagementFactory(
            final KinesisAsyncClient kinesisClient,
            final String streamName,
            final DynamoDbAsyncClient dynamoDBClient,
            final String tableName,
            final String workerIdentifier,
            final ExecutorService executorService,
            final InitialPositionInStreamExtended initialPositionInStream,
            final long failoverTimeMillis,
            final long epsilonMillis,
            final int maxLeasesForWorker,
            final int maxLeasesToStealAtOneTime,
            final int maxLeaseRenewalThreads,
            final boolean cleanupLeasesUponShardCompletion,
            final boolean ignoreUnexpectedChildShards,
            final long shardSyncIntervalMillis,
            final boolean consistentReads,
            final long listShardsBackoffTimeMillis,
            final int maxListShardsRetryAttempts,
            final int maxCacheMissesBeforeReload,
            final long listShardsCacheAllowedAgeInSeconds,
            final int cacheMissWarningModulus,
            final long initialLeaseTableReadCapacity,
            final long initialLeaseTableWriteCapacity,
            final HierarchicalShardSyncer hierarchicalShardSyncer,
            final TableCreatorCallback tableCreatorCallback) {
        this(
                kinesisClient,
                streamName,
                dynamoDBClient,
                tableName,
                workerIdentifier,
                executorService,
                initialPositionInStream,
                failoverTimeMillis,
                epsilonMillis,
                maxLeasesForWorker,
                maxLeasesToStealAtOneTime,
                maxLeaseRenewalThreads,
                cleanupLeasesUponShardCompletion,
                ignoreUnexpectedChildShards,
                shardSyncIntervalMillis,
                consistentReads,
                listShardsBackoffTimeMillis,
                maxListShardsRetryAttempts,
                maxCacheMissesBeforeReload,
                listShardsCacheAllowedAgeInSeconds,
                cacheMissWarningModulus,
                initialLeaseTableReadCapacity,
                initialLeaseTableWriteCapacity,
                hierarchicalShardSyncer,
                tableCreatorCallback,
                LeaseManagementConfig.DEFAULT_REQUEST_TIMEOUT);
    }
    /**
     * Constructor.
     *
     * <p>Deprecated; delegates to the overload that also accepts a {@link BillingMode}, supplying
     * {@link BillingMode#PAY_PER_REQUEST}.</p>
     *
     * @param kinesisClient
     * @param streamName
     * @param dynamoDBClient
     * @param tableName
     * @param workerIdentifier
     * @param executorService
     * @param initialPositionInStream
     * @param failoverTimeMillis
     * @param epsilonMillis
     * @param maxLeasesForWorker
     * @param maxLeasesToStealAtOneTime
     * @param maxLeaseRenewalThreads
     * @param cleanupLeasesUponShardCompletion
     * @param ignoreUnexpectedChildShards
     * @param shardSyncIntervalMillis
     * @param consistentReads
     * @param listShardsBackoffTimeMillis
     * @param maxListShardsRetryAttempts
     * @param maxCacheMissesBeforeReload
     * @param listShardsCacheAllowedAgeInSeconds
     * @param cacheMissWarningModulus
     * @param initialLeaseTableReadCapacity
     * @param initialLeaseTableWriteCapacity
     * @param hierarchicalShardSyncer
     * @param tableCreatorCallback
     * @param dynamoDbRequestTimeout
     */
    @Deprecated
    public DynamoDBLeaseManagementFactory(
            final KinesisAsyncClient kinesisClient,
            final String streamName,
            final DynamoDbAsyncClient dynamoDBClient,
            final String tableName,
            final String workerIdentifier,
            final ExecutorService executorService,
            final InitialPositionInStreamExtended initialPositionInStream,
            final long failoverTimeMillis,
            final long epsilonMillis,
            final int maxLeasesForWorker,
            final int maxLeasesToStealAtOneTime,
            final int maxLeaseRenewalThreads,
            final boolean cleanupLeasesUponShardCompletion,
            final boolean ignoreUnexpectedChildShards,
            final long shardSyncIntervalMillis,
            final boolean consistentReads,
            final long listShardsBackoffTimeMillis,
            final int maxListShardsRetryAttempts,
            final int maxCacheMissesBeforeReload,
            final long listShardsCacheAllowedAgeInSeconds,
            final int cacheMissWarningModulus,
            final long initialLeaseTableReadCapacity,
            final long initialLeaseTableWriteCapacity,
            final HierarchicalShardSyncer hierarchicalShardSyncer,
            final TableCreatorCallback tableCreatorCallback,
            Duration dynamoDbRequestTimeout) {
        this(
                kinesisClient,
                streamName,
                dynamoDBClient,
                tableName,
                workerIdentifier,
                executorService,
                initialPositionInStream,
                failoverTimeMillis,
                epsilonMillis,
                maxLeasesForWorker,
                maxLeasesToStealAtOneTime,
                maxLeaseRenewalThreads,
                cleanupLeasesUponShardCompletion,
                ignoreUnexpectedChildShards,
                shardSyncIntervalMillis,
                consistentReads,
                listShardsBackoffTimeMillis,
                maxListShardsRetryAttempts,
                maxCacheMissesBeforeReload,
                listShardsCacheAllowedAgeInSeconds,
                cacheMissWarningModulus,
                initialLeaseTableReadCapacity,
                initialLeaseTableWriteCapacity,
                hierarchicalShardSyncer,
                tableCreatorCallback,
                dynamoDbRequestTimeout,
                BillingMode.PAY_PER_REQUEST);
    }
    /**
     * Constructor.
     *
     * <p>Deprecated; wraps {@code streamName} into a single-stream {@link StreamConfig} and delegates to the
     * {@link StreamConfig}-based overload, supplying a new {@code DynamoDBLeaseSerializer}.</p>
     *
     * @param kinesisClient
     * @param streamName
     * @param dynamoDBClient
     * @param tableName
     * @param workerIdentifier
     * @param executorService
     * @param initialPositionInStream
     * @param failoverTimeMillis
     * @param epsilonMillis
     * @param maxLeasesForWorker
     * @param maxLeasesToStealAtOneTime
     * @param maxLeaseRenewalThreads
     * @param cleanupLeasesUponShardCompletion
     * @param ignoreUnexpectedChildShards
     * @param shardSyncIntervalMillis
     * @param consistentReads
     * @param listShardsBackoffTimeMillis
     * @param maxListShardsRetryAttempts
     * @param maxCacheMissesBeforeReload
     * @param listShardsCacheAllowedAgeInSeconds
     * @param cacheMissWarningModulus
     * @param initialLeaseTableReadCapacity
     * @param initialLeaseTableWriteCapacity
     * @param hierarchicalShardSyncer
     * @param tableCreatorCallback
     * @param dynamoDbRequestTimeout
     * @param billingMode
     */
    @Deprecated
    public DynamoDBLeaseManagementFactory(
            final KinesisAsyncClient kinesisClient,
            final String streamName,
            final DynamoDbAsyncClient dynamoDBClient,
            final String tableName,
            final String workerIdentifier,
            final ExecutorService executorService,
            final InitialPositionInStreamExtended initialPositionInStream,
            final long failoverTimeMillis,
            final long epsilonMillis,
            final int maxLeasesForWorker,
            final int maxLeasesToStealAtOneTime,
            final int maxLeaseRenewalThreads,
            final boolean cleanupLeasesUponShardCompletion,
            final boolean ignoreUnexpectedChildShards,
            final long shardSyncIntervalMillis,
            final boolean consistentReads,
            final long listShardsBackoffTimeMillis,
            final int maxListShardsRetryAttempts,
            final int maxCacheMissesBeforeReload,
            final long listShardsCacheAllowedAgeInSeconds,
            final int cacheMissWarningModulus,
            final long initialLeaseTableReadCapacity,
            final long initialLeaseTableWriteCapacity,
            final HierarchicalShardSyncer hierarchicalShardSyncer,
            final TableCreatorCallback tableCreatorCallback,
            Duration dynamoDbRequestTimeout,
            BillingMode billingMode) {
        this(
                kinesisClient,
                new StreamConfig(StreamIdentifier.singleStreamInstance(streamName), initialPositionInStream),
                dynamoDBClient,
                tableName,
                workerIdentifier,
                executorService,
                failoverTimeMillis,
                epsilonMillis,
                maxLeasesForWorker,
                maxLeasesToStealAtOneTime,
                maxLeaseRenewalThreads,
                cleanupLeasesUponShardCompletion,
                ignoreUnexpectedChildShards,
                shardSyncIntervalMillis,
                consistentReads,
                listShardsBackoffTimeMillis,
                maxListShardsRetryAttempts,
                maxCacheMissesBeforeReload,
                listShardsCacheAllowedAgeInSeconds,
                cacheMissWarningModulus,
                initialLeaseTableReadCapacity,
                initialLeaseTableWriteCapacity,
                hierarchicalShardSyncer,
                tableCreatorCallback,
                dynamoDbRequestTimeout,
                billingMode,
                new DynamoDBLeaseSerializer());
    }
/**
* Constructor.
* @deprecated this is used by the deprecated method in LeaseManagementConfig to construct the LeaseManagement factory
*
* @param kinesisClient
* @param streamName
@ -592,291 +177,6 @@ public class DynamoDBLeaseManagementFactory implements LeaseManagementFactory {
BillingMode billingMode,
Collection<Tag> tags) {
this(
kinesisClient,
new StreamConfig(StreamIdentifier.singleStreamInstance(streamName), initialPositionInStream),
dynamoDBClient,
tableName,
workerIdentifier,
executorService,
failoverTimeMillis,
epsilonMillis,
maxLeasesForWorker,
maxLeasesToStealAtOneTime,
maxLeaseRenewalThreads,
cleanupLeasesUponShardCompletion,
ignoreUnexpectedChildShards,
shardSyncIntervalMillis,
consistentReads,
listShardsBackoffTimeMillis,
maxListShardsRetryAttempts,
maxCacheMissesBeforeReload,
listShardsCacheAllowedAgeInSeconds,
cacheMissWarningModulus,
initialLeaseTableReadCapacity,
initialLeaseTableWriteCapacity,
hierarchicalShardSyncer,
tableCreatorCallback,
dynamoDbRequestTimeout,
billingMode,
new DynamoDBLeaseSerializer());
}
    /**
     * Constructor.
     *
     * <p>Deprecated private delegating constructor; forwards to the overload that also accepts a
     * deletion-protection flag and table tags, supplying
     * {@link LeaseManagementConfig#DEFAULT_LEASE_TABLE_DELETION_PROTECTION_ENABLED} and an empty
     * (SDK auto-construct) tag list.</p>
     *
     * @param kinesisClient
     * @param streamConfig
     * @param dynamoDBClient
     * @param tableName
     * @param workerIdentifier
     * @param executorService
     * @param failoverTimeMillis
     * @param epsilonMillis
     * @param maxLeasesForWorker
     * @param maxLeasesToStealAtOneTime
     * @param maxLeaseRenewalThreads
     * @param cleanupLeasesUponShardCompletion
     * @param ignoreUnexpectedChildShards
     * @param shardSyncIntervalMillis
     * @param consistentReads
     * @param listShardsBackoffTimeMillis
     * @param maxListShardsRetryAttempts
     * @param maxCacheMissesBeforeReload
     * @param listShardsCacheAllowedAgeInSeconds
     * @param cacheMissWarningModulus
     * @param initialLeaseTableReadCapacity
     * @param initialLeaseTableWriteCapacity
     * @param deprecatedHierarchicalShardSyncer
     * @param tableCreatorCallback
     * @param dynamoDbRequestTimeout
     * @param billingMode
     * @param leaseSerializer
     */
    @Deprecated
    private DynamoDBLeaseManagementFactory(
            final KinesisAsyncClient kinesisClient,
            final StreamConfig streamConfig,
            final DynamoDbAsyncClient dynamoDBClient,
            final String tableName,
            final String workerIdentifier,
            final ExecutorService executorService,
            final long failoverTimeMillis,
            final long epsilonMillis,
            final int maxLeasesForWorker,
            final int maxLeasesToStealAtOneTime,
            final int maxLeaseRenewalThreads,
            final boolean cleanupLeasesUponShardCompletion,
            final boolean ignoreUnexpectedChildShards,
            final long shardSyncIntervalMillis,
            final boolean consistentReads,
            final long listShardsBackoffTimeMillis,
            final int maxListShardsRetryAttempts,
            final int maxCacheMissesBeforeReload,
            final long listShardsCacheAllowedAgeInSeconds,
            final int cacheMissWarningModulus,
            final long initialLeaseTableReadCapacity,
            final long initialLeaseTableWriteCapacity,
            final HierarchicalShardSyncer deprecatedHierarchicalShardSyncer,
            final TableCreatorCallback tableCreatorCallback,
            Duration dynamoDbRequestTimeout,
            BillingMode billingMode,
            LeaseSerializer leaseSerializer) {
        this(
                kinesisClient,
                streamConfig,
                dynamoDBClient,
                tableName,
                workerIdentifier,
                executorService,
                failoverTimeMillis,
                epsilonMillis,
                maxLeasesForWorker,
                maxLeasesToStealAtOneTime,
                maxLeaseRenewalThreads,
                cleanupLeasesUponShardCompletion,
                ignoreUnexpectedChildShards,
                shardSyncIntervalMillis,
                consistentReads,
                listShardsBackoffTimeMillis,
                maxListShardsRetryAttempts,
                maxCacheMissesBeforeReload,
                listShardsCacheAllowedAgeInSeconds,
                cacheMissWarningModulus,
                initialLeaseTableReadCapacity,
                initialLeaseTableWriteCapacity,
                deprecatedHierarchicalShardSyncer,
                tableCreatorCallback,
                dynamoDbRequestTimeout,
                billingMode,
                LeaseManagementConfig.DEFAULT_LEASE_TABLE_DELETION_PROTECTION_ENABLED,
                DefaultSdkAutoConstructList.getInstance(),
                leaseSerializer);
    }
/**
 * Constructor.
 *
 * <p>Deprecated delegating constructor: forwards to the streamConfig-less constructor
 * (supplying {@code null} custom shard detector, single-stream mode, and the default
 * lease-cleanup config), then records the supplied {@code streamConfig} on this instance.
 *
 * @param kinesisClient Kinesis client used for stream and shard operations
 * @param streamConfig stream identifier plus initial position to process from
 * @param dynamoDBClient DynamoDB client backing the lease table
 * @param tableName name of the lease table
 * @param workerIdentifier unique identifier of this worker
 * @param executorService executor used for lease-management work
 * @param failoverTimeMillis time after which an un-renewed lease may be taken over
 * @param epsilonMillis allowance for clock skew in lease timing
 * @param maxLeasesForWorker maximum number of leases a single worker may hold
 * @param maxLeasesToStealAtOneTime maximum leases to steal in one lease-taker pass
 * @param maxLeaseRenewalThreads size of the lease-renewal thread pool
 * @param cleanupLeasesUponShardCompletion whether to delete leases of fully processed shards
 * @param ignoreUnexpectedChildShards whether unexpected child shards are ignored during sync
 * @param shardSyncIntervalMillis interval between shard-sync tasks
 * @param consistentReads whether lease-table reads are strongly consistent
 * @param listShardsBackoffTimeMillis backoff between ListShards retries
 * @param maxListShardsRetryAttempts maximum ListShards retry attempts
 * @param maxCacheMissesBeforeReload shard-cache misses tolerated before a reload
 * @param listShardsCacheAllowedAgeInSeconds maximum age of the cached shard list
 * @param cacheMissWarningModulus controls how often repeated cache misses are logged
 * @param initialLeaseTableReadCapacity provisioned read capacity if the lease table is created
 * @param initialLeaseTableWriteCapacity provisioned write capacity if the lease table is created
 * @param deprecatedHierarchicalShardSyncer shard syncer (retained for backwards compatibility)
 * @param tableCreatorCallback callback invoked after the lease table is created
 * @param dynamoDbRequestTimeout timeout applied to DynamoDB requests
 * @param billingMode billing mode used if the lease table is created
 * @param leaseTableDeletionProtectionEnabled whether DynamoDB deletion protection is enabled
 * @param tags tags applied to the lease table on creation
 * @param leaseSerializer serializer converting leases to/from DynamoDB records
 */
@Deprecated
private DynamoDBLeaseManagementFactory(
        final KinesisAsyncClient kinesisClient,
        final StreamConfig streamConfig,
        final DynamoDbAsyncClient dynamoDBClient,
        final String tableName,
        final String workerIdentifier,
        final ExecutorService executorService,
        final long failoverTimeMillis,
        final long epsilonMillis,
        final int maxLeasesForWorker,
        final int maxLeasesToStealAtOneTime,
        final int maxLeaseRenewalThreads,
        final boolean cleanupLeasesUponShardCompletion,
        final boolean ignoreUnexpectedChildShards,
        final long shardSyncIntervalMillis,
        final boolean consistentReads,
        final long listShardsBackoffTimeMillis,
        final int maxListShardsRetryAttempts,
        final int maxCacheMissesBeforeReload,
        final long listShardsCacheAllowedAgeInSeconds,
        final int cacheMissWarningModulus,
        final long initialLeaseTableReadCapacity,
        final long initialLeaseTableWriteCapacity,
        final HierarchicalShardSyncer deprecatedHierarchicalShardSyncer,
        final TableCreatorCallback tableCreatorCallback,
        Duration dynamoDbRequestTimeout,
        BillingMode billingMode,
        final boolean leaseTableDeletionProtectionEnabled,
        Collection<Tag> tags,
        LeaseSerializer leaseSerializer) {
    this(
            kinesisClient,
            dynamoDBClient,
            tableName,
            workerIdentifier,
            executorService,
            failoverTimeMillis,
            epsilonMillis,
            maxLeasesForWorker,
            maxLeasesToStealAtOneTime,
            maxLeaseRenewalThreads,
            cleanupLeasesUponShardCompletion,
            ignoreUnexpectedChildShards,
            shardSyncIntervalMillis,
            consistentReads,
            listShardsBackoffTimeMillis,
            maxListShardsRetryAttempts,
            maxCacheMissesBeforeReload,
            listShardsCacheAllowedAgeInSeconds,
            cacheMissWarningModulus,
            initialLeaseTableReadCapacity,
            initialLeaseTableWriteCapacity,
            deprecatedHierarchicalShardSyncer,
            tableCreatorCallback,
            dynamoDbRequestTimeout,
            billingMode,
            leaseTableDeletionProtectionEnabled,
            tags,
            leaseSerializer,
            // No custom shard detector, single-stream mode, default lease-cleanup config.
            null,
            false,
            LeaseManagementConfig.DEFAULT_LEASE_CLEANUP_CONFIG);
    // streamConfig is not a parameter of the delegate; record it directly.
    this.streamConfig = streamConfig;
}
/**
* Constructor.
* @param kinesisClient
* @param dynamoDBClient
* @param tableName
* @param workerIdentifier
* @param executorService
* @param failoverTimeMillis
* @param epsilonMillis
* @param maxLeasesForWorker
* @param maxLeasesToStealAtOneTime
* @param maxLeaseRenewalThreads
* @param cleanupLeasesUponShardCompletion
* @param ignoreUnexpectedChildShards
* @param shardSyncIntervalMillis
* @param consistentReads
* @param listShardsBackoffTimeMillis
* @param maxListShardsRetryAttempts
* @param maxCacheMissesBeforeReload
* @param listShardsCacheAllowedAgeInSeconds
* @param cacheMissWarningModulus
* @param initialLeaseTableReadCapacity
* @param initialLeaseTableWriteCapacity
* @param deprecatedHierarchicalShardSyncer
* @param tableCreatorCallback
* @param dynamoDbRequestTimeout
* @param billingMode
* @param leaseTableDeletionProtectionEnabled
* @param leaseSerializer
* @param customShardDetectorProvider
* @param isMultiStreamMode
* @param leaseCleanupConfig
*/
@Deprecated
public DynamoDBLeaseManagementFactory(
final KinesisAsyncClient kinesisClient,
final DynamoDbAsyncClient dynamoDBClient,
final String tableName,
final String workerIdentifier,
final ExecutorService executorService,
final long failoverTimeMillis,
final long epsilonMillis,
final int maxLeasesForWorker,
final int maxLeasesToStealAtOneTime,
final int maxLeaseRenewalThreads,
final boolean cleanupLeasesUponShardCompletion,
final boolean ignoreUnexpectedChildShards,
final long shardSyncIntervalMillis,
final boolean consistentReads,
final long listShardsBackoffTimeMillis,
final int maxListShardsRetryAttempts,
final int maxCacheMissesBeforeReload,
final long listShardsCacheAllowedAgeInSeconds,
final int cacheMissWarningModulus,
final long initialLeaseTableReadCapacity,
final long initialLeaseTableWriteCapacity,
final HierarchicalShardSyncer deprecatedHierarchicalShardSyncer,
final TableCreatorCallback tableCreatorCallback,
Duration dynamoDbRequestTimeout,
BillingMode billingMode,
final boolean leaseTableDeletionProtectionEnabled,
Collection<Tag> tags,
LeaseSerializer leaseSerializer,
Function<StreamConfig, ShardDetector> customShardDetectorProvider,
boolean isMultiStreamMode,
LeaseCleanupConfig leaseCleanupConfig) {
this(
kinesisClient,
dynamoDBClient,
@ -900,16 +200,21 @@ public class DynamoDBLeaseManagementFactory implements LeaseManagementFactory {
cacheMissWarningModulus,
initialLeaseTableReadCapacity,
initialLeaseTableWriteCapacity,
deprecatedHierarchicalShardSyncer,
hierarchicalShardSyncer,
tableCreatorCallback,
dynamoDbRequestTimeout,
billingMode,
leaseTableDeletionProtectionEnabled,
LeaseManagementConfig.DEFAULT_LEASE_TABLE_DELETION_PROTECTION_ENABLED,
LeaseManagementConfig.DEFAULT_LEASE_TABLE_PITR_ENABLED,
tags,
leaseSerializer,
customShardDetectorProvider,
isMultiStreamMode,
leaseCleanupConfig);
new DynamoDBLeaseSerializer(),
null,
false,
LeaseManagementConfig.DEFAULT_LEASE_CLEANUP_CONFIG,
new LeaseManagementConfig.WorkerUtilizationAwareAssignmentConfig(),
LeaseManagementConfig.GracefulLeaseHandoffConfig.builder().build());
this.streamConfig =
new StreamConfig(StreamIdentifier.singleStreamInstance(streamName), initialPositionInStream);
}
/**
@ -947,75 +252,6 @@ public class DynamoDBLeaseManagementFactory implements LeaseManagementFactory {
* @param leaseCleanupConfig
*/
/**
 * Constructor.
 *
 * <p>Deprecated delegating constructor: forwards all arguments unchanged, inserting the
 * default lease-table point-in-time-recovery (PITR) setting.
 *
 * @param kinesisClient Kinesis client used for stream and shard operations
 * @param dynamoDBClient DynamoDB client backing the lease table
 * @param tableName name of the lease table
 * @param workerIdentifier unique identifier of this worker
 * @param executorService executor used for lease-management work
 * @param failoverTimeMillis time after which an un-renewed lease may be taken over
 * @param enablePriorityLeaseAssignment whether priority lease assignment is enabled
 * @param epsilonMillis allowance for clock skew in lease timing
 * @param maxLeasesForWorker maximum number of leases a single worker may hold
 * @param maxLeasesToStealAtOneTime maximum leases to steal in one lease-taker pass
 * @param maxLeaseRenewalThreads size of the lease-renewal thread pool
 * @param cleanupLeasesUponShardCompletion whether to delete leases of fully processed shards
 * @param ignoreUnexpectedChildShards whether unexpected child shards are ignored during sync
 * @param shardSyncIntervalMillis interval between shard-sync tasks
 * @param consistentReads whether lease-table reads are strongly consistent
 * @param listShardsBackoffTimeMillis backoff between ListShards retries
 * @param maxListShardsRetryAttempts maximum ListShards retry attempts
 * @param maxCacheMissesBeforeReload shard-cache misses tolerated before a reload
 * @param listShardsCacheAllowedAgeInSeconds maximum age of the cached shard list
 * @param cacheMissWarningModulus controls how often repeated cache misses are logged
 * @param initialLeaseTableReadCapacity provisioned read capacity if the lease table is created
 * @param initialLeaseTableWriteCapacity provisioned write capacity if the lease table is created
 * @param deprecatedHierarchicalShardSyncer shard syncer (retained for backwards compatibility)
 * @param tableCreatorCallback callback invoked after the lease table is created
 * @param dynamoDbRequestTimeout timeout applied to DynamoDB requests
 * @param billingMode billing mode used if the lease table is created
 * @param leaseTableDeletionProtectionEnabled whether DynamoDB deletion protection is enabled
 * @param tags tags applied to the lease table on creation
 * @param leaseSerializer serializer converting leases to/from DynamoDB records
 * @param customShardDetectorProvider optional provider of a custom {@link ShardDetector}
 * @param isMultiStreamMode whether the consumer processes multiple streams
 * @param leaseCleanupConfig configuration for lease cleanup
 */
@Deprecated
public DynamoDBLeaseManagementFactory(
        final KinesisAsyncClient kinesisClient,
        final DynamoDbAsyncClient dynamoDBClient,
        final String tableName,
        final String workerIdentifier,
        final ExecutorService executorService,
        final long failoverTimeMillis,
        final boolean enablePriorityLeaseAssignment,
        final long epsilonMillis,
        final int maxLeasesForWorker,
        final int maxLeasesToStealAtOneTime,
        final int maxLeaseRenewalThreads,
        final boolean cleanupLeasesUponShardCompletion,
        final boolean ignoreUnexpectedChildShards,
        final long shardSyncIntervalMillis,
        final boolean consistentReads,
        final long listShardsBackoffTimeMillis,
        final int maxListShardsRetryAttempts,
        final int maxCacheMissesBeforeReload,
        final long listShardsCacheAllowedAgeInSeconds,
        final int cacheMissWarningModulus,
        final long initialLeaseTableReadCapacity,
        final long initialLeaseTableWriteCapacity,
        final HierarchicalShardSyncer deprecatedHierarchicalShardSyncer,
        final TableCreatorCallback tableCreatorCallback,
        Duration dynamoDbRequestTimeout,
        BillingMode billingMode,
        final boolean leaseTableDeletionProtectionEnabled,
        Collection<Tag> tags,
        LeaseSerializer leaseSerializer,
        Function<StreamConfig, ShardDetector> customShardDetectorProvider,
        boolean isMultiStreamMode,
        LeaseCleanupConfig leaseCleanupConfig) {
    this(
            kinesisClient,
            dynamoDBClient,
            tableName,
            workerIdentifier,
            executorService,
            failoverTimeMillis,
            enablePriorityLeaseAssignment,
            epsilonMillis,
            maxLeasesForWorker,
            maxLeasesToStealAtOneTime,
            maxLeaseRenewalThreads,
            cleanupLeasesUponShardCompletion,
            ignoreUnexpectedChildShards,
            shardSyncIntervalMillis,
            consistentReads,
            listShardsBackoffTimeMillis,
            maxListShardsRetryAttempts,
            maxCacheMissesBeforeReload,
            listShardsCacheAllowedAgeInSeconds,
            cacheMissWarningModulus,
            initialLeaseTableReadCapacity,
            initialLeaseTableWriteCapacity,
            deprecatedHierarchicalShardSyncer,
            tableCreatorCallback,
            dynamoDbRequestTimeout,
            billingMode,
            leaseTableDeletionProtectionEnabled,
            // PITR default inserted by this overload.
            LeaseManagementConfig.DEFAULT_LEASE_TABLE_PITR_ENABLED,
            tags,
            leaseSerializer,
            customShardDetectorProvider,
            isMultiStreamMode,
            leaseCleanupConfig);
}
public DynamoDBLeaseManagementFactory(
final KinesisAsyncClient kinesisClient,
final DynamoDbAsyncClient dynamoDBClient,
@ -1049,7 +285,9 @@ public class DynamoDBLeaseManagementFactory implements LeaseManagementFactory {
LeaseSerializer leaseSerializer,
Function<StreamConfig, ShardDetector> customShardDetectorProvider,
boolean isMultiStreamMode,
LeaseCleanupConfig leaseCleanupConfig) {
LeaseCleanupConfig leaseCleanupConfig,
final LeaseManagementConfig.WorkerUtilizationAwareAssignmentConfig workerUtilizationAwareAssignmentConfig,
final LeaseManagementConfig.GracefulLeaseHandoffConfig gracefulLeaseHandoffConfig) {
this.kinesisClient = kinesisClient;
this.dynamoDBClient = dynamoDBClient;
this.tableName = tableName;
@ -1083,10 +321,19 @@ public class DynamoDBLeaseManagementFactory implements LeaseManagementFactory {
this.isMultiStreamMode = isMultiStreamMode;
this.leaseCleanupConfig = leaseCleanupConfig;
this.tags = tags;
this.workerUtilizationAwareAssignmentConfig = workerUtilizationAwareAssignmentConfig;
this.gracefulLeaseHandoffConfig = gracefulLeaseHandoffConfig;
}
@Override
public LeaseCoordinator createLeaseCoordinator(@NonNull final MetricsFactory metricsFactory) {
return createLeaseCoordinator(metricsFactory, new ConcurrentHashMap<>());
}
@Override
public LeaseCoordinator createLeaseCoordinator(
@NonNull final MetricsFactory metricsFactory,
@NonNull final ConcurrentMap<ShardInfo, ShardConsumer> shardInfoShardConsumerMap) {
return new DynamoDBLeaseCoordinator(
this.createLeaseRefresher(),
workerIdentifier,
@ -1098,9 +345,15 @@ public class DynamoDBLeaseManagementFactory implements LeaseManagementFactory {
maxLeaseRenewalThreads,
initialLeaseTableReadCapacity,
initialLeaseTableWriteCapacity,
metricsFactory);
metricsFactory,
workerUtilizationAwareAssignmentConfig,
gracefulLeaseHandoffConfig,
shardInfoShardConsumerMap);
}
/**
* Even though this is deprecated, this is a method part of the public interface in LeaseManagementFactory
*/
@Override
@Deprecated
public ShardSyncTaskManager createShardSyncTaskManager(@NonNull final MetricsFactory metricsFactory) {
@ -1155,6 +408,10 @@ public class DynamoDBLeaseManagementFactory implements LeaseManagementFactory {
@Override
public DynamoDBLeaseRefresher createLeaseRefresher() {
final DdbTableConfig ddbTableConfig = new DdbTableConfig();
ddbTableConfig.billingMode(billingMode);
ddbTableConfig.readCapacity(initialLeaseTableReadCapacity);
ddbTableConfig.writeCapacity(initialLeaseTableWriteCapacity);
return new DynamoDBLeaseRefresher(
tableName,
dynamoDBClient,
@ -1162,12 +419,15 @@ public class DynamoDBLeaseManagementFactory implements LeaseManagementFactory {
consistentReads,
tableCreatorCallback,
dynamoDbRequestTimeout,
billingMode,
ddbTableConfig,
leaseTableDeletionProtectionEnabled,
leaseTablePitrEnabled,
tags);
}
/**
* Even though this is deprecated, this is a method part of the public interface in LeaseManagementFactory
*/
@Override
@Deprecated
public ShardDetector createShardDetector() {

View file

@ -14,6 +14,8 @@
*/
package software.amazon.kinesis.leases.dynamodb;
import java.math.BigDecimal;
import java.math.RoundingMode;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
@ -26,8 +28,10 @@ import java.util.concurrent.ConcurrentNavigableMap;
import java.util.concurrent.ConcurrentSkipListMap;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import java.util.concurrent.TimeUnit;
import java.util.function.Consumer;
import lombok.NonNull;
import lombok.RequiredArgsConstructor;
@ -39,6 +43,7 @@ import software.amazon.kinesis.common.StreamIdentifier;
import software.amazon.kinesis.leases.Lease;
import software.amazon.kinesis.leases.LeaseRefresher;
import software.amazon.kinesis.leases.LeaseRenewer;
import software.amazon.kinesis.leases.LeaseStatsRecorder;
import software.amazon.kinesis.leases.MultiStreamLease;
import software.amazon.kinesis.leases.exceptions.DependencyException;
import software.amazon.kinesis.leases.exceptions.InvalidStateException;
@ -48,21 +53,32 @@ import software.amazon.kinesis.metrics.MetricsLevel;
import software.amazon.kinesis.metrics.MetricsScope;
import software.amazon.kinesis.metrics.MetricsUtil;
import static java.util.Objects.nonNull;
import static software.amazon.kinesis.leases.LeaseStatsRecorder.BYTES_PER_KB;
/**
* An implementation of {@link LeaseRenewer} that uses DynamoDB via {@link LeaseRefresher}.
*/
@Slf4j
@KinesisClientInternalApi
public class DynamoDBLeaseRenewer implements LeaseRenewer {
/**
* 6 digit after decimal gives the granularity of 0.001 byte per second.
*/
private static final int DEFAULT_THROUGHPUT_DIGIT_AFTER_DECIMAL = 6;
private static final int RENEWAL_RETRIES = 2;
private static final String RENEW_ALL_LEASES_DIMENSION = "RenewAllLeases";
private static final String LEASE_RENEWER_INITIALIZE = "LeaseRenewerInitialize";
private final LeaseRefresher leaseRefresher;
private final String workerIdentifier;
private final long leaseDurationNanos;
private final ExecutorService executorService;
private final MetricsFactory metricsFactory;
private final LeaseStatsRecorder leaseStatsRecorder;
private final Consumer<Lease> leaseGracefulShutdownCallback;
private final ConcurrentNavigableMap<String, Lease> ownedLeases = new ConcurrentSkipListMap<>();
/**
@ -82,12 +98,16 @@ public class DynamoDBLeaseRenewer implements LeaseRenewer {
final String workerIdentifier,
final long leaseDurationMillis,
final ExecutorService executorService,
final MetricsFactory metricsFactory) {
final MetricsFactory metricsFactory,
final LeaseStatsRecorder leaseStatsRecorder,
final Consumer<Lease> leaseGracefulShutdownCallback) {
this.leaseRefresher = leaseRefresher;
this.workerIdentifier = workerIdentifier;
this.leaseDurationNanos = TimeUnit.MILLISECONDS.toNanos(leaseDurationMillis);
this.executorService = executorService;
this.metricsFactory = metricsFactory;
this.leaseStatsRecorder = leaseStatsRecorder;
this.leaseGracefulShutdownCallback = leaseGracefulShutdownCallback;
}
/**
@ -187,11 +207,21 @@ public class DynamoDBLeaseRenewer implements LeaseRenewer {
// ShutdownException).
boolean isLeaseExpired = lease.isExpired(leaseDurationNanos, System.nanoTime());
if (renewEvenIfExpired || !isLeaseExpired) {
final Double throughputPerKBps = this.leaseStatsRecorder.getThroughputKBps(leaseKey);
if (nonNull(throughputPerKBps)) {
lease.throughputKBps(BigDecimal.valueOf(throughputPerKBps)
.setScale(DEFAULT_THROUGHPUT_DIGIT_AFTER_DECIMAL, RoundingMode.HALF_UP)
.doubleValue());
}
renewedLease = leaseRefresher.renewLease(lease);
}
if (renewedLease) {
lease.lastCounterIncrementNanos(System.nanoTime());
}
if (lease.shutdownRequested()) {
// the underlying function will dedup
leaseGracefulShutdownCallback.accept(lease.copy());
}
}
if (renewedLease) {
@ -391,6 +421,12 @@ public class DynamoDBLeaseRenewer implements LeaseRenewer {
* every time we acquire a lease, it gets a new concurrency token.
*/
authoritativeLease.concurrencyToken(UUID.randomUUID());
if (nonNull(lease.throughputKBps())) {
leaseStatsRecorder.recordStats(LeaseStatsRecorder.LeaseStats.builder()
.leaseKey(lease.leaseKey())
.bytes(Math.round(lease.throughputKBps() * BYTES_PER_KB)) // Convert KB to Bytes
.build());
}
ownedLeases.put(authoritativeLease.leaseKey(), authoritativeLease);
}
}
@ -409,6 +445,7 @@ public class DynamoDBLeaseRenewer implements LeaseRenewer {
*/
@Override
public void dropLease(Lease lease) {
leaseStatsRecorder.dropLeaseStats(lease.leaseKey());
ownedLeases.remove(lease.leaseKey());
}
@ -417,26 +454,48 @@ public class DynamoDBLeaseRenewer implements LeaseRenewer {
*/
@Override
public void initialize() throws DependencyException, InvalidStateException, ProvisionedThroughputException {
Collection<Lease> leases = leaseRefresher.listLeases();
List<Lease> myLeases = new LinkedList<>();
boolean renewEvenIfExpired = true;
final MetricsScope scope = MetricsUtil.createMetricsWithOperation(metricsFactory, LEASE_RENEWER_INITIALIZE);
final ExecutorService singleThreadExecutorService = Executors.newSingleThreadExecutor();
boolean success = false;
try {
final Map.Entry<List<Lease>, List<String>> response =
leaseRefresher.listLeasesParallely(singleThreadExecutorService, 1);
for (Lease lease : leases) {
if (workerIdentifier.equals(lease.leaseOwner())) {
log.info(" Worker {} found lease {}", workerIdentifier, lease);
// Okay to renew even if lease is expired, because we start with an empty list and we add the lease to
// our list only after a successful renew. So we don't need to worry about the edge case where we could
// continue renewing a lease after signaling a lease loss to the application.
if (renewLease(lease, renewEvenIfExpired)) {
myLeases.add(lease);
}
} else {
log.debug("Worker {} ignoring lease {} ", workerIdentifier, lease);
if (!response.getValue().isEmpty()) {
log.warn("List of leaseKeys failed to deserialize : {} ", response.getValue());
}
}
addLeasesToRenew(myLeases);
final List<Lease> myLeases = new LinkedList<>();
boolean renewEvenIfExpired = true;
for (Lease lease : response.getKey()) {
if (workerIdentifier.equals(lease.leaseOwner())) {
log.info(" Worker {} found lease {}", workerIdentifier, lease);
// Okay to renew even if lease is expired, because we start with an empty list and we add the lease
// to
// our list only after a successful renew. So we don't need to worry about the edge case where we
// could
// continue renewing a lease after signaling a lease loss to the application.
if (renewLease(lease, renewEvenIfExpired)) {
myLeases.add(lease);
}
} else {
log.debug("Worker {} ignoring lease {} ", workerIdentifier, lease);
}
}
addLeasesToRenew(myLeases);
success = true;
} catch (final Exception e) {
// It's ok to swallow the exception here if we fail to discover all leases, as the assignment logic takes
// care of reassignment if some lease is expired.
log.warn("LeaseRefresher failed in initialization during renewing of pre assigned leases", e);
} finally {
singleThreadExecutorService.shutdown();
MetricsUtil.addCount(scope, "Fault", success ? 0 : 1, MetricsLevel.DETAILED);
MetricsUtil.endScope(scope);
}
}
private void verifyNotNull(Object object, String message) {

View file

@ -44,11 +44,8 @@ import software.amazon.kinesis.retrieval.kpl.ExtendedSequenceNumber;
*/
@KinesisClientInternalApi
public class DynamoDBLeaseSerializer implements LeaseSerializer {
private static final String LEASE_KEY_KEY = "leaseKey";
private static final String LEASE_OWNER_KEY = "leaseOwner";
private static final String LEASE_COUNTER_KEY = "leaseCounter";
private static final String OWNER_SWITCHES_KEY = "ownerSwitchesSinceCheckpoint";
private static final String CHECKPOINT_SEQUENCE_NUMBER_KEY = "checkpoint";
private static final String CHECKPOINT_SUBSEQUENCE_NUMBER_KEY = "checkpointSubSequenceNumber";
private static final String PENDING_CHECKPOINT_SEQUENCE_KEY = "pendingCheckpoint";
private static final String PENDING_CHECKPOINT_SUBSEQUENCE_KEY = "pendingCheckpointSubSequenceNumber";
@ -57,6 +54,11 @@ public class DynamoDBLeaseSerializer implements LeaseSerializer {
private static final String CHILD_SHARD_IDS_KEY = "childShardIds";
private static final String STARTING_HASH_KEY = "startingHashKey";
private static final String ENDING_HASH_KEY = "endingHashKey";
private static final String THROUGHOUT_PUT_KBPS = "throughputKBps";
private static final String CHECKPOINT_SEQUENCE_NUMBER_KEY = "checkpoint";
static final String CHECKPOINT_OWNER = "checkpointOwner";
static final String LEASE_OWNER_KEY = "leaseOwner";
static final String LEASE_KEY_KEY = "leaseKey";
@Override
public Map<String, AttributeValue> toDynamoRecord(final Lease lease) {
@ -110,6 +112,13 @@ public class DynamoDBLeaseSerializer implements LeaseSerializer {
lease.hashKeyRangeForLease().serializedEndingHashKey()));
}
if (lease.throughputKBps() != null) {
result.put(THROUGHOUT_PUT_KBPS, DynamoUtils.createAttributeValue(lease.throughputKBps()));
}
if (lease.checkpointOwner() != null) {
result.put(CHECKPOINT_OWNER, DynamoUtils.createAttributeValue(lease.checkpointOwner()));
}
return result;
}
@ -146,6 +155,14 @@ public class DynamoDBLeaseSerializer implements LeaseSerializer {
leaseToUpdate.hashKeyRange(HashKeyRangeForLease.deserialize(startingHashKey, endingHashKey));
}
if (DynamoUtils.safeGetDouble(dynamoRecord, THROUGHOUT_PUT_KBPS) != null) {
leaseToUpdate.throughputKBps(DynamoUtils.safeGetDouble(dynamoRecord, THROUGHOUT_PUT_KBPS));
}
if (DynamoUtils.safeGetString(dynamoRecord, CHECKPOINT_OWNER) != null) {
leaseToUpdate.checkpointOwner(DynamoUtils.safeGetString(dynamoRecord, CHECKPOINT_OWNER));
}
return leaseToUpdate;
}
@ -181,18 +198,9 @@ public class DynamoDBLeaseSerializer implements LeaseSerializer {
@Override
public Map<String, ExpectedAttributeValue> getDynamoLeaseOwnerExpectation(final Lease lease) {
Map<String, ExpectedAttributeValue> result = new HashMap<>();
ExpectedAttributeValue.Builder eavBuilder = ExpectedAttributeValue.builder();
if (lease.leaseOwner() == null) {
eavBuilder = eavBuilder.exists(false);
} else {
eavBuilder = eavBuilder.value(DynamoUtils.createAttributeValue(lease.leaseOwner()));
}
result.put(LEASE_OWNER_KEY, eavBuilder.build());
final Map<String, ExpectedAttributeValue> result = new HashMap<>();
result.put(LEASE_OWNER_KEY, buildExpectedAttributeValueIfExistsOrValue(lease.leaseOwner()));
result.put(CHECKPOINT_OWNER, buildExpectedAttributeValueIfExistsOrValue(lease.checkpointOwner()));
return result;
}
@ -247,9 +255,17 @@ public class DynamoDBLeaseSerializer implements LeaseSerializer {
.value(DynamoUtils.createAttributeValue(owner))
.action(AttributeAction.PUT)
.build());
// this method is currently used by assignLease and takeLease. In both case we want the checkpoint owner to be
// deleted as this is a fresh assignment
result.put(
CHECKPOINT_OWNER,
AttributeValueUpdate.builder().action(AttributeAction.DELETE).build());
String oldOwner = lease.leaseOwner();
if (oldOwner != null && !oldOwner.equals(owner)) {
String checkpointOwner = lease.checkpointOwner();
// if checkpoint owner is not null, this update is supposed to remove the checkpoint owner
// and transfer the lease ownership to the leaseOwner so incrementing the owner switch key
if (oldOwner != null && !oldOwner.equals(owner) || (checkpointOwner != null && checkpointOwner.equals(owner))) {
result.put(
OWNER_SWITCHES_KEY,
AttributeValueUpdate.builder()
@ -261,18 +277,38 @@ public class DynamoDBLeaseSerializer implements LeaseSerializer {
return result;
}
/**
 * Builds the attribute updates used to assign a lease to a new owner: the owner-related
 * updates produced by {@code getDynamoTakeLeaseUpdate} plus an ADD (+1) action on the
 * leaseCounter.
 *
 * @param lease lease that needs to be assigned
 * @param newOwner worker identifier of the new lease owner
 * @return map of attribute name to update operation
 */
@Override
public Map<String, AttributeValueUpdate> getDynamoAssignLeaseUpdate(final Lease lease, final String newOwner) {
    Map<String, AttributeValueUpdate> result = getDynamoTakeLeaseUpdate(lease, newOwner);
    // ADD of 1 atomically increments the lease counter (see getAttributeValueUpdateForAdd).
    result.put(LEASE_COUNTER_KEY, getAttributeValueUpdateForAdd());
    return result;
}
@Override
public Map<String, AttributeValueUpdate> getDynamoEvictLeaseUpdate(final Lease lease) {
Map<String, AttributeValueUpdate> result = new HashMap<>();
AttributeValue value = null;
final Map<String, AttributeValueUpdate> result = new HashMap<>();
// if checkpointOwner is not null, it means lease handoff is initiated. In this case we just remove the
// checkpoint owner so the next owner (leaseOwner) can pick up the lease without waiting for assignment.
// Otherwise, remove the leaseOwner
if (lease.checkpointOwner() == null) {
result.put(
LEASE_OWNER_KEY,
AttributeValueUpdate.builder()
.action(AttributeAction.DELETE)
.build());
}
// We always want to remove checkpointOwner, it's ok even if it's null
result.put(
LEASE_OWNER_KEY,
AttributeValueUpdate.builder()
.value(value)
.action(AttributeAction.DELETE)
.build());
CHECKPOINT_OWNER,
AttributeValueUpdate.builder().action(AttributeAction.DELETE).build());
result.put(LEASE_COUNTER_KEY, getAttributeValueUpdateForAdd());
return result;
}
@ -394,4 +430,58 @@ public class DynamoDBLeaseSerializer implements LeaseSerializer {
return definitions;
}
@Override
public Collection<KeySchemaElement> getWorkerIdToLeaseKeyIndexKeySchema() {
    // Index layout: lease owner is the partition (HASH) key, lease key the sort (RANGE) key.
    final KeySchemaElement partitionKey = KeySchemaElement.builder()
            .attributeName(LEASE_OWNER_KEY)
            .keyType(KeyType.HASH)
            .build();
    final KeySchemaElement sortKey = KeySchemaElement.builder()
            .attributeName(LEASE_KEY_KEY)
            .keyType(KeyType.RANGE)
            .build();
    final List<KeySchemaElement> schema = new ArrayList<>();
    schema.add(partitionKey);
    schema.add(sortKey);
    return schema;
}
@Override
public Collection<AttributeDefinition> getWorkerIdToLeaseKeyIndexAttributeDefinitions() {
    // Both index key attributes (lease owner and lease key) are strings.
    final AttributeDefinition ownerAttribute = AttributeDefinition.builder()
            .attributeName(LEASE_OWNER_KEY)
            .attributeType(ScalarAttributeType.S)
            .build();
    final AttributeDefinition leaseKeyAttribute = AttributeDefinition.builder()
            .attributeName(LEASE_KEY_KEY)
            .attributeType(ScalarAttributeType.S)
            .build();
    final List<AttributeDefinition> attributeDefinitions = new ArrayList<>();
    attributeDefinitions.add(ownerAttribute);
    attributeDefinitions.add(leaseKeyAttribute);
    return attributeDefinitions;
}
@Override
public Map<String, AttributeValueUpdate> getDynamoLeaseThroughputKbpsUpdate(Lease lease) {
    // Unconditionally overwrite (PUT) the stored throughput with the lease's current value.
    final Map<String, AttributeValueUpdate> updates = new HashMap<>();
    updates.put(
            THROUGHOUT_PUT_KBPS,
            AttributeValueUpdate.builder()
                    .value(DynamoUtils.createAttributeValue(lease.throughputKBps()))
                    .action(AttributeAction.PUT)
                    .build());
    return updates;
}
// null -> expect the attribute to be absent; non-null -> expect exactly this value.
private static ExpectedAttributeValue buildExpectedAttributeValueIfExistsOrValue(String value) {
    if (value == null) {
        return ExpectedAttributeValue.builder().exists(false).build();
    }
    return ExpectedAttributeValue.builder()
            .value(DynamoUtils.createAttributeValue(value))
            .build();
}
/**
 * Builds an {@link AttributeValueUpdate} that applies an ADD action with the value 1,
 * i.e. an atomic increment of a numeric attribute such as the lease counter.
 */
private static AttributeValueUpdate getAttributeValueUpdateForAdd() {
    return AttributeValueUpdate.builder()
            .value(DynamoUtils.createAttributeValue(1L))
            .action(AttributeAction.ADD)
            .build();
}
}

View file

@ -106,15 +106,6 @@ public class DynamoDBLeaseTaker implements LeaseTaker {
return this;
}
/**
 * Sets the very-old-lease duration multiplier via the misspelled legacy name.
 *
 * @deprecated Misspelled method, use {@link DynamoDBLeaseTaker#withVeryOldLeaseDurationNanosMultiplier(int)}
 */
@Deprecated
public DynamoDBLeaseTaker withVeryOldLeaseDurationNanosMultipler(long veryOldLeaseDurationNanosMultipler) {
    // NOTE(review): narrowing long -> int cast; values outside int range would wrap silently.
    this.veryOldLeaseDurationNanosMultiplier = (int) veryOldLeaseDurationNanosMultipler;
    return this;
}
/**
* Overrides the default very old lease duration nanos multiplier to increase the threshold for taking very old leases.
* Setting this to a higher value than 3 will increase the threshold for very old lease taking.

View file

@ -266,7 +266,8 @@ class ConsumerStates {
argument.idleTimeInMilliseconds(),
argument.aggregatorUtil(),
argument.metricsFactory(),
argument.schemaRegistryDecoder());
argument.schemaRegistryDecoder(),
argument.leaseCoordinator().leaseStatsRecorder());
}
@Override
@ -336,7 +337,8 @@ class ConsumerStates {
argument.shardRecordProcessor(),
argument.recordProcessorCheckpointer(),
consumer.shutdownNotification(),
argument.shardInfo());
argument.shardInfo(),
consumer.shardConsumerArgument().leaseCoordinator());
}
@Override

View file

@ -0,0 +1,213 @@
package software.amazon.kinesis.lifecycle;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ConcurrentMap;
import java.util.concurrent.Executors;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.TimeUnit;
import java.util.function.Supplier;
import com.google.common.util.concurrent.ThreadFactoryBuilder;
import lombok.Data;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import software.amazon.kinesis.annotations.KinesisClientInternalApi;
import software.amazon.kinesis.leases.Lease;
import software.amazon.kinesis.leases.LeaseCoordinator;
import software.amazon.kinesis.leases.ShardInfo;
import software.amazon.kinesis.leases.dynamodb.DynamoDBLeaseCoordinator;
import software.amazon.kinesis.leases.exceptions.DependencyException;
import software.amazon.kinesis.leases.exceptions.InvalidStateException;
import software.amazon.kinesis.leases.exceptions.ProvisionedThroughputException;
/**
* This class handles the graceful shutdown of shard consumers. When a lease is requested for shutdown, it will be
* enqueued from the lease renewal thread which will call the shard consumer of the lease to enqueue a shutdown request.
* The class monitors those leases and check if the shutdown is properly completed.
* If the shard consumer doesn't shut down within the given timeout, it will trigger a lease transfer.
*/
@Slf4j
@RequiredArgsConstructor
@KinesisClientInternalApi
public class LeaseGracefulShutdownHandler {
// Arbitrary number to run a similar frequency as the scheduler based on shardConsumerDispatchPollIntervalMillis
// which is how fast the scheduler triggers state changes. It's ok to add a few extra seconds of delay to the
// shutdown call since the leases should still be processed by the current owner, so there should be no
// processing delay due to this.
private static final long SHUTDOWN_CHECK_INTERVAL_MILLIS = 2000;
private final long shutdownTimeoutMillis;
private final ConcurrentMap<ShardInfo, ShardConsumer> shardInfoShardConsumerMap;
private final LeaseCoordinator leaseCoordinator;
private final Supplier<Long> currentTimeSupplier;
private final ConcurrentMap<ShardInfo, LeasePendingShutdown> shardInfoLeasePendingShutdownMap =
new ConcurrentHashMap<>();
private final ScheduledExecutorService executorService;
private volatile boolean isRunning = false;
/**
 * Factory method to create a new instance of LeaseGracefulShutdownHandler.
 *
 * @param shutdownTimeoutMillis Timeout for graceful shutdown of shard consumers.
 * @param shardInfoShardConsumerMap Map of shard info to shard consumer instances.
 * @param leaseCoordinator Lease coordinator instance to access lease information.
 * @return A new instance of LeaseGracefulShutdownHandler.
 */
public static LeaseGracefulShutdownHandler create(
        long shutdownTimeoutMillis,
        ConcurrentMap<ShardInfo, ShardConsumer> shardInfoShardConsumerMap,
        LeaseCoordinator leaseCoordinator) {
    return new LeaseGracefulShutdownHandler(
            shutdownTimeoutMillis,
            shardInfoShardConsumerMap,
            leaseCoordinator,
            // Wall-clock supplier; injectable in the canonical constructor for testability.
            System::currentTimeMillis,
            // Single-threaded scheduler; daemon thread so it never blocks JVM shutdown.
            Executors.newSingleThreadScheduledExecutor(new ThreadFactoryBuilder()
                    .setNameFormat("LeaseGracefulShutdown-%04d")
                    .setDaemon(true)
                    .build()));
}
/**
* Starts the shard consumer shutdown handler thread.
*/
public void start() {
if (!isRunning) {
log.info("Starting graceful lease handoff thread.");
executorService.scheduleAtFixedRate(
this::monitorGracefulShutdownLeases, 0, SHUTDOWN_CHECK_INTERVAL_MILLIS, TimeUnit.MILLISECONDS);
isRunning = true;
} else {
log.info("Graceful lease handoff thread already running, no need to start.");
}
}
/**
* Stops the shard consumer shutdown handler thread.
*/
public void stop() {
if (isRunning) {
log.info("Stopping graceful lease handoff thread.");
executorService.shutdown();
isRunning = false;
} else {
log.info("Graceful lease handoff thread already stopped.");
}
}
/**
* Enqueue a shutdown request for the given lease if the lease has requested shutdown and the shard consumer
* is not already shutdown.
*
* @param lease The lease to enqueue a shutdown request for.
*/
public void enqueueShutdown(Lease lease) {
if (lease == null || !lease.shutdownRequested() || !isRunning) {
return;
}
final ShardInfo shardInfo = DynamoDBLeaseCoordinator.convertLeaseToAssignment(lease);
final ShardConsumer consumer = shardInfoShardConsumerMap.get(shardInfo);
if (consumer == null || consumer.isShutdown()) {
shardInfoLeasePendingShutdownMap.remove(shardInfo);
} else {
// there could be change shard get enqueued after getting removed. This should be okay because
// this enqueue will be no-op and will be removed again because the shardConsumer associated with the
// shardInfo is shutdown by then.
shardInfoLeasePendingShutdownMap.computeIfAbsent(shardInfo, key -> {
log.info("Calling graceful shutdown for lease {}", lease.leaseKey());
LeasePendingShutdown leasePendingShutdown = new LeasePendingShutdown(lease, consumer);
initiateShutdown(leasePendingShutdown);
return leasePendingShutdown;
});
}
}
/**
* Wait for shutdown to complete or transfer ownership of lease to the next owner if timeout is met.
*/
private void monitorGracefulShutdownLeases() {
String leaseKey = null;
try {
for (ConcurrentMap.Entry<ShardInfo, LeasePendingShutdown> entry :
shardInfoLeasePendingShutdownMap.entrySet()) {
final LeasePendingShutdown leasePendingShutdown = entry.getValue();
final ShardInfo shardInfo = entry.getKey();
leaseKey = leasePendingShutdown.lease.leaseKey();
if (leasePendingShutdown.shardConsumer.isShutdown()
|| shardInfoShardConsumerMap.get(shardInfo) == null
|| leaseCoordinator.getCurrentlyHeldLease(leaseKey) == null) {
logTimeoutMessage(leasePendingShutdown);
shardInfoLeasePendingShutdownMap.remove(shardInfo);
} else if (getCurrentTimeMillis() >= leasePendingShutdown.timeoutTimestampMillis
&& !leasePendingShutdown.leaseTransferCalled) {
try {
log.info(
"Timeout {} millisecond reached waiting for lease {} to graceful handoff."
+ " Attempting to transfer the lease to {}",
shutdownTimeoutMillis,
leaseKey,
leasePendingShutdown.lease.leaseOwner());
transferLeaseIfOwner(leasePendingShutdown);
} catch (DependencyException | InvalidStateException | ProvisionedThroughputException e) {
log.warn("Failed to transfer lease for key {}. Will retry", leaseKey, e);
}
}
}
} catch (Exception e) {
log.error("Error in graceful shutdown for lease {}", leaseKey, e);
}
}
private void initiateShutdown(LeasePendingShutdown tracker) {
tracker.shardConsumer.gracefulShutdown(null);
tracker.shutdownRequested = true;
tracker.timeoutTimestampMillis = getCurrentTimeMillis() + shutdownTimeoutMillis;
}
private void logTimeoutMessage(LeasePendingShutdown leasePendingShutdown) {
if (leasePendingShutdown.leaseTransferCalled) {
final long timeElapsedSinceShutdownInitiated =
getCurrentTimeMillis() - leasePendingShutdown.timeoutTimestampMillis + shutdownTimeoutMillis;
log.info(
"Lease {} took {} milliseconds to complete the shutdown. "
+ "Consider tuning the GracefulLeaseHandoffTimeoutMillis to prevent timeouts, "
+ "if necessary.",
leasePendingShutdown.lease.leaseKey(),
timeElapsedSinceShutdownInitiated);
}
}
private void transferLeaseIfOwner(LeasePendingShutdown leasePendingShutdown)
throws ProvisionedThroughputException, InvalidStateException, DependencyException {
final Lease lease = leasePendingShutdown.lease;
if (leaseCoordinator.workerIdentifier().equals(lease.checkpointOwner())) {
// assignLease will increment the leaseCounter which will cause the heartbeat to stop on the current owner
// for the lease
leaseCoordinator.leaseRefresher().assignLease(lease, lease.leaseOwner());
} else {
// the worker ID check is just for sanity. We don't expect it to be different from the current worker.
log.error(
"Lease {} checkpoint owner mismatch found {} but it should be {}",
lease.leaseKey(),
lease.checkpointOwner(),
leaseCoordinator.workerIdentifier());
}
// mark it true because we don't want to enter the method again because update is not possible anymore.
leasePendingShutdown.leaseTransferCalled = true;
}
private long getCurrentTimeMillis() {
return currentTimeSupplier.get();
}
@Data
private static class LeasePendingShutdown {
final Lease lease;
final ShardConsumer shardConsumer;
long timeoutTimestampMillis;
boolean shutdownRequested = false;
boolean leaseTransferCalled = false;
}
}

View file

@ -24,6 +24,7 @@ import software.amazon.awssdk.services.kinesis.model.Shard;
import software.amazon.kinesis.annotations.KinesisClientInternalApi;
import software.amazon.kinesis.checkpoint.ShardRecordProcessorCheckpointer;
import software.amazon.kinesis.common.StreamIdentifier;
import software.amazon.kinesis.leases.LeaseStatsRecorder;
import software.amazon.kinesis.leases.ShardDetector;
import software.amazon.kinesis.leases.ShardInfo;
import software.amazon.kinesis.lifecycle.events.ProcessRecordsInput;
@ -65,6 +66,7 @@ public class ProcessTask implements ConsumerTask {
private final AggregatorUtil aggregatorUtil;
private final String shardInfoId;
private final SchemaRegistryDecoder schemaRegistryDecoder;
private final LeaseStatsRecorder leaseStatsRecorder;
public ProcessTask(
@NonNull ShardInfo shardInfo,
@ -79,7 +81,8 @@ public class ProcessTask implements ConsumerTask {
long idleTimeInMilliseconds,
@NonNull AggregatorUtil aggregatorUtil,
@NonNull MetricsFactory metricsFactory,
SchemaRegistryDecoder schemaRegistryDecoder) {
SchemaRegistryDecoder schemaRegistryDecoder,
@NonNull LeaseStatsRecorder leaseStatsRecorder) {
this.shardInfo = shardInfo;
this.shardInfoId = ShardInfo.getLeaseKey(shardInfo);
this.shardRecordProcessor = shardRecordProcessor;
@ -91,6 +94,7 @@ public class ProcessTask implements ConsumerTask {
this.idleTimeInMilliseconds = idleTimeInMilliseconds;
this.metricsFactory = metricsFactory;
this.schemaRegistryDecoder = schemaRegistryDecoder;
this.leaseStatsRecorder = leaseStatsRecorder;
if (!skipShardSyncAtWorkerInitializationIfLeasesExist) {
this.shard = shardDetector.shard(shardInfo.shardId());
@ -173,6 +177,7 @@ public class ProcessTask implements ConsumerTask {
recordProcessorCheckpointer.largestPermittedCheckpointValue()));
if (shouldCallProcessRecords(records)) {
publishLeaseStats(records);
callProcessRecords(processRecordsInput, records);
}
success = true;
@ -197,6 +202,15 @@ public class ProcessTask implements ConsumerTask {
}
}
private void publishLeaseStats(final List<KinesisClientRecord> records) {
leaseStatsRecorder.recordStats(LeaseStatsRecorder.LeaseStats.builder()
.bytes(records.stream()
.mapToInt(record -> record.data().limit())
.sum())
.leaseKey(ShardInfo.getLeaseKey(shardInfo))
.build());
}
private List<KinesisClientRecord> deaggregateAnyKplRecords(List<KinesisClientRecord> records) {
if (shard == null) {
return aggregatorUtil.deaggregate(records);

View file

@ -21,7 +21,6 @@ import java.util.concurrent.CompletableFuture;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.RejectedExecutionException;
import java.util.function.Function;
import com.google.common.annotations.VisibleForTesting;
import lombok.AccessLevel;
@ -35,8 +34,6 @@ import software.amazon.kinesis.exceptions.internal.BlockedOnParentShardException
import software.amazon.kinesis.leases.ShardInfo;
import software.amazon.kinesis.lifecycle.events.ProcessRecordsInput;
import software.amazon.kinesis.lifecycle.events.TaskExecutionListenerInput;
import software.amazon.kinesis.metrics.MetricsCollectingTaskDecorator;
import software.amazon.kinesis.metrics.MetricsFactory;
import software.amazon.kinesis.retrieval.RecordsPublisher;
/**
@ -59,12 +56,6 @@ public class ShardConsumer {
@NonNull
private final Optional<Long> logWarningForTaskAfterMillis;
/**
* @deprecated unused; to be removed in a "major" version bump
*/
@Deprecated
private final Function<ConsumerTask, ConsumerTask> taskMetricsDecorator;
private final int bufferSize;
private final TaskExecutionListener taskExecutionListener;
private final String streamIdentifier;
@ -95,27 +86,6 @@ public class ShardConsumer {
private ProcessRecordsInput shardEndProcessRecordsInput;
@Deprecated
public ShardConsumer(
RecordsPublisher recordsPublisher,
ExecutorService executorService,
ShardInfo shardInfo,
Optional<Long> logWarningForTaskAfterMillis,
ShardConsumerArgument shardConsumerArgument,
TaskExecutionListener taskExecutionListener) {
this(
recordsPublisher,
executorService,
shardInfo,
logWarningForTaskAfterMillis,
shardConsumerArgument,
ConsumerStates.INITIAL_STATE,
ShardConsumer.metricsWrappingFunction(shardConsumerArgument.metricsFactory()),
8,
taskExecutionListener,
LifecycleConfig.DEFAULT_READ_TIMEOUTS_TO_IGNORE);
}
public ShardConsumer(
RecordsPublisher recordsPublisher,
ExecutorService executorService,
@ -131,36 +101,11 @@ public class ShardConsumer {
logWarningForTaskAfterMillis,
shardConsumerArgument,
ConsumerStates.INITIAL_STATE,
ShardConsumer.metricsWrappingFunction(shardConsumerArgument.metricsFactory()),
8,
taskExecutionListener,
readTimeoutsToIgnoreBeforeWarning);
}
@Deprecated
public ShardConsumer(
RecordsPublisher recordsPublisher,
ExecutorService executorService,
ShardInfo shardInfo,
Optional<Long> logWarningForTaskAfterMillis,
ShardConsumerArgument shardConsumerArgument,
ConsumerState initialState,
Function<ConsumerTask, ConsumerTask> taskMetricsDecorator,
int bufferSize,
TaskExecutionListener taskExecutionListener) {
this(
recordsPublisher,
executorService,
shardInfo,
logWarningForTaskAfterMillis,
shardConsumerArgument,
initialState,
taskMetricsDecorator,
bufferSize,
taskExecutionListener,
LifecycleConfig.DEFAULT_READ_TIMEOUTS_TO_IGNORE);
}
//
// TODO: Make bufferSize configurable
//
@ -171,7 +116,6 @@ public class ShardConsumer {
Optional<Long> logWarningForTaskAfterMillis,
ShardConsumerArgument shardConsumerArgument,
ConsumerState initialState,
Function<ConsumerTask, ConsumerTask> taskMetricsDecorator,
int bufferSize,
TaskExecutionListener taskExecutionListener,
int readTimeoutsToIgnoreBeforeWarning) {
@ -183,7 +127,6 @@ public class ShardConsumer {
this.logWarningForTaskAfterMillis = logWarningForTaskAfterMillis;
this.taskExecutionListener = taskExecutionListener;
this.currentState = initialState;
this.taskMetricsDecorator = taskMetricsDecorator;
subscriber = new ShardConsumerSubscriber(
recordsPublisher, executorService, bufferSize, this, readTimeoutsToIgnoreBeforeWarning);
this.bufferSize = bufferSize;
@ -484,17 +427,18 @@ public class ShardConsumer {
}
/**
* Requests the shutdown of the this ShardConsumer. This should give the record processor a chance to checkpoint
* Requests the shutdown of the ShardConsumer. This should give the record processor a chance to checkpoint
* before being shutdown.
*
* @param shutdownNotification
* used to signal that the record processor has been given the chance to shutdown.
* @param shutdownNotification used to signal that the record processor has been given the chance to shut down.
*/
public void gracefulShutdown(ShutdownNotification shutdownNotification) {
if (subscriber != null) {
subscriber.cancel();
}
this.shutdownNotification = shutdownNotification;
if (shutdownNotification != null) {
this.shutdownNotification = shutdownNotification;
}
markForShutdown(ShutdownReason.REQUESTED);
}
@ -542,21 +486,4 @@ public class ShardConsumer {
return shutdownReason != null;
}
}
/**
* Default task wrapping function for metrics
*
* @param metricsFactory
* the factory used for reporting metrics
* @return a function that will wrap the task with a metrics reporter
*/
private static Function<ConsumerTask, ConsumerTask> metricsWrappingFunction(MetricsFactory metricsFactory) {
return (task) -> {
if (task == null) {
return null;
} else {
return new MetricsCollectingTaskDecorator(task, metricsFactory);
}
};
}
}

View file

@ -18,7 +18,12 @@ import lombok.AccessLevel;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import software.amazon.kinesis.annotations.KinesisClientInternalApi;
import software.amazon.kinesis.leases.Lease;
import software.amazon.kinesis.leases.LeaseCoordinator;
import software.amazon.kinesis.leases.ShardInfo;
import software.amazon.kinesis.leases.exceptions.DependencyException;
import software.amazon.kinesis.leases.exceptions.InvalidStateException;
import software.amazon.kinesis.leases.exceptions.ProvisionedThroughputException;
import software.amazon.kinesis.lifecycle.events.ShutdownRequestedInput;
import software.amazon.kinesis.processor.RecordProcessorCheckpointer;
import software.amazon.kinesis.processor.ShardRecordProcessor;
@ -33,23 +38,41 @@ public class ShutdownNotificationTask implements ConsumerTask {
private final ShardRecordProcessor shardRecordProcessor;
private final RecordProcessorCheckpointer recordProcessorCheckpointer;
private final ShutdownNotification shutdownNotification;
// TODO: remove if not used
private final ShardInfo shardInfo;
private final LeaseCoordinator leaseCoordinator;
@Override
public TaskResult call() {
final String leaseKey = ShardInfo.getLeaseKey(shardInfo);
final Lease currentShardLease = leaseCoordinator.getCurrentlyHeldLease(leaseKey);
try {
try {
shardRecordProcessor.shutdownRequested(ShutdownRequestedInput.builder()
.checkpointer(recordProcessorCheckpointer)
.build());
attemptLeaseTransfer(currentShardLease);
} catch (Exception ex) {
return new TaskResult(ex);
}
return new TaskResult(null);
} finally {
shutdownNotification.shutdownNotificationComplete();
if (shutdownNotification != null) {
shutdownNotification.shutdownNotificationComplete();
} else {
// shutdownNotification is null if this is a shard level graceful shutdown instead of a worker level
// one. We need to drop lease like what's done in the shutdownNotificationComplete so we can
// transition to next state.
leaseCoordinator.dropLease(currentShardLease);
}
}
}
private void attemptLeaseTransfer(Lease lease)
throws ProvisionedThroughputException, InvalidStateException, DependencyException {
if (lease != null && lease.shutdownRequested()) {
if (leaseCoordinator.workerIdentifier().equals(lease.checkpointOwner())) {
leaseCoordinator.leaseRefresher().assignLease(lease, lease.leaseOwner());
}
}
}

View file

@ -164,7 +164,6 @@ public class ShutdownTask implements ConsumerTask {
} else {
throwOnApplicationException(leaseKey, leaseLostAction, scope, startTime);
}
log.debug("Shutting down retrieval strategy for shard {}.", leaseKey);
recordsPublisher.shutdown();

View file

@ -49,7 +49,7 @@ public class RetrievalConfig {
*/
public static final String KINESIS_CLIENT_LIB_USER_AGENT = "amazon-kinesis-client-library-java";
public static final String KINESIS_CLIENT_LIB_USER_AGENT_VERSION = "2.6.1-SNAPSHOT";
public static final String KINESIS_CLIENT_LIB_USER_AGENT_VERSION = "3.0.0";
/**
* Client used to make calls to Kinesis for records retrieval

View file

@ -26,6 +26,7 @@ import lombok.NonNull;
import lombok.Setter;
import lombok.ToString;
import lombok.experimental.Accessors;
import lombok.extern.slf4j.Slf4j;
import software.amazon.awssdk.services.kinesis.KinesisAsyncClient;
import software.amazon.awssdk.services.kinesis.model.GetRecordsRequest;
import software.amazon.kinesis.retrieval.DataFetcherProviderConfig;
@ -38,12 +39,15 @@ import software.amazon.kinesis.retrieval.RetrievalSpecificConfig;
@Setter
@ToString
@EqualsAndHashCode
@Slf4j
public class PollingConfig implements RetrievalSpecificConfig {
public static final Duration DEFAULT_REQUEST_TIMEOUT = Duration.ofSeconds(30);
public static final int DEFAULT_MAX_RECORDS = 10000;
public static final long MIN_IDLE_MILLIS_BETWEEN_READS = 200L;
/**
* Configurable functional interface to override the existing DataFetcher.
*/
@ -138,9 +142,18 @@ public class PollingConfig implements RetrievalSpecificConfig {
/**
* Set the value for how long the ShardConsumer should sleep in between calls to
* {@link KinesisAsyncClient#getRecords(GetRecordsRequest)}. If this is not specified here the value provided in
* {@link RecordsFetcherFactory} will be used.
* {@link RecordsFetcherFactory} will be used. Cannot set value below MIN_IDLE_MILLIS_BETWEEN_READS.
*/
public PollingConfig idleTimeBetweenReadsInMillis(long idleTimeBetweenReadsInMillis) {
if (idleTimeBetweenReadsInMillis < MIN_IDLE_MILLIS_BETWEEN_READS) {
log.warn(
"idleTimeBetweenReadsInMillis must be greater than or equal to {} but current value is {}."
+ " Defaulting to minimum {}.",
MIN_IDLE_MILLIS_BETWEEN_READS,
idleTimeBetweenReadsInMillis,
MIN_IDLE_MILLIS_BETWEEN_READS);
idleTimeBetweenReadsInMillis = MIN_IDLE_MILLIS_BETWEEN_READS;
}
usePollingConfigIdleTimeValue = true;
this.idleTimeBetweenReadsInMillis = idleTimeBetweenReadsInMillis;
return this;

View file

@ -61,6 +61,7 @@ import software.amazon.kinesis.retrieval.RecordsDeliveryAck;
import software.amazon.kinesis.retrieval.RecordsPublisher;
import software.amazon.kinesis.retrieval.RecordsRetrieved;
import software.amazon.kinesis.retrieval.RetryableRetrievalException;
import software.amazon.kinesis.retrieval.ThrottlingReporter;
import software.amazon.kinesis.retrieval.kpl.ExtendedSequenceNumber;
import static software.amazon.kinesis.common.DiagnosticUtils.takeDelayedDeliveryActionIfRequired;
@ -109,6 +110,7 @@ public class PrefetchRecordsPublisher implements RecordsPublisher {
private boolean wasReset = false;
private Instant lastEventDeliveryTime = Instant.EPOCH;
private final RequestDetails lastSuccessfulRequestDetails = new RequestDetails();
private final ThrottlingReporter throttlingReporter;
@Data
@Accessors(fluent = true)
@ -233,6 +235,7 @@ public class PrefetchRecordsPublisher implements RecordsPublisher {
@NonNull final MetricsFactory metricsFactory,
@NonNull final String operation,
@NonNull final String shardId,
final ThrottlingReporter throttlingReporter,
final long awaitTerminationTimeoutMillis) {
this.getRecordsRetrievalStrategy = getRecordsRetrievalStrategy;
this.maxRecordsPerCall = maxRecordsPerCall;
@ -248,6 +251,7 @@ public class PrefetchRecordsPublisher implements RecordsPublisher {
this.idleMillisBetweenCalls = idleMillisBetweenCalls;
this.defaultGetRecordsCacheDaemon = new DefaultGetRecordsCacheDaemon();
Validate.notEmpty(operation, "Operation cannot be empty");
this.throttlingReporter = throttlingReporter;
this.operation = operation;
this.streamId = this.getRecordsRetrievalStrategy.dataFetcher().getStreamIdentifier();
this.streamAndShardId = this.streamId.serialize() + ":" + shardId;
@ -279,7 +283,8 @@ public class PrefetchRecordsPublisher implements RecordsPublisher {
final long idleMillisBetweenCalls,
final MetricsFactory metricsFactory,
final String operation,
final String shardId) {
final String shardId,
final ThrottlingReporter throttlingReporter) {
this(
maxPendingProcessRecordsInput,
maxByteSize,
@ -291,6 +296,7 @@ public class PrefetchRecordsPublisher implements RecordsPublisher {
metricsFactory,
operation,
shardId,
throttlingReporter,
DEFAULT_AWAIT_TERMINATION_TIMEOUT_MILLIS);
}
@ -555,6 +561,7 @@ public class PrefetchRecordsPublisher implements RecordsPublisher {
recordsRetrieved.lastBatchSequenceNumber);
addArrivedRecordsInput(recordsRetrieved);
drainQueueForRequests();
throttlingReporter.success();
} catch (PositionResetException pse) {
throw pse;
} catch (RetryableRetrievalException rre) {
@ -584,10 +591,11 @@ public class PrefetchRecordsPublisher implements RecordsPublisher {
publisherSession.dataFetcher().restartIterator();
} catch (ProvisionedThroughputExceededException e) {
// Update the lastSuccessfulCall if we get a throttling exception so that we back off idleMillis
// for the next call
lastSuccessfulCall = Instant.now();
log.error("{} : Exception thrown while fetching records from Kinesis", streamAndShardId, e);
log.error(
"{} : ProvisionedThroughputExceededException thrown while fetching records from Kinesis",
streamAndShardId,
e);
throttlingReporter.throttled();
} catch (SdkException e) {
log.error("{} : Exception thrown while fetching records from Kinesis", streamAndShardId, e);
} finally {

View file

@ -24,6 +24,7 @@ import software.amazon.kinesis.retrieval.DataFetchingStrategy;
import software.amazon.kinesis.retrieval.GetRecordsRetrievalStrategy;
import software.amazon.kinesis.retrieval.RecordsFetcherFactory;
import software.amazon.kinesis.retrieval.RecordsPublisher;
import software.amazon.kinesis.retrieval.ThrottlingReporter;
@Slf4j
@KinesisClientInternalApi
@ -32,6 +33,7 @@ public class SimpleRecordsFetcherFactory implements RecordsFetcherFactory {
private int maxByteSize = 8 * 1024 * 1024;
private int maxRecordsCount = 30000;
private long idleMillisBetweenCalls = 1500L;
private int maxConsecutiveThrottles = 5;
private DataFetchingStrategy dataFetchingStrategy = DataFetchingStrategy.DEFAULT;
@Override
@ -56,7 +58,8 @@ public class SimpleRecordsFetcherFactory implements RecordsFetcherFactory {
idleMillisBetweenCalls,
metricsFactory,
"ProcessTask",
shardId);
shardId,
new ThrottlingReporter(maxConsecutiveThrottles, shardId));
}
@Override

View file

@ -7,7 +7,6 @@ import java.util.List;
import com.amazonaws.services.schemaregistry.common.Schema;
import com.amazonaws.services.schemaregistry.deserializers.GlueSchemaRegistryDeserializer;
import lombok.extern.slf4j.Slf4j;
import software.amazon.kinesis.common.KinesisClientLibraryPackage;
import software.amazon.kinesis.retrieval.KinesisClientRecord;
/**
@ -15,7 +14,7 @@ import software.amazon.kinesis.retrieval.KinesisClientRecord;
*/
@Slf4j
public class SchemaRegistryDecoder {
private static final String USER_AGENT_APP_NAME = "kcl" + "-" + KinesisClientLibraryPackage.VERSION;
private static final String USER_AGENT_APP_NAME = "kcl" + "-" + "3.0.0";
private final GlueSchemaRegistryDeserializer glueSchemaRegistryDeserializer;
public SchemaRegistryDecoder(GlueSchemaRegistryDeserializer glueSchemaRegistryDeserializer) {

View file

@ -0,0 +1,61 @@
package software.amazon.kinesis.utils;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;

import lombok.extern.slf4j.Slf4j;
/**
 * Helpers for reading Linux cgroup (control group) files, used to determine the CPU
 * resources available to a containerized worker.
 */
public class Cgroup {

    /**
     * Reads and returns the first line of the file at the given path.
     *
     * @param path absolute path of the file to read
     * @return the first line of the file (may be {@code null} if the file is empty)
     * @throws IllegalArgumentException if the file does not exist or cannot be read
     */
    public static String readSingleLineFile(String path) {
        final File file = new File(path);
        if (!file.exists()) {
            throw new IllegalArgumentException(String.format("Failed to read file. %s does not exist", path));
        }
        // try-with-resources guarantees the reader is closed; a failure on close is
        // suppressed on the primary exception instead of requiring a manual finally block.
        try (BufferedReader bufferedReader = new BufferedReader(new FileReader(file))) {
            return bufferedReader.readLine();
        } catch (final IOException e) {
            throw new IllegalArgumentException("Failed to read file.", e);
        }
    }

    /**
     * Calculates the number of available cpus from the cpuset
     * See https://docs.kernel.org/admin-guide/cgroup-v2.html#cpuset for more information
     * "0-7" represents 8 cores
     * "0-4,6,8-10" represents 9 cores (cores 0,1,2,3,4 and core 6 and core 8,9,10)
     * @param cpuSet a single line from the cgroup cpuset file
     * @return the number of available cpus
     */
    public static int getAvailableCpusFromEffectiveCpuSet(final String cpuSet) {
        final String[] cpuSetArr = cpuSet.split(",");
        int sumCpus = 0;
        for (String cpuSetGroup : cpuSetArr) {
            if (cpuSetGroup.contains("-")) {
                final String[] cpuSetGroupSplit = cpuSetGroup.split("-");
                // Range bounds are inclusive, e.g. "0-7" is 8 cpus.
                sumCpus += Integer.parseInt(cpuSetGroupSplit[1]) - Integer.parseInt(cpuSetGroupSplit[0]) + 1;
            } else {
                // A bare number is a single cpu.
                sumCpus += 1;
            }
        }
        return sumCpus;
    }
}

View file

@ -0,0 +1,31 @@
package software.amazon.kinesis.utils;
import lombok.Getter;
import lombok.RequiredArgsConstructor;
/**
 * Uses the formula mentioned below for simple ExponentialMovingAverage
 * <a href="https://en.wikipedia.org/wiki/Moving_average#Exponential_moving_average"/>
 *
 * Values of alpha close to 1 have less of a smoothing effect and give greater weight to recent changes in the data,
 * while values of alpha closer to 0 have a greater smoothing effect and are less responsive to recent changes.
 */
public class ExponentialMovingAverage {

    // Smoothing factor in [0, 1]; weight given to the most recent observation.
    private final double alpha;

    // Current average; meaningless until the first add() call.
    private double value;

    // Tracks whether the first observation has seeded the average.
    private boolean initialized = false;

    public ExponentialMovingAverage(final double alpha) {
        this.alpha = alpha;
    }

    /**
     * @return the current smoothed value (0 until the first observation is added)
     */
    public double getValue() {
        return value;
    }

    /**
     * Folds a new observation into the moving average. The first observation seeds the
     * average directly; subsequent ones are blended via alpha.
     *
     * @param newValue the observation to incorporate
     */
    public void add(final double newValue) {
        this.value = initialized ? alpha * newValue + (1 - alpha) * this.value : newValue;
        this.initialized = true;
    }
}

View file

@ -0,0 +1,44 @@
package software.amazon.kinesis.utils;
import java.util.AbstractMap;
import java.util.List;
import java.util.Map;
/**
 * Small numeric helpers for mean and population standard deviation.
 */
public class Statistics {

    /**
     * Calculates the simple mean of the given values
     * @param values list of values (double)
     * @return mean of the given values, if the {@param values} is empty then returns 0;
     */
    public static double calculateSimpleMean(final List<Double> values) {
        if (values.isEmpty()) {
            return 0D;
        }
        double total = 0.0;
        for (final double value : values) {
            total += value;
        }
        return total / values.size();
    }

    /**
     * For the given values find the standard deviation (SD).
     * For details of SD calculation ref : <a href="https://en.wikipedia.org/wiki/Standard_deviation"/>
     * @param values list of values (double)
     * @return Map.Entry of mean to standard deviation for {@param values}, if {@param values} is empty then return
     * Map.Entry with 0 as mean and 0 as SD.
     */
    public static Map.Entry<Double, Double> calculateStandardDeviationAndMean(final List<Double> values) {
        if (values.isEmpty()) {
            return new AbstractMap.SimpleEntry<>(0D, 0D);
        }
        final double mean = calculateSimpleMean(values);
        // Sum of squared deviations from the mean; divided by n (population variance), not n-1.
        double squaredDeviationSum = 0.0;
        for (final double value : values) {
            squaredDeviationSum += Math.pow(value - mean, 2);
        }
        final double standardDeviation = Math.sqrt(squaredDeviationSum / values.size());
        return new AbstractMap.SimpleEntry<>(mean, standardDeviation);
    }
}

View file

@ -0,0 +1,92 @@
package software.amazon.kinesis.worker;
import java.util.ArrayList;
import java.util.List;
import java.util.Optional;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import software.amazon.kinesis.annotations.KinesisClientInternalApi;
import software.amazon.kinesis.worker.metric.OperatingRange;
import software.amazon.kinesis.worker.metric.WorkerMetric;
import software.amazon.kinesis.worker.metric.impl.container.Cgroupv1CpuWorkerMetric;
import software.amazon.kinesis.worker.metric.impl.container.Cgroupv2CpuWorkerMetric;
import software.amazon.kinesis.worker.metric.impl.container.EcsCpuWorkerMetric;
import software.amazon.kinesis.worker.metric.impl.linux.LinuxCpuWorkerMetric;
import software.amazon.kinesis.worker.platform.Ec2Resource;
import software.amazon.kinesis.worker.platform.EcsResource;
import software.amazon.kinesis.worker.platform.EksResource;
import software.amazon.kinesis.worker.platform.OperatingRangeDataProvider;
import software.amazon.kinesis.worker.platform.ResourceMetadataProvider;
/**
 * Class to select appropriate WorkerMetricStats based on the operating range provider that is available on the instance.
 */
@Slf4j
@RequiredArgsConstructor
@KinesisClientInternalApi
public class WorkerMetricsSelector {

    // Default operating range allowing the full 0-100% utilization band.
    private static final OperatingRange DEFAULT_100_PERC_UTILIZED_OPERATING_RANGE =
            OperatingRange.builder().maxUtilization(100).build();

    // Candidate compute platforms, probed in order; the first matching platform wins.
    private final List<ResourceMetadataProvider> workerComputePlatforms;

    /**
     * Factory method to create an instance of WorkerMetricsSelector.
     *
     * @return WorkerMetricsSelector instance
     */
    public static WorkerMetricsSelector create() {
        final List<ResourceMetadataProvider> providers = new ArrayList<>();
        providers.add(EcsResource.create());
        providers.add(EksResource.create());
        // ec2 has to be the last one to check
        providers.add(Ec2Resource.create());
        return new WorkerMetricsSelector(providers);
    }

    /**
     * Probes each candidate platform in order and returns the operating-range data provider
     * of the first platform this worker is running on, if any.
     */
    private Optional<OperatingRangeDataProvider> getOperatingRangeDataProvider() {
        return workerComputePlatforms.stream()
                .filter(ResourceMetadataProvider::isOnPlatform)
                .findFirst()
                .flatMap(platform -> {
                    log.info("Worker is running on {}", platform.getPlatform());
                    return platform.getOperatingRangeDataProvider();
                });
    }

    /**
     * Returns a list of WorkerMetricStats based on the operating range provider the worker uses.
     *
     * @return List of WorkerMetricStats
     */
    public List<WorkerMetric> getDefaultWorkerMetrics() {
        final List<WorkerMetric> workerMetrics = new ArrayList<>();
        final Optional<OperatingRangeDataProvider> maybeProvider = getOperatingRangeDataProvider();
        if (!maybeProvider.isPresent()) {
            // No recognizable platform metadata; callers get an empty list and no default metrics.
            log.warn("Did not find an operating range metadata provider.");
            return workerMetrics;
        }
        final OperatingRangeDataProvider dataProvider = maybeProvider.get();
        log.info("Worker has operating range metadata provider {} ", dataProvider);
        // Each provider maps to exactly one CPU worker-metric implementation.
        switch (dataProvider) {
            case LINUX_PROC:
                workerMetrics.add(new LinuxCpuWorkerMetric(DEFAULT_100_PERC_UTILIZED_OPERATING_RANGE));
                break;
            case LINUX_ECS_METADATA_KEY_V4:
                workerMetrics.add(new EcsCpuWorkerMetric(DEFAULT_100_PERC_UTILIZED_OPERATING_RANGE));
                break;
            case LINUX_EKS_CGROUP_V2:
                workerMetrics.add(new Cgroupv2CpuWorkerMetric(DEFAULT_100_PERC_UTILIZED_OPERATING_RANGE));
                break;
            case LINUX_EKS_CGROUP_V1:
                workerMetrics.add(new Cgroupv1CpuWorkerMetric(DEFAULT_100_PERC_UTILIZED_OPERATING_RANGE));
                break;
            default:
                break;
        }
        return workerMetrics;
    }
}

View file

@ -0,0 +1,20 @@
package software.amazon.kinesis.worker.metric;
import com.google.common.base.Preconditions;
import lombok.Builder;
import lombok.Data;
/**
 * Operating range configuration for a worker metric.
 */
@Data
@Builder
public class OperatingRange {

    /**
     * Max utilization percentage allowed for the workerMetrics.
     */
    private final int maxUtilization;

    private OperatingRange(final int maxUtilization) {
        // Reject anything outside the 0-100 percentage band; same exception type and
        // message as Preconditions.checkArgument would produce.
        if (maxUtilization < 0 || maxUtilization > 100) {
            throw new IllegalArgumentException("Invalid maxUtilization value");
        }
        this.maxUtilization = maxUtilization;
    }
}

View file

@ -0,0 +1,52 @@
package software.amazon.kinesis.worker.metric;
import com.google.common.base.Preconditions;
import lombok.Builder;
import lombok.Getter;
import lombok.NonNull;
/**
 * A single worker-level resource metric (e.g. CPU) that can be captured as a normalized
 * utilization percentage and carries its own configured operating range.
 */
public interface WorkerMetric {
    /**
     * WorkerMetricStats short name that is used as attribute name for it in storage.
     * @return short name for the WorkerMetricStats
     */
    String getShortName();

    /**
     * Current WorkerMetricValue. WorkerMetricValue is a normalized percentage value to its max configured limits.
     * E.g., if for a worker max network bandwidth is 10Gbps and current used bandwidth is 2Gbps, then WorkerMetricValue for
     * NetworkWorkerMetrics will be 20 (%).
     *
     * @return WorkerMetricValue between 0 and 100 (both inclusive)
     */
    WorkerMetricValue capture();

    /**
     * Gets the operating range for this workerMetrics
     * @return Operating range for this workerMetrics
     */
    OperatingRange getOperatingRange();

    /**
     * Type of the current WorkerMetricStats.
     * @return WorkerMetricType
     */
    WorkerMetricType getWorkerMetricType();

    /**
     * WorkerMetricValue model class is used as return type for the capture() method to have a strong checks at the build
     * time of the object itself.
     */
    @Builder
    class WorkerMetricValue {

        // Normalized utilization percentage; constructor enforces the [0, 100] range.
        @Getter
        private final Double value;

        private WorkerMetricValue(@NonNull final Double value) {
            // Fail fast on out-of-range values so an invalid instance can never be built.
            Preconditions.checkArgument(
                    !(value < 0 || value > 100), value + " is either less than 0 or greater than 100");
            this.value = value;
        }
    }
}

View file

@ -0,0 +1,16 @@
package software.amazon.kinesis.worker.metric;
import lombok.Getter;
import lombok.RequiredArgsConstructor;
/**
 * Enumerates the kinds of worker metrics. Each type carries the abbreviated name that is used as
 * its attribute name in storage.
 */
public enum WorkerMetricType {
    CPU("C"),
    MEMORY("M"),
    NETWORK_IN("NI"),
    NETWORK_OUT("NO"),
    THROUGHPUT("T");

    private final String shortName;

    WorkerMetricType(final String shortName) {
        this.shortName = shortName;
    }

    public String getShortName() {
        return shortName;
    }
}

View file

@ -0,0 +1,128 @@
package software.amazon.kinesis.worker.metric.impl.container;
import java.time.Clock;
import java.util.concurrent.TimeUnit;
import lombok.AccessLevel;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import software.amazon.kinesis.worker.metric.OperatingRange;
import software.amazon.kinesis.worker.metric.WorkerMetric;
import software.amazon.kinesis.worker.metric.WorkerMetricType;
import static software.amazon.kinesis.utils.Cgroup.getAvailableCpusFromEffectiveCpuSet;
import static software.amazon.kinesis.utils.Cgroup.readSingleLineFile;
/**
 * Utilizes Linux Control Groups by reading cpu time and available cpu from the cgroup directory. This works for
 * Elastic Kubernetes Service (EKS) containers running on Linux instances which use cgroupv1.
 *
 * EC2 instances must use a Linux instance that uses cgroupv1. Amazon Linux 2 uses cgroupv1.
 * Fargate versions 1.4.0 and 1.3.0 use Amazon Linux 2 and can use this.
 *
 * CPU time is measured in CPU core time. A container is limited by the amount of CPU core time it is allocated. So
 * if over a second the container uses 0.5 CPU core time and is allocated 2 CPU cores, the cpu utilization would be
 * 25%.
 *
 * When this is invoked for the first time, the value returned is always 0 as the previous values are not available
 * to calculate the diff.
 * In case a file is not present or any other exception occurs, this throws IllegalArgumentException.
 */
@Slf4j
@RequiredArgsConstructor(access = AccessLevel.PACKAGE)
public class Cgroupv1CpuWorkerMetric implements WorkerMetric {

    // Guards the read-then-update of the last* snapshot fields below.
    private static final Object LOCK_OBJECT = new Object();
    private static final WorkerMetricType CPU_WORKER_METRICS_TYPE = WorkerMetricType.CPU;

    // Standard cgroupv1 file locations; overridable via the package-private constructor for testing.
    private static final String CGROUP_ROOT = "/sys/fs/cgroup/";
    private static final String CPU_TIME_FILE = CGROUP_ROOT + "cpu/cpuacct.usage";
    private static final String CPU_CFS_QUOTA_FILE = CGROUP_ROOT + "cpu/cpu.cfs_quota_us";
    private static final String CPU_CFS_PERIOD_FILE = CGROUP_ROOT + "cpu/cpu.cfs_period_us";
    private static final String EFFECTIVE_CPU_SET_FILE = CGROUP_ROOT + "cpuset/cpuset.effective_cpus";

    private final OperatingRange operatingRange;
    private final String cpuTimeFile;
    private final String cfsQuotaFile;
    private final String cfsPeriodFile;
    private final String effectiveCpuSetFile;
    private final Clock clock;

    // Number of CPU cores this container may use; computed lazily on first capture (-1 = not yet computed).
    private double cpuLimit = -1;
    // Snapshot from the previous capture used to compute the usage delta (0 = no previous capture yet).
    private long lastCpuUseTimeNanos = 0;
    private long lastSystemTimeNanos = 0;

    /**
     * Creates a metric that reads from the standard cgroupv1 file locations using the system UTC clock.
     *
     * @param operatingRange operating range for this metric
     */
    public Cgroupv1CpuWorkerMetric(final OperatingRange operatingRange) {
        this(
                operatingRange,
                CPU_TIME_FILE,
                CPU_CFS_QUOTA_FILE,
                CPU_CFS_PERIOD_FILE,
                EFFECTIVE_CPU_SET_FILE,
                Clock.systemUTC());
    }

    @Override
    public String getShortName() {
        return CPU_WORKER_METRICS_TYPE.getShortName();
    }

    /**
     * Captures the percent CPU utilization since the previous invocation.
     *
     * @return WorkerMetricValue between 0 and 100 (both inclusive); 0 on the first call
     */
    @Override
    public WorkerMetricValue capture() {
        return WorkerMetricValue.builder().value(calculateCpuUsage()).build();
    }

    // Computes percent utilization as (CPU core time consumed since last call) / (allocated cores).
    private double calculateCpuUsage() {
        if (cpuLimit == -1) {
            cpuLimit = calculateCpuLimit();
        }

        final long cpuTimeNanos = Long.parseLong(readSingleLineFile(cpuTimeFile));
        final long currentTimeNanos = TimeUnit.MILLISECONDS.toNanos(clock.millis());

        boolean skip = false;
        double cpuCoreTimeUsed;
        synchronized (LOCK_OBJECT) {
            if (lastCpuUseTimeNanos == 0 && lastSystemTimeNanos == 0) {
                // Case where this is a first call so no diff available
                skip = true;
            }

            final long nanoTimeDiff = currentTimeNanos - lastSystemTimeNanos;
            final long cpuUseDiff = cpuTimeNanos - lastCpuUseTimeNanos;
            // This value is not a percent, but rather how much CPU core time was consumed. i.e. this number can be
            // 2.2 which stands for 2.2 CPU cores were fully utilized. If this number is less than 1 then that means
            // that less than 1 CPU core was used.
            cpuCoreTimeUsed = ((double) cpuUseDiff / nanoTimeDiff);
            lastCpuUseTimeNanos = cpuTimeNanos;
            lastSystemTimeNanos = currentTimeNanos;
        }

        if (skip) {
            return 0D;
        } else {
            // In case of rounding error, treat everything above 100% as 100%
            return Math.min(100.0, cpuCoreTimeUsed / cpuLimit * 100.0);
        }
    }

    // Determines how many CPU cores this container is allowed to use, from the CFS quota/period
    // or, when no quota is set, from the effective cpuset.
    private double calculateCpuLimit() {
        // Documentation on these values:
        // https://docs.redhat.com/en/documentation/red_hat_enterprise_linux/6/html/resource_management_guide/sec-cpu#sect-cfs
        final long cfsQuota = Long.parseLong(readSingleLineFile(cfsQuotaFile));
        final long cfsPeriod = Long.parseLong(readSingleLineFile(cfsPeriodFile));
        if (cfsQuota == -1) {
            // If quota is -1, a limit is not set on the container. The container can use all available cores.
            return getAvailableCpusFromEffectiveCpuSet(readSingleLineFile(effectiveCpuSetFile));
        } else {
            return ((double) cfsQuota) / cfsPeriod;
        }
    }

    @Override
    public OperatingRange getOperatingRange() {
        return operatingRange;
    }

    @Override
    public WorkerMetricType getWorkerMetricType() {
        return CPU_WORKER_METRICS_TYPE;
    }
}

View file

@ -0,0 +1,128 @@
package software.amazon.kinesis.worker.metric.impl.container;
import java.time.Clock;
import java.util.concurrent.TimeUnit;
import lombok.AccessLevel;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import software.amazon.kinesis.worker.metric.OperatingRange;
import software.amazon.kinesis.worker.metric.WorkerMetric;
import software.amazon.kinesis.worker.metric.WorkerMetricType;
import static software.amazon.kinesis.utils.Cgroup.getAvailableCpusFromEffectiveCpuSet;
import static software.amazon.kinesis.utils.Cgroup.readSingleLineFile;
/**
 * Utilizes Linux Control Groups by reading cpu time and available cpu from the cgroup directory. This works for
 * Elastic Kubernetes Service (EKS) containers running on Linux instances which use cgroupv2.
 *
 * EC2 instances must use a Linux instance that uses cgroupv2. Amazon Linux 2023 uses cgroupv2.
 *
 * CPU time is measured in CPU core time. A container is limited by the amount of CPU core time it is allocated. So
 * if over a second the container uses 0.5 CPU core time and is allocated 2 CPU cores, the cpu utilization would be
 * 25%.
 *
 * When this is invoked for the first time, the value returned is always 0 as the previous values are not available
 * to calculate the diff.
 * In case a file is not present or any other exception occurs, this throws IllegalArgumentException.
 */
@Slf4j
@RequiredArgsConstructor(access = AccessLevel.PACKAGE)
public class Cgroupv2CpuWorkerMetric implements WorkerMetric {

    // Guards the read-then-update of the last* snapshot fields below.
    private static final Object LOCK_OBJECT = new Object();
    private static final WorkerMetricType CPU_WORKER_METRICS_TYPE = WorkerMetricType.CPU;

    // Standard cgroupv2 file locations; overridable via the package-private constructor for testing.
    private static final String CGROUP_ROOT = "/sys/fs/cgroup/";
    private static final String CPU_MAX_FILE = CGROUP_ROOT + "cpu.max";
    private static final String EFFECTIVE_CPU_SET_FILE = CGROUP_ROOT + "cpuset.cpus.effective";
    private static final String CPU_STAT_FILE = CGROUP_ROOT + "cpu.stat";

    private final OperatingRange operatingRange;
    private final String cpuMaxFile;
    private final String effectiveCpuSetFile;
    private final String cpuStatFile;
    private final Clock clock;

    // Number of CPU cores this container may use; computed lazily on first capture (-1 = not yet computed).
    private double cpuLimit = -1;
    // Snapshot from the previous capture used to compute the usage delta (0 = no previous capture yet).
    private long lastCpuUseTimeMicros = 0;
    private long lastSystemTimeMicros = 0;

    /**
     * Creates a metric that reads from the standard cgroupv2 file locations using the system UTC clock.
     *
     * @param operatingRange operating range for this metric
     */
    public Cgroupv2CpuWorkerMetric(final OperatingRange operatingRange) {
        this(operatingRange, CPU_MAX_FILE, EFFECTIVE_CPU_SET_FILE, CPU_STAT_FILE, Clock.systemUTC());
    }

    @Override
    public String getShortName() {
        return CPU_WORKER_METRICS_TYPE.getShortName();
    }

    /**
     * Captures the percent CPU utilization since the previous invocation.
     *
     * @return WorkerMetricValue between 0 and 100 (both inclusive); 0 on the first call
     */
    @Override
    public WorkerMetricValue capture() {
        return WorkerMetricValue.builder().value(calculateCpuUsage()).build();
    }

    // Computes percent utilization as (CPU core time consumed since last call) / (allocated cores).
    private double calculateCpuUsage() {
        if (cpuLimit == -1) {
            cpuLimit = calculateCpuLimit();
        }

        // The first line of this file is of the format
        // usage_usec $MICROSECONDS
        // where $MICROSECONDS is always a number
        final String cpuUsageStat = readSingleLineFile(cpuStatFile);
        final long cpuTimeMicros = Long.parseLong(cpuUsageStat.split(" ")[1]);
        final long currentTimeMicros = TimeUnit.MILLISECONDS.toMicros(clock.millis());

        boolean skip = false;
        double cpuCoreTimeUsed;
        synchronized (LOCK_OBJECT) {
            if (lastCpuUseTimeMicros == 0 && lastSystemTimeMicros == 0) {
                // Case where this is a first call so no diff available
                skip = true;
            }

            final long microTimeDiff = currentTimeMicros - lastSystemTimeMicros;
            final long cpuUseDiff = cpuTimeMicros - lastCpuUseTimeMicros;
            // This value is not a percent, but rather how much CPU core time was consumed. i.e. this number can be
            // 2.2 which stands for 2.2 CPU cores were fully utilized. If this number is less than 1 then that means
            // that less than 1 CPU core was used.
            cpuCoreTimeUsed = ((double) cpuUseDiff / microTimeDiff);
            lastCpuUseTimeMicros = cpuTimeMicros;
            lastSystemTimeMicros = currentTimeMicros;
        }

        if (skip) {
            return 0D;
        } else {
            // In case of rounding error, treat everything above 100% as 100%
            return Math.min(100.0, cpuCoreTimeUsed / cpuLimit * 100.0);
        }
    }

    // Determines how many CPU cores this container is allowed to use, from cpu.max or, when no
    // limit is set, from the effective cpuset.
    private double calculateCpuLimit() {
        // This file contains two values separated by space ($MAX $PERIOD).
        // $MAX is either a number or "max"
        // $PERIOD is always a number
        final String cpuMax = readSingleLineFile(cpuMaxFile);
        final String[] cpuMaxArr = cpuMax.split(" ");
        final String max = cpuMaxArr[0];
        final String period = cpuMaxArr[1];

        if (max.equals("max")) {
            // if first value in file is "max", a limit is not set on the container. The container can use all
            // available cores
            return getAvailableCpusFromEffectiveCpuSet(readSingleLineFile(effectiveCpuSetFile));
        } else {
            return Double.parseDouble(max) / Long.parseLong(period);
        }
    }

    @Override
    public OperatingRange getOperatingRange() {
        return operatingRange;
    }

    @Override
    public WorkerMetricType getWorkerMetricType() {
        return CPU_WORKER_METRICS_TYPE;
    }
}

View file

@ -0,0 +1,203 @@
package software.amazon.kinesis.worker.metric.impl.container;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.MalformedURLException;
import java.net.URL;
import java.nio.charset.Charset;
import java.util.Iterator;
import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;
import lombok.AccessLevel;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import software.amazon.kinesis.worker.metric.OperatingRange;
import software.amazon.kinesis.worker.metric.WorkerMetric;
import software.amazon.kinesis.worker.metric.WorkerMetricType;
/**
 * Queries the Amazon ECS task metadata endpoint version 4 to get CPU metric stats as well as allocated CPU to the
 * ECS task and containers to calculate percent CPU utilization. This works for all ECS containers running on the
 * following platforms:
 *
 * Fargate agent version 1.4.0
 * EC2 instance running at least 1.39.0 of the Amazon ECS container agent
 *
 * For more information, see
 * https://docs.aws.amazon.com/AmazonECS/latest/developerguide/task-metadata-endpoint-v4.html
 */
@Slf4j
@RequiredArgsConstructor(access = AccessLevel.PACKAGE)
public class EcsCpuWorkerMetric implements WorkerMetric {

    private static final WorkerMetricType CPU_WORKER_METRICS_TYPE = WorkerMetricType.CPU;
    private static final String SYS_VAR_ECS_METADATA_URI = "ECS_CONTAINER_METADATA_URI_V4";

    // ObjectMapper is thread-safe once configured and expensive to construct; share a single
    // instance instead of allocating a new mapper on every metadata read.
    private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();

    private final OperatingRange operatingRange;
    private final String containerStatsUri;
    private final String taskMetadataUri;
    private final String containerMetadataUri;

    // Cached after the first capture; -1 means not yet computed.
    private double containerCpuLimit = -1;
    private double onlineCpus = -1;

    /**
     * Creates the metric, resolving the metadata endpoints from the ECS_CONTAINER_METADATA_URI_V4 environment
     * variable. If the variable is absent, the URIs stay null and capture() fails with IllegalArgumentException.
     *
     * @param operatingRange operating range for this metric
     */
    public EcsCpuWorkerMetric(final OperatingRange operatingRange) {
        this.operatingRange = operatingRange;

        final String ecsMetadataRootUri = System.getenv(SYS_VAR_ECS_METADATA_URI);
        if (ecsMetadataRootUri != null) {
            this.containerStatsUri = ecsMetadataRootUri + "/stats";
            this.taskMetadataUri = ecsMetadataRootUri + "/task";
            this.containerMetadataUri = ecsMetadataRootUri;
        } else {
            this.containerStatsUri = null;
            this.taskMetadataUri = null;
            this.containerMetadataUri = null;
        }
    }

    @Override
    public String getShortName() {
        return CPU_WORKER_METRICS_TYPE.getShortName();
    }

    /**
     * Captures the percent CPU utilization of this container since the previous stats sample.
     *
     * @return WorkerMetricValue between 0 and 100 (both inclusive); 0 on the first call
     */
    @Override
    public WorkerMetricValue capture() {
        return WorkerMetricValue.builder().value(calculateCpuUsage()).build();
    }

    private double calculateCpuUsage() {
        // Read current container metrics
        final JsonNode containerStatsRootNode = readEcsMetadata(containerStatsUri);
        final long cpuUsage = containerStatsRootNode
                .path("cpu_stats")
                .path("cpu_usage")
                .path("total_usage")
                .asLong();
        final long systemCpuUsage = containerStatsRootNode
                .path("cpu_stats")
                .path("system_cpu_usage")
                .asLong();
        final long prevCpuUsage = containerStatsRootNode
                .path("precpu_stats")
                .path("cpu_usage")
                .path("total_usage")
                .asLong();
        final long prevSystemCpuUsage = containerStatsRootNode
                .path("precpu_stats")
                .path("system_cpu_usage")
                .asLong();

        if (containerCpuLimit == -1 && onlineCpus == -1) {
            onlineCpus =
                    containerStatsRootNode.path("cpu_stats").path("online_cpus").asDouble();
            containerCpuLimit = calculateContainerCpuLimit(onlineCpus);
        }

        // precpu_stats values will be 0 if it is the first call
        if (prevCpuUsage == 0 && prevSystemCpuUsage == 0) {
            return 0D;
        }

        final long cpuUsageDiff = cpuUsage - prevCpuUsage;
        final long systemCpuUsageDiff = systemCpuUsage - prevSystemCpuUsage;

        // Edge case when there is no systemCpu usage, then that means that 100% of the cpu is used.
        if (systemCpuUsageDiff == 0) {
            return 100D;
        }

        // This value is not a percent, but rather how much CPU core time was consumed. i.e. this number can be
        // 2.2 which stands for 2.2 CPU cores were fully utilized. If this number is less than 1 then that means
        // that less than 1 CPU core was used.
        final double cpuCoreTimeUsed = ((double) cpuUsageDiff) / systemCpuUsageDiff * onlineCpus;

        // This calculated value is cpu utilization percent. This can burst past 100%, but we will take min with 100%
        // because only this amount is guaranteed CPU time to the container
        return Math.min(100.0, cpuCoreTimeUsed / containerCpuLimit * 100.0);
    }

    /**
     * All containers in an ECS task can use up to the task level CPU limit. However, CPU is shared among all
     * containers in the task according to the relative ratio of CPU shares allocated to each container.
     * i.e.
     * CPU limit of task is 8 cores
     * Container 1 with 10 CPU shares
     * Container 2 with 30 CPU shares
     * Sum of CPU shares is 40
     * Container 1 can use 25% of the 8 cores in CPU core time, so this function returns 2
     * Container 2 can use 75% of the 8 cores in CPU core time, so this function returns 6
     *
     * @return the CPU core time allocated to the container
     */
    private double calculateContainerCpuLimit(double onlineCpus) {
        // Read task metadata
        final JsonNode taskStatsRootNode = readEcsMetadata(taskMetadataUri);
        double taskCpuLimit = calculateTaskCpuLimit(taskStatsRootNode, onlineCpus);

        // Read current container metadata
        final String currentContainerId =
                readEcsMetadata(containerMetadataUri).path("DockerId").asText();
        final Iterator<JsonNode> containersIterator =
                taskStatsRootNode.path("Containers").iterator();

        // The default if this value is not provided is 2 CPU shares (in ECS agent versions >= 1.2.0)
        int currentContainerCpuShare = 2;
        int containersCpuShareSum = 0;
        while (containersIterator.hasNext()) {
            final JsonNode containerNode = containersIterator.next();
            final int containerCpuShare =
                    containerNode.path("Limits").path("CPU").asInt();
            if (containerNode.path("DockerId").asText().equals(currentContainerId)) {
                currentContainerCpuShare = containerCpuShare;
            }
            containersCpuShareSum += containerCpuShare;
        }
        return ((double) currentContainerCpuShare) / containersCpuShareSum * taskCpuLimit;
    }

    // Resolves the task-wide CPU limit; falls back to the host's online CPUs when no task-level
    // CPU limit is present (possible on EC2 launch types).
    private double calculateTaskCpuLimit(JsonNode taskStatsRootNode, double onlineCpus) {
        final JsonNode limitsNode = taskStatsRootNode.path("Limits");
        if (limitsNode.isMissingNode()) {
            // Neither a memory limit nor cpu limit is set at the task level (possible on EC2 instances)
            return onlineCpus;
        }
        final JsonNode cpuLimitsNode = limitsNode.path("CPU");
        if (cpuLimitsNode.isMissingNode()) {
            // When only a memory limit is set at the task level (possible on ec2 instances)
            return onlineCpus;
        }
        return cpuLimitsNode.asDouble();
    }

    // Fetches and parses one metadata document. Throws IllegalArgumentException when the endpoint
    // is not configured, the URL is malformed, or the response cannot be read/parsed.
    private JsonNode readEcsMetadata(String uri) {
        if (this.containerMetadataUri == null) {
            throw new IllegalArgumentException("No ECS metadata endpoint found from environment variables.");
        }
        URL url;
        try {
            url = new URL(uri);
        } catch (MalformedURLException e) {
            throw new IllegalArgumentException(
                    "CpuWorkerMetrics is not configured properly. ECS metadata url is malformed", e);
        }

        // try-with-resources: the original leaked the stream returned by url.openStream().
        try (InputStreamReader reader = new InputStreamReader(url.openStream(), Charset.defaultCharset())) {
            return OBJECT_MAPPER.readValue(reader, JsonNode.class);
        } catch (IOException e) {
            throw new IllegalArgumentException("Error in parsing ECS metadata", e);
        }
    }

    @Override
    public OperatingRange getOperatingRange() {
        return operatingRange;
    }

    @Override
    public WorkerMetricType getWorkerMetricType() {
        return CPU_WORKER_METRICS_TYPE;
    }
}

View file

@ -0,0 +1,108 @@
package software.amazon.kinesis.worker.metric.impl.jmx;
import java.lang.management.ManagementFactory;
import java.util.Collections;
import java.util.HashSet;
import java.util.Set;
import javax.management.MBeanServerConnection;
import javax.management.ObjectName;
import javax.management.openmbean.CompositeDataSupport;
import lombok.RequiredArgsConstructor;
import software.amazon.kinesis.worker.metric.OperatingRange;
import software.amazon.kinesis.worker.metric.WorkerMetric;
import software.amazon.kinesis.worker.metric.WorkerMetricType;
/**
 * Memory WorkerMetricStats that reads the heap memory after GC. Memory usage is calculated by reading all
 * available memory pools except Eden (as this is an allocation buffer) and computing the used memory and total
 * memory. The percentage is then computed by dividing used memory by total memory.
 */
@RequiredArgsConstructor
public class HeapMemoryAfterGCWorkerMetric implements WorkerMetric {

    private static final WorkerMetricType MEMORY_WORKER_METRICS_TYPE = WorkerMetricType.MEMORY;

    private final OperatingRange operatingRange;

    // Lazily discovered on the first capture and cached for subsequent calls.
    private Set<ObjectName> garbageCollectorMxBeans;
    private Set<String> memoryPoolNames;

    @Override
    public String getShortName() {
        return MEMORY_WORKER_METRICS_TYPE.getShortName();
    }

    /**
     * Captures the percentage of non-Eden heap memory in use after the most recent garbage collection.
     *
     * @return WorkerMetricValue between 0 and 100 (both inclusive)
     */
    @Override
    public WorkerMetricValue capture() {
        return WorkerMetricValue.builder()
                .value(getAfterGCMemoryUsage(ManagementFactory.getPlatformMBeanServer()))
                .build();
    }

    private double getAfterGCMemoryUsage(final MBeanServerConnection connection) {
        try {
            if (garbageCollectorMxBeans == null) {
                garbageCollectorMxBeans = connection.queryNames(
                        new ObjectName(ManagementFactory.GARBAGE_COLLECTOR_MXBEAN_DOMAIN_TYPE + ",*"), null);

                memoryPoolNames = new HashSet<String>();
                for (ObjectName on : garbageCollectorMxBeans) {
                    String[] poolNames = (String[]) connection.getAttribute(on, "MemoryPoolNames");
                    // A given MemoryPool may be associated with multiple GarbageCollectors,
                    // but will appear only once in memoryPoolNames
                    Collections.addAll(memoryPoolNames, poolNames);
                }
            }

            // Report on the sum of non-Eden HEAP spaces after the last gc
            Long used, max;
            long usedKb = 0, totalKb = 0;
            for (String poolName : memoryPoolNames) {
                if (!poolName.contains("Eden")) {
                    // Ignore Eden, since it's just an allocation buffer
                    ObjectName on =
                            new ObjectName(ManagementFactory.MEMORY_POOL_MXBEAN_DOMAIN_TYPE + ",name=" + poolName);
                    String mt = (String) connection.getAttribute(on, "Type");
                    if (mt.equals("HEAP")) {
                        // Paranoia: ignore non-HEAP memory pools
                        CompositeDataSupport data =
                                (CompositeDataSupport) connection.getAttribute(on, "CollectionUsage");
                        used = (Long) data.get("used");
                        usedKb += used / 1024;
                        max = (Long) data.get("max");
                        // max can be undefined (-1)
                        // http://docs.oracle.com/javase/7/docs/api/java/lang/management/MemoryUsage.html
                        totalKb += max == -1 ? 0 : max / 1024;
                    }
                }
            }

            if (totalKb <= 0) {
                // Bug fix: the original message asserted the opposite of the failing condition.
                throw new IllegalArgumentException("Total memory value for JVM must be greater than zero");
            }
            return 100.0 * (double) usedKb / (double) totalKb;
        } catch (final IllegalArgumentException e) {
            // Propagate our own validation failure unchanged.
            throw e;
        } catch (final Exception e) {
            // Surface JMX access failures (reflection, connection, malformed names) uniformly.
            throw new IllegalArgumentException(e);
        }
    }

    @Override
    public OperatingRange getOperatingRange() {
        return operatingRange;
    }

    @Override
    public WorkerMetricType getWorkerMetricType() {
        return MEMORY_WORKER_METRICS_TYPE;
    }
}

View file

@ -0,0 +1,133 @@
package software.amazon.kinesis.worker.metric.impl.linux;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import lombok.AccessLevel;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import software.amazon.kinesis.worker.metric.OperatingRange;
import software.amazon.kinesis.worker.metric.WorkerMetric;
import software.amazon.kinesis.worker.metric.WorkerMetricType;
/**
 * Reads CPU usage statistics out of the /proc/stat file that is present on EC2 instances. The value is %
 * utilization of the CPU.
 * When this is invoked for the first time, the value returned is always 0 as the previous values are not available
 * to calculate the diff. If the file hasn't changed this also returns 0.
 * In case the file is not present or any other exception occurs, this throws IllegalArgumentException.
 */
@Slf4j
@RequiredArgsConstructor(access = AccessLevel.PACKAGE)
public class LinuxCpuWorkerMetric implements WorkerMetric {

    // Guards the read-then-update of the last* snapshot fields below.
    private static final Object LOCK_OBJECT = new Object();
    private static final WorkerMetricType CPU_WORKER_METRICS_TYPE = WorkerMetricType.CPU;

    private final OperatingRange operatingRange;
    // Path of the kernel stat file; parameterized for testing (defaults to /proc/stat).
    private final String statFile;

    // Snapshot of the previous read, used to compute deltas; guarded by LOCK_OBJECT.
    private long lastUsr, lastIow, lastSys, lastIdl, lastTot;
    private String lastLine;

    public LinuxCpuWorkerMetric(final OperatingRange operatingRange) {
        this(operatingRange, "/proc/stat");
    }

    @Override
    public String getShortName() {
        return CPU_WORKER_METRICS_TYPE.getShortName();
    }

    /**
     * Captures the percent CPU utilization since the previous invocation.
     *
     * @return WorkerMetricValue between 0 and 100 (both inclusive); 0 on the first call or when
     *         the stat file has not changed since the last call
     */
    @Override
    public WorkerMetricValue capture() {
        return WorkerMetricValue.builder().value(calculateCpuUsage()).build();
    }

    // Parses the aggregate "cpu" line of the stat file and derives utilization as
    // (total delta - idle delta) / total delta since the previous call.
    private double calculateCpuUsage() {
        BufferedReader bufferedReader = null;
        try {
            final File stat = new File(statFile);
            if (stat.exists()) {
                bufferedReader = new BufferedReader(new FileReader(stat));
                final String line = bufferedReader.readLine();
                // Assumes /proc/stat's first-line layout: "cpu user nice system idle iowait ..."
                // columns 1 and 2 (user + nice) are combined into usr below.
                final String[] lineVals = line.split("\\s+");

                long usr = Long.parseLong(lineVals[1]) + Long.parseLong(lineVals[2]);
                long sys = Long.parseLong(lineVals[3]);
                long idl = Long.parseLong(lineVals[4]);
                long iow = Long.parseLong(lineVals[5]);
                long tot = usr + sys + idl + iow;
                long diffIdl = -1;
                long diffTot = -1;
                boolean skip = false;
                synchronized (LOCK_OBJECT) {
                    if (lastUsr == 0 || line.equals(lastLine)) {
                        // Case where this is a first call so no diff available or
                        // /proc/stat file is not updated since last time.
                        skip = true;
                    }

                    diffIdl = Math.abs(idl - lastIdl);
                    diffTot = Math.abs(tot - lastTot);
                    if (diffTot < diffIdl) {
                        log.warn(
                                "diffTot is less than diff_idle. \nPrev cpu line : {} and current cpu line : {} ",
                                lastLine,
                                line);
                        if (iow < lastIow) {
                            // this is case where current iow value less than prev, this can happen in rare cases as per
                            // https://docs.kernel.org/filesystems/proc.html, and when the worker is idle
                            // there is no increase in usr or sys values as well resulting in diffTot < diffIdl as
                            // current tot increases less than current idl
                            // return 0 in this case as this is the case where worker is not doing anything anyways.
                            skip = true;
                        }
                    }

                    lastUsr = usr;
                    lastSys = sys;
                    lastIdl = idl;
                    lastIow = iow;
                    lastTot = usr + sys + idl + iow;
                    lastLine = line;
                }

                if (skip) {
                    return 0D;
                }
                return ((double) (diffTot - diffIdl) / (double) diffTot) * 100.0;
            } else {
                throw new IllegalArgumentException(String.format(
                        "LinuxCpuWorkerMetric is not configured properly, file : %s does not exists", this.statFile));
            }
        } catch (final Throwable t) {
            if (t instanceof IllegalArgumentException) {
                throw (IllegalArgumentException) t;
            }
            throw new IllegalArgumentException(
                    "LinuxCpuWorkerMetric failed to read metric stats or not configured properly.", t);
        } finally {
            try {
                if (bufferedReader != null) {
                    bufferedReader.close();
                }
            } catch (Throwable x) {
                log.warn("Failed to close bufferedReader ", x);
            }
        }
    }

    @Override
    public OperatingRange getOperatingRange() {
        return operatingRange;
    }

    @Override
    public WorkerMetricType getWorkerMetricType() {
        return CPU_WORKER_METRICS_TYPE;
    }
}

View file

@ -0,0 +1,42 @@
package software.amazon.kinesis.worker.metric.impl.linux;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Stopwatch;
import software.amazon.kinesis.worker.metric.OperatingRange;
import software.amazon.kinesis.worker.metric.WorkerMetricType;
/**
 * Measures inbound (received) network bandwidth utilization. Refer to the javadoc of
 * {@link LinuxNetworkWorkerMetricBase} for details on how the value is computed.
 */
public class LinuxNetworkInWorkerMetric extends LinuxNetworkWorkerMetricBase {
    private static final WorkerMetricType NETWORK_IN_WORKER_METRICS_TYPE = WorkerMetricType.NETWORK_IN;

    /**
     * Creates a metric that reads the default interface (eth0) from the default stat file.
     */
    public LinuxNetworkInWorkerMetric(final OperatingRange operatingRange, final double maxBandwidthInMB) {
        this(operatingRange, DEFAULT_INTERFACE_NAME, maxBandwidthInMB);
    }

    /**
     * Creates a metric that reads the given interface from the default stat file.
     */
    public LinuxNetworkInWorkerMetric(
            final OperatingRange operatingRange, final String interfaceName, final double maxBandwidthInMB) {
        this(operatingRange, interfaceName, DEFAULT_NETWORK_STAT_FILE, maxBandwidthInMB, Stopwatch.createUnstarted());
    }

    @VisibleForTesting
    LinuxNetworkInWorkerMetric(
            final OperatingRange operatingRange,
            final String interfaceName,
            final String statFile,
            final double maxBandwidthInMB,
            final Stopwatch stopwatch) {
        super(operatingRange, interfaceName, statFile, maxBandwidthInMB, stopwatch);
    }

    @Override
    protected WorkerMetricType getWorkerMetricsType() {
        return NETWORK_IN_WORKER_METRICS_TYPE;
    }
}

View file

@ -0,0 +1,42 @@
package software.amazon.kinesis.worker.metric.impl.linux;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Stopwatch;
import software.amazon.kinesis.worker.metric.OperatingRange;
import software.amazon.kinesis.worker.metric.WorkerMetricType;
/**
 * Measures outbound (transmitted) network bandwidth utilization. Refer to the javadoc of
 * {@link LinuxNetworkWorkerMetricBase} for details on how the value is computed.
 */
public class LinuxNetworkOutWorkerMetric extends LinuxNetworkWorkerMetricBase {
    private static final WorkerMetricType NETWORK_OUT_WORKER_METRICS_TYPE = WorkerMetricType.NETWORK_OUT;

    /**
     * Creates a metric that reads the default interface (eth0) from the default stat file.
     */
    public LinuxNetworkOutWorkerMetric(final OperatingRange operatingRange, final double maxBandwidthInMB) {
        this(operatingRange, DEFAULT_INTERFACE_NAME, maxBandwidthInMB);
    }

    /**
     * Creates a metric that reads the given interface from the default stat file.
     */
    public LinuxNetworkOutWorkerMetric(
            final OperatingRange operatingRange, final String interfaceName, final double maxBandwidthInMB) {
        this(operatingRange, interfaceName, DEFAULT_NETWORK_STAT_FILE, maxBandwidthInMB, Stopwatch.createUnstarted());
    }

    @VisibleForTesting
    LinuxNetworkOutWorkerMetric(
            final OperatingRange operatingRange,
            final String interfaceName,
            final String statFile,
            final double maxBandwidthInMB,
            final Stopwatch stopwatch) {
        super(operatingRange, interfaceName, statFile, maxBandwidthInMB, stopwatch);
    }

    @Override
    protected WorkerMetricType getWorkerMetricsType() {
        return NETWORK_OUT_WORKER_METRICS_TYPE;
    }
}

View file

@ -0,0 +1,188 @@
package software.amazon.kinesis.worker.metric.impl.linux;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.time.Duration;
import java.util.Map;
import com.google.common.base.Preconditions;
import com.google.common.base.Stopwatch;
import com.google.common.collect.ImmutableMap;
import lombok.extern.slf4j.Slf4j;
import software.amazon.kinesis.worker.metric.OperatingRange;
import software.amazon.kinesis.worker.metric.WorkerMetric;
import software.amazon.kinesis.worker.metric.WorkerMetricType;
/**
* Base class for EC2NetworkWorkerMetrics, this reads and parses /proc/net/dev file and look for the specific
* interface and reads received and transmitted bytes.
* To get the percentage of bandwidth consumed, the fetch bytes are converted to per second (based on the interval
* between invocation) and percentage is calculated by dividing it by the maximum bandwidth in MBps.
*
* When this is invoked for the first time, the value returned is always 0 as the prev values are not available
* to calculate the diff.
* In case the stat file is not present or any other exception occurs, this throws IllegalArgumentException.
*/
@Slf4j
public abstract class LinuxNetworkWorkerMetricBase implements WorkerMetric {
protected static final String DEFAULT_NETWORK_STAT_FILE = "/proc/net/dev";
protected static final String DEFAULT_INTERFACE_NAME = "eth0";
private final Object lockObject = new Object();
private final OperatingRange operatingRange;
private final String interfaceName;
private final String statFile;
private final double maxBandwidthInMBps;
// Stopwatch to keep track of elapsed time between invocation.
private final Stopwatch stopwatch;
public LinuxNetworkWorkerMetricBase(
final OperatingRange operatingRange,
final String interfaceName,
final String statFile,
final double maxBandwidthInMBps,
final Stopwatch stopwatch) {
Preconditions.checkArgument(maxBandwidthInMBps > 0, "maxBandwidthInMBps should be greater than 0.");
this.operatingRange = operatingRange;
this.interfaceName = interfaceName;
this.statFile = statFile;
this.maxBandwidthInMBps = maxBandwidthInMBps;
this.stopwatch = stopwatch;
}
private long lastRx = -1;
private long lastTx = -1;
@Override
public String getShortName() {
return getWorkerMetricsType().getShortName();
}
@Override
public OperatingRange getOperatingRange() {
return this.operatingRange;
}
@Override
public WorkerMetricType getWorkerMetricType() {
return getWorkerMetricsType();
}
/**
* Reads the stat file and find the total bytes (in and out) and divide it by the time elapsed since last read to
* get the bytes per second.
* Converts the bytes per second to MBps and then normalizes it to a percentage of the maximum bandwidth.
* @return WorkerMetricValue with the % of network bandwidth consumed.
*/
@Override
public WorkerMetricValue capture() {
final double percentageOfMaxBandwidth =
convertToMBps(calculateNetworkUsage().get(getWorkerMetricsType())) / maxBandwidthInMBps * 100;
return WorkerMetricValue.builder()
// If maxBandwidthInMBps is less than utilized (could be wrong configuration),
// default to 100 % bandwidth utilization.
.value(Math.min(100, percentageOfMaxBandwidth))
.build();
}
private double convertToMBps(final long bytes) {
final double elapsedTimeInSecond;
if (!stopwatch.isRunning()) {
// stopwatch is not running during the first request only, in this case assume 1 second as elapsed as
// during the first request even bytes are zero, any value of elapsedTimeInSecond does not have any effect.
elapsedTimeInSecond = 1.0;
} else {
// Specifically, getting nanos and converting to seconds to get the decimal precision.
elapsedTimeInSecond = (double) stopwatch.elapsed().toNanos()
/ Duration.ofSeconds(1).toNanos();
}
stopwatch.reset().start();
// Convert bytes to MB
final double totalDataMB = (double) bytes / (1024 * 1024);
if (elapsedTimeInSecond == 0) {
// This should never happen, as getting called twice within 1 nanoSecond is never expected.
// If this happens something is real wrong.
throw new IllegalArgumentException("elapsedTimeInSecond is zero which in incorrect");
}
return totalDataMB / elapsedTimeInSecond;
}
protected abstract WorkerMetricType getWorkerMetricsType();
/**
 * Returns the absolute bytes in and out since the last invocation of the method,
 * read from the interface counter row of {@code statFile} (/proc/net/dev format).
 * The previous counter values are kept in {@code lastRx}/{@code lastTx} under
 * {@code lockObject}; the very first read has no baseline and reports zero usage.
 *
 * @return Map of WorkerMetricType to bytes since the previous call.
 * @throws IllegalArgumentException if the stat file is missing, the interface row
 *         cannot be found, or the file cannot be read/parsed.
 */
private Map<WorkerMetricType, Long> calculateNetworkUsage() {
    final File net = new File(statFile);
    if (!net.exists()) {
        throw new IllegalArgumentException(String.format(
                "NetworkWorkerMetrics is not configured properly, file : %s does not exists", this.statFile));
    }
    // try-with-resources guarantees the reader is closed on every exit path.
    try (BufferedReader bufferedReader = new BufferedReader(new FileReader(net))) {
        // The first two lines are column headers; skip them.
        bufferedReader.readLine();
        bufferedReader.readLine();
        // Scan for the row belonging to the configured interface.
        String line = bufferedReader.readLine();
        while (line != null && !line.matches("^\\s*" + interfaceName + ":.*")) {
            line = bufferedReader.readLine();
        }
        if (line == null) {
            throw new IllegalArgumentException(
                    "Failed to parse the file and find interface : " + interfaceName);
        }
        final int n = line.indexOf(':') + 1;
        final String[] parts = line.substring(n).trim().split("\\s+");
        // Field 0 is the cumulative receive byte counter; field 8 is the cumulative
        // transmit byte counter in the /proc/net/dev layout.
        final long rx = Long.parseLong(parts[0]);
        final long tx = Long.parseLong(parts[8]);
        long diffRx = -1, diffTx = -1;
        boolean skip = false;
        synchronized (lockObject) {
            if (lastRx == -1) {
                // First read ever: no baseline exists to diff against.
                skip = true;
            } else {
                diffRx = Math.abs(rx - lastRx);
                diffTx = Math.abs(tx - lastTx);
            }
            lastRx = rx;
            lastTx = tx;
        }
        if (skip) {
            return createResponse(0L, 0L);
        }
        return createResponse(diffRx, diffTx);
    } catch (final IllegalArgumentException e) {
        // Preserve messages raised above (missing interface, etc.) without re-wrapping.
        throw e;
    } catch (final Throwable t) {
        throw new IllegalArgumentException("Cannot read/parse " + this.statFile, t);
    }
}
/**
 * Packages the receive and transmit byte deltas into an immutable result map
 * keyed by metric type.
 */
private Map<WorkerMetricType, Long> createResponse(final long diffRx, final long diffTx) {
    return ImmutableMap.<WorkerMetricType, Long>builder()
            .put(WorkerMetricType.NETWORK_IN, diffRx)
            .put(WorkerMetricType.NETWORK_OUT, diffTx)
            .build();
}
}

View file

@ -0,0 +1,302 @@
package software.amazon.kinesis.worker.metricstats;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import lombok.AccessLevel;
import lombok.AllArgsConstructor;
import lombok.Builder;
import lombok.Data;
import lombok.EqualsAndHashCode;
import lombok.Getter;
import lombok.NoArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import software.amazon.awssdk.enhanced.dynamodb.mapper.annotations.DynamoDbAttribute;
import software.amazon.awssdk.enhanced.dynamodb.mapper.annotations.DynamoDbBean;
import software.amazon.awssdk.enhanced.dynamodb.mapper.annotations.DynamoDbIgnore;
import software.amazon.awssdk.enhanced.dynamodb.mapper.annotations.DynamoDbPartitionKey;
import software.amazon.kinesis.utils.ExponentialMovingAverage;
import software.amazon.kinesis.worker.metric.WorkerMetricType;
import static java.util.Objects.isNull;
/**
* DataModel for a WorkerMetric, this data model is used to store the current state of a Worker in terms of relevant
* WorkerMetric(CPU, Memory, Network).
*
* workerId : unique worker identifier, this is equivalent to the owner attribute from the lease table.
* lastUpdateTime : wall epoch in seconds when the entry was last updated
* metricStats : Map of WorkerMetric to last N values for it. e.g. entry "CPU" : [10,20,12,10] etc
* operatingRange : Map of WorkerMetric to its operating range. First item in the list of values defines the max limit.
* metricStatsMap : runtime computed WorkerMetric name to its average value map. This field is not stored in ddb
* and is used during Lease assignment only
*/
@Data
@Builder
@DynamoDbBean
@NoArgsConstructor
@AllArgsConstructor(access = AccessLevel.PRIVATE)
@Slf4j
public class WorkerMetricStats {
    // Short DDB attribute names to keep item size small.
    static final String KEY_LAST_UPDATE_TIME = "lut";
    static final String KEY_WORKER_ID = "wid";

    // Partition key: unique worker identifier (same as the lease-table owner attribute).
    @Getter(onMethod_ = {@DynamoDbPartitionKey, @DynamoDbAttribute(KEY_WORKER_ID)})
    private String workerId;

    // Wall-clock epoch seconds when the entry was last updated.
    @Getter(onMethod_ = {@DynamoDbAttribute(KEY_LAST_UPDATE_TIME)})
    private Long lastUpdateTime;

    // WorkerMetric name -> last N raw stat values (e.g. "CPU" -> [10, 20, 12, 10]).
    @Getter(onMethod_ = {@DynamoDbAttribute("sts")})
    private Map<String, List<Double>> metricStats;

    // WorkerMetric name -> operating range; index 0 of the list is the max utilization limit.
    @Getter(onMethod_ = {@DynamoDbAttribute("opr")})
    private Map<String, List<Long>> operatingRange;

    /**
     * This map contains the WorkerMetric to its metric stat value. Metric stat value stored in this is exponentially averaged over
     * available number of different datapoints. Runtime-only; not persisted to DDB.
     */
    @Getter(onMethod_ = {@DynamoDbIgnore})
    @EqualsAndHashCode.Exclude
    @Builder.Default
    private Map<String, Double> metricStatsMap = new HashMap<>();

    /**
     * Alpha value used to compute the exponential moving average for worker metrics values.
     */
    @Getter(onMethod_ = {@DynamoDbIgnore})
    @EqualsAndHashCode.Exclude
    @Builder.Default
    private double emaAlpha = 0.2;

    /**
     * Returns true if given {@param workerMetricName} is available for the current worker else false
     */
    public boolean containsMetricStat(final String workerMetricName) {
        return metricStats.containsKey(workerMetricName);
    }

    /**
     * Returns the value for given WorkerMetricStats name. Computed lazily as an
     * exponential moving average of the raw values and cached in {@code metricStatsMap}.
     */
    public double getMetricStat(final String workerMetricName) {
        return metricStatsMap.computeIfAbsent(workerMetricName, (key) -> computeAverage(metricStats.get(key)));
    }

    /**
     * Increase the WorkerMetricStats value by given increaseLoadPercentage. This is done during execution of LAM and
     * as assignments are happening the current metric stat value is increased based on increaseLoadPercentage.
     */
    public void extrapolateMetricStatValuesForAddedThroughput(
            final Map<String, Double> workerMetricsToFleetLevelAverageMap,
            final double averageThroughput,
            final double increaseThroughput,
            final double averageLeaseCount) {
        metricStatsMap.replaceAll((key, value) -> extrapolateMetricsValue(
                key,
                workerMetricsToFleetLevelAverageMap.get(key),
                averageThroughput,
                increaseThroughput,
                averageLeaseCount));
    }

    /**
     * Projects the metric stat value after adding throughput: scales the fleet-level
     * average by the added throughput when average throughput is positive, otherwise
     * falls back to a per-lease share of the fleet-level average.
     */
    private double extrapolateMetricsValue(
            final String metricName,
            final double fleetLevelMetricAverage,
            final double averageThroughput,
            final double increaseThroughput,
            final double averageLeaseCount) {
        if (averageThroughput > 0) {
            return metricStatsMap.get(metricName) + increaseThroughput * fleetLevelMetricAverage / averageThroughput;
        } else {
            return metricStatsMap.get(metricName) + fleetLevelMetricAverage / averageLeaseCount;
        }
    }

    /**
     * Returns true if taking the additional throughput would push any metric stat of this
     * worker above either the fleet-level average or its own operating-range max limit.
     */
    public boolean willAnyMetricStatsGoAboveAverageUtilizationOrOperatingRange(
            final Map<String, Double> workerMetricsToFleetLevelAverageMap,
            final double averageThroughput,
            final double increaseThroughput,
            final double averageLeaseCount) {
        for (final String metricStatName : metricStats.keySet()) {
            final double fleetLevelAverageForMetric = workerMetricsToFleetLevelAverageMap.get(metricStatName);
            final double updatedValueToBe = extrapolateMetricsValue(
                    metricStatName,
                    fleetLevelAverageForMetric,
                    averageThroughput,
                    increaseThroughput,
                    averageLeaseCount);
            if (updatedValueToBe > fleetLevelAverageForMetric
                    || updatedValueToBe > operatingRange.get(metricStatName).get(0)) {
                return true;
            }
        }
        return false;
    }

    /**
     * Increase the metric stat value corresponding to the added single lease. This is done during execution of LAM and
     * as assignments are happening the load is increase for LAM to determine workers for assignment.
     * The increase is done considering that for a WorkerMetric the fleet level average would be met when fleet level
     * average leases are assigned to a worker and thus 1 lease addition increases the metric stat value by fleet level
     * average of metric stat by averageLeaseCount
     */
    public void extrapolateMetricStatValuesForAddedLease(
            final Map<String, Double> workerMetricToFleetLevelAverage, final int averageLeaseCount) {
        for (Map.Entry<String, Double> workerMetricToMetricStat : metricStatsMap.entrySet()) {
            final String workerMetricName = workerMetricToMetricStat.getKey();
            final Double updatedValue = workerMetricToMetricStat.getValue()
                    + workerMetricToFleetLevelAverage.get(workerMetricName) / averageLeaseCount;
            metricStatsMap.replace(workerMetricName, updatedValue);
        }
    }

    /**
     * Determines percentage of load to reach the mean for the worker. In case of multiple worker metrics the metric stat
     * value closest to mean is used to determine the percentage value. This value is indication of how much load in
     * percentage to current load the worker can take to reach mean value.
     * @param workerMetricToFleetLevelAverage : WorkerMetric to fleet level mean value.
     * @return percentage to reach mean based on the WorkerMetric closest to its corresponding average.
     */
    public double computePercentageToReachAverage(final Map<String, Double> workerMetricToFleetLevelAverage) {
        double minDifferencePercentage = Double.MAX_VALUE;
        for (final String workerMetricName : metricStats.keySet()) {
            final double metricStatValue = getMetricStat(workerMetricName);
            final double differenceRatio;
            if (metricStatValue == 0D) {
                // If metric stat value is 0 that means this worker does not have any load so we assume that this worker
                // can take 100% more load than the current to reach average.
                differenceRatio = 1;
            } else {
                differenceRatio =
                        (workerMetricToFleetLevelAverage.get(workerMetricName) - metricStatValue) / metricStatValue;
            }
            minDifferencePercentage = Math.min(minDifferencePercentage, differenceRatio);
        }
        return minDifferencePercentage;
    }

    /**
     * Computes the exponential moving average of the given raw values using
     * {@code emaAlpha}; returns 0 for an empty list.
     */
    private Double computeAverage(final List<Double> values) {
        if (values.isEmpty()) {
            return 0D;
        }
        final ExponentialMovingAverage average = new ExponentialMovingAverage(emaAlpha);
        // Ignore -1 which denotes the WorkerMetric failure when calculating average, as it possible in past
        // one of the value is -1 due to some intermediate failure, and it has recovered since.
        values.forEach(value -> {
            if (value != -1) {
                average.add(value);
            }
        });
        return average.getValue();
    }

    /**
     * Returns true if any of the metric stat values has -1 in last index which represents that the metric stat value
     * was not successfully fetched in last attempt by worker.
     *
     * @return true if any metric stat value has -1 in last index, false otherwise.
     */
    public boolean isAnyWorkerMetricFailing() {
        boolean response = false;
        if (isUsingDefaultWorkerMetric()) {
            return response;
        }
        for (final Map.Entry<String, List<Double>> resourceStatsEntry : metricStats.entrySet()) {
            if (resourceStatsEntry.getValue().isEmpty()) {
                continue;
            }
            final Double lastEntry = resourceStatsEntry
                    .getValue()
                    .get(resourceStatsEntry.getValue().size() - 1);
            if (lastEntry != null && lastEntry == -1D) {
                response = true;
                break;
            }
        }
        if (response) {
            log.warn("WorkerStats: {} has a WorkerMetric which is failing.", this);
        }
        return response;
    }

    /**
     * WorkerMetricStats entry is invalid
     * if any of the field from lastUpdateTime, operatingRange, resourcesStats are not present or
     * if resourcesStats is empty or
     * if any of the WorkerMetrics having resourceStats does not have operatingRange or
     * if operating range values are not present or
     * if maxUtilization is 0 for any WorkerMetric
     * @return true if the entry is valid false otherwise.
     */
    public boolean isValidWorkerMetric() {
        if (isNull(lastUpdateTime)) {
            return false;
        }
        if (isUsingDefaultWorkerMetric()) {
            return true;
        }
        if (isNull(metricStats) || isNull(operatingRange)) {
            return false;
        }
        for (final Map.Entry<String, List<Double>> entry : metricStats.entrySet()) {
            if (!operatingRange.containsKey(entry.getKey())) {
                return false;
            }
        }
        for (final Map.Entry<String, List<Long>> operatingRangeEntry : operatingRange.entrySet()) {
            // If operatingRange for a WorkerMetric is missing or if maxUtilization is 0 then its not valid entry.
            if (operatingRangeEntry.getValue().isEmpty()
                    || operatingRangeEntry.getValue().get(0) == 0) {
                return false;
            }
        }
        return true;
    }

    /**
     * Returns true if any metric stat is above either the fleet-level average for that
     * WorkerMetric or its own operating-range max limit.
     */
    public boolean isAnyWorkerMetricAboveAverageUtilizationOrOperatingRange(
            final Map<String, Double> workerMetricToFleetLevelAverage) {
        for (final String workerMetricName : metricStats.keySet()) {
            final double value = getMetricStat(workerMetricName);
            if (value > workerMetricToFleetLevelAverage.get(workerMetricName)) {
                return true;
            }
        }
        // check if any metric stat value is above operating range.
        return workerMetricToFleetLevelAverage.keySet().stream().anyMatch(this::isWorkerMetricAboveOperatingRange);
    }

    /**
     * If a worker is not using an explicit WorkerMetric such as CPU, Memory, or Network, then it
     * is said to be using the default WorkerMetric. Load management then falls back to throughput.
     * @return true if the worker is not using an explicit WorkerMetric.
     */
    public boolean isUsingDefaultWorkerMetric() {
        if ((metricStats == null || metricStats.isEmpty()) && (operatingRange == null || operatingRange.isEmpty())) {
            return true;
        }
        if (metricStats != null) {
            return metricStats.entrySet().stream()
                    .anyMatch(entry -> entry.getKey().equals(WorkerMetricType.THROUGHPUT.name()));
        }
        return false;
    }

    /**
     * Evaluates if the given metric stat is above operatingRange for the given WorkerMetric name. If the WorkerMetric
     * does not exist returns false
     * @param workerMetricName WorkerMetric name to evaluate
     * @return true if metric stat exists and is above operatingRange for the WorkerMetric
     */
    public boolean isWorkerMetricAboveOperatingRange(final String workerMetricName) {
        return metricStatsMap.containsKey(workerMetricName)
                && metricStatsMap.get(workerMetricName)
                        > operatingRange.get(workerMetricName).get(0);
    }
}

View file

@ -0,0 +1,219 @@
package software.amazon.kinesis.worker.metricstats;
import java.time.Duration;
import java.time.Instant;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.CompletionException;
import java.util.function.Supplier;
import java.util.stream.Collectors;
import com.google.common.base.Preconditions;
import com.google.common.collect.ImmutableMap;
import lombok.extern.slf4j.Slf4j;
import org.jetbrains.annotations.NotNull;
import software.amazon.awssdk.core.waiters.WaiterResponse;
import software.amazon.awssdk.enhanced.dynamodb.DynamoDbAsyncTable;
import software.amazon.awssdk.enhanced.dynamodb.DynamoDbEnhancedAsyncClient;
import software.amazon.awssdk.enhanced.dynamodb.Expression;
import software.amazon.awssdk.enhanced.dynamodb.Key;
import software.amazon.awssdk.enhanced.dynamodb.TableSchema;
import software.amazon.awssdk.enhanced.dynamodb.model.DeleteItemEnhancedRequest;
import software.amazon.awssdk.enhanced.dynamodb.model.UpdateItemEnhancedRequest;
import software.amazon.awssdk.services.dynamodb.DynamoDbAsyncClient;
import software.amazon.awssdk.services.dynamodb.model.AttributeValue;
import software.amazon.awssdk.services.dynamodb.model.BillingMode;
import software.amazon.awssdk.services.dynamodb.model.ConditionalCheckFailedException;
import software.amazon.awssdk.services.dynamodb.model.DescribeTableRequest;
import software.amazon.awssdk.services.dynamodb.model.DescribeTableResponse;
import software.amazon.awssdk.services.dynamodb.model.ProvisionedThroughput;
import software.amazon.awssdk.services.dynamodb.model.ResourceNotFoundException;
import software.amazon.awssdk.services.dynamodb.model.TableDescription;
import software.amazon.awssdk.services.dynamodb.model.TableStatus;
import software.amazon.awssdk.services.dynamodb.waiters.DynamoDbAsyncWaiter;
import software.amazon.kinesis.leases.LeaseManagementConfig.WorkerMetricsTableConfig;
import software.amazon.kinesis.leases.exceptions.DependencyException;
import static java.util.Objects.nonNull;
import static software.amazon.kinesis.worker.metricstats.WorkerMetricStats.KEY_LAST_UPDATE_TIME;
import static software.amazon.kinesis.worker.metricstats.WorkerMetricStats.KEY_WORKER_ID;
@Slf4j
public class WorkerMetricStatsDAO {
    private final DynamoDbEnhancedAsyncClient dynamoDbEnhancedAsyncClient;
    private final DynamoDbAsyncTable<WorkerMetricStats> table;
    private final DynamoDbAsyncClient dynamoDbAsyncClient;
    private final WorkerMetricsTableConfig tableConfig;
    // Reporter interval in millis; used to decide whether a lastUpdateTime is stale.
    private final Long workerMetricsReporterFrequencyMillis;

    public WorkerMetricStatsDAO(
            final DynamoDbAsyncClient dynamoDbAsyncClient,
            final WorkerMetricsTableConfig tableConfig,
            final Long workerMetricsReporterFrequencyMillis) {
        this.dynamoDbAsyncClient = dynamoDbAsyncClient;
        this.dynamoDbEnhancedAsyncClient = DynamoDbEnhancedAsyncClient.builder()
                .dynamoDbClient(dynamoDbAsyncClient)
                .build();
        this.table = dynamoDbEnhancedAsyncClient.table(
                tableConfig.tableName(), TableSchema.fromBean(WorkerMetricStats.class));
        this.tableConfig = tableConfig;
        this.workerMetricsReporterFrequencyMillis = workerMetricsReporterFrequencyMillis;
    }

    /**
     * Performs initialization of the WorkerMetricStats DAO and table.
     * This will create the table if it doesn't exist.
     */
    public void initialize() throws DependencyException {
        createTableIfDoesNotExist();
    }

    /**
     * Updates the workerMetrics for the provided worker, method ignores the null attributes and overrides
     * the only non-null from {@param workerMetrics}. This is a blocking call.
     *
     * @param workerMetrics : Updated WorkerMetricStats object, resourceStats, workerId and lastUpdateTime are
     * required fields from {@param workerMetrics}
     */
    public void updateMetrics(final WorkerMetricStats workerMetrics) {
        validateWorkerMetrics(workerMetrics);
        // ignoreNulls(true) turns this into a partial update: only non-null attributes
        // from workerMetrics overwrite the stored item.
        final UpdateItemEnhancedRequest<WorkerMetricStats> request = UpdateItemEnhancedRequest.builder(
                        WorkerMetricStats.class)
                .item(workerMetrics)
                .ignoreNulls(true)
                .build();
        unwrappingFuture(() -> table.updateItem(request));
    }

    /**
     * Deletes the WorkerMetricStats entry with conditional check on lastUpdateTime, if the worker has come alive and
     * updated the lastUpdateTime then we no longer need to perform the deletion.
     * @param workerMetrics WorkerMetricStats that needs to be deleted.
     * @return true if the deletion succeeded, false if the conditional check failed.
     */
    public boolean deleteMetrics(final WorkerMetricStats workerMetrics) {
        Preconditions.checkArgument(nonNull(workerMetrics.getWorkerId()), "WorkerID is not provided");
        Preconditions.checkArgument(nonNull(workerMetrics.getLastUpdateTime()), "LastUpdateTime is not provided");
        // Delete only when the stored lastUpdateTime still equals the value we read
        // (i.e. the worker has not reported again) and the item still exists.
        final DeleteItemEnhancedRequest request = DeleteItemEnhancedRequest.builder()
                .key(Key.builder().partitionValue(workerMetrics.getWorkerId()).build())
                .conditionExpression(Expression.builder()
                        .expression(String.format("#key = :value AND attribute_exists (%s)", KEY_WORKER_ID))
                        .expressionNames(ImmutableMap.of("#key", KEY_LAST_UPDATE_TIME))
                        .expressionValues(ImmutableMap.of(
                                ":value", AttributeValue.fromN(Long.toString(workerMetrics.getLastUpdateTime()))))
                        .build())
                .build();
        try {
            unwrappingFuture(() -> table.deleteItem(request));
            return true;
        } catch (final ConditionalCheckFailedException e) {
            log.warn(
                    "Failed to delete the WorkerMetricStats due to conditional failure for worker : {}",
                    workerMetrics,
                    e);
            return false;
        }
    }

    /**
     * Validates that the required fields (metric stats with values, a fresh lastUpdateTime)
     * are present before writing to DDB; throws IllegalArgumentException otherwise.
     */
    private void validateWorkerMetrics(final WorkerMetricStats workerMetrics) {
        Preconditions.checkArgument(nonNull(workerMetrics.getMetricStats()), "ResourceMetrics not provided");
        final List<String> entriesWithoutValues = workerMetrics.getMetricStats().entrySet().stream()
                .filter(entry -> entry.getValue() == null || entry.getValue().isEmpty())
                .map(Map.Entry::getKey)
                .collect(Collectors.toList());
        Preconditions.checkArgument(
                entriesWithoutValues.isEmpty(), "Following metric stats dont have any values " + entriesWithoutValues);
        Preconditions.checkArgument(nonNull(workerMetrics.getLastUpdateTime()), "LastUpdateTime field not set");
        // If the LastUpdateTime field is 2x older than the reporter interval, it is considered stale.
        Preconditions.checkArgument(
                Duration.between(Instant.ofEpochSecond(workerMetrics.getLastUpdateTime()), Instant.now())
                                .toMillis()
                        < 2 * workerMetricsReporterFrequencyMillis,
                "LastUpdateTime is more than 2x older than workerMetricsReporterFrequencyMillis");
    }

    /**
     * Performs the scan on the storage and returns list of all workerMetricStats objects.
     *
     * @return : List of all worker metric stats
     */
    public List<WorkerMetricStats> getAllWorkerMetricStats() {
        log.debug("Scanning DDB table {}", table.tableName());
        final List<WorkerMetricStats> workerMetricStats = new ArrayList<>();
        // subscribe() returns a future completing when the full scan has been consumed.
        unwrappingFuture(() -> table.scan().items().subscribe(workerMetricStats::add));
        return workerMetricStats;
    }

    /**
     * Describes the backing table; returns null when the table does not exist yet.
     */
    private TableDescription getTableDescription() {
        try {
            final DescribeTableResponse response = unwrappingFuture(() -> dynamoDbAsyncClient.describeTable(
                    DescribeTableRequest.builder().tableName(table.tableName()).build()));
            return response.table();
        } catch (final ResourceNotFoundException e) {
            return null;
        }
    }

    /**
     * Creates the WorkerMetricStats table if absent and blocks (up to 10 minutes)
     * until it reaches ACTIVE status.
     *
     * @throws DependencyException if waiting for the table to become active times out.
     */
    private void createTableIfDoesNotExist() throws DependencyException {
        TableDescription tableDescription = getTableDescription();
        if (tableDescription == null) {
            unwrappingFuture(getWorkerMetricsDynamoTableCreator());
            tableDescription = getTableDescription();
            log.info("Table : {} created.", table.tableName());
        } else {
            log.info("Table : {} already existing, skipping creation...", table.tableName());
        }
        if (tableDescription.tableStatus() != TableStatus.ACTIVE) {
            log.info("Waiting for DDB Table: {} to become active", table.tableName());
            try (final DynamoDbAsyncWaiter waiter = dynamoDbAsyncClient.waiter()) {
                final WaiterResponse<DescribeTableResponse> response =
                        unwrappingFuture(() -> waiter.waitUntilTableExists(
                                r -> r.tableName(table.tableName()), o -> o.waitTimeout(Duration.ofMinutes(10))));
                response.matched()
                        .response()
                        .orElseThrow(() -> new DependencyException(new IllegalStateException(
                                "Creating WorkerMetricStats table timed out",
                                response.matched().exception().orElse(null))));
            }
        }
    }

    /**
     * Builds the table-creation call: provisioned throughput when the config says
     * PROVISIONED, otherwise the table defaults (on-demand).
     */
    @NotNull
    private Supplier<CompletableFuture<Void>> getWorkerMetricsDynamoTableCreator() {
        final Supplier<CompletableFuture<Void>> tableCreator;
        if (tableConfig.billingMode() == BillingMode.PROVISIONED) {
            log.info(
                    "Creating worker metric stats table {} in provisioned mode with {}wcu and {}rcu",
                    tableConfig.tableName(),
                    tableConfig.writeCapacity(),
                    tableConfig.readCapacity());
            tableCreator = () -> table.createTable(r -> r.provisionedThroughput(ProvisionedThroughput.builder()
                    .readCapacityUnits(tableConfig.readCapacity())
                    .writeCapacityUnits(tableConfig.writeCapacity())
                    .build()));
        } else {
            tableCreator = table::createTable;
        }
        return tableCreator;
    }

    /**
     * Blocks on the supplied future, unwrapping a CompletionException so callers see
     * the original RuntimeException cause (e.g. ConditionalCheckFailedException).
     */
    static <T> T unwrappingFuture(final Supplier<CompletableFuture<T>> supplier) {
        try {
            return supplier.get().join();
        } catch (final CompletionException e) {
            if (e.getCause() instanceof RuntimeException) {
                throw (RuntimeException) e.getCause();
            }
            throw e;
        }
    }
}

View file

@ -0,0 +1,227 @@
package software.amazon.kinesis.worker.metricstats;
import java.math.BigDecimal;
import java.math.RoundingMode;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.Queue;
import java.util.concurrent.Executors;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.ScheduledFuture;
import java.util.concurrent.TimeUnit;
import com.google.common.collect.EvictingQueue;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.Queues;
import lombok.AccessLevel;
import lombok.Getter;
import lombok.extern.slf4j.Slf4j;
import software.amazon.awssdk.services.cloudwatch.model.StandardUnit;
import software.amazon.awssdk.utils.ThreadFactoryBuilder;
import software.amazon.kinesis.annotations.KinesisClientInternalApi;
import software.amazon.kinesis.metrics.MetricsFactory;
import software.amazon.kinesis.metrics.MetricsLevel;
import software.amazon.kinesis.metrics.MetricsScope;
import software.amazon.kinesis.metrics.MetricsUtil;
import software.amazon.kinesis.worker.metric.WorkerMetric;
/**
* WorkerMetricStatsManager is a class that manages the collection of raw WorkerMetricStats values for the list of WorkerMetricStats
* periodically and store in a bounded in-memory queue.
* This class runs a periodic thread at every {@link #inMemoryStatsCaptureThreadFrequencyMillis} interval which
* captures each WorkerMetricStats's raw value and stores them in {@link #workerMetricsToRawHighFreqValuesMap} for each.
* When computeStats is invoked, the method drains the in-memory raw values queue for each WorkerMetricStats and computes the
* average and stores the computed average in #computedAverageStats for each WorkerMetricStats.
* For each WorkerMetricStats last {@link #maxMetricStatsCount} values are captured in {@link #computedAverageMetrics}
*
* This class is thread safe.
*/
@Slf4j
@KinesisClientInternalApi
public final class WorkerMetricStatsManager {
    /**
     * 6 digit after decimal
     */
    private static final int DEFAULT_AVERAGE_VALUES_DIGIT_AFTER_DECIMAL = 6;

    private static final String METRICS_OPERATION_WORKER_STATS_REPORTER = "WorkerMetricStatsReporter";
    static final String METRICS_IN_MEMORY_REPORTER_FAILURE = "InMemoryMetricStatsReporterFailure";
    // 1 value per sec gives 5 minutes worth of past data for 300 count which is sufficient.
    // In case of reporter running more frequently than 5 minutes the queue will not reach this value anyway.
    private static final int HIGH_FREQUENCY_STATS_COUNT = 300;
    private static final long SCHEDULER_SHUTDOWN_TIMEOUT_SECONDS = 60L;
    private final ScheduledExecutorService scheduledExecutorService;
    /**
     * Max count of values per WorkerMetricStats that is recorded in the storage.
     */
    private final int maxMetricStatsCount;
    /**
     * List of WorkerMetricStats configured for the application, the values from these will be recorded in the storage.
     */
    private final List<WorkerMetric> workerMetricList;
    /**
     * Map of WorkerMetricStats to its trailing (#maxMetricStatsCount) values.
     */
    @Getter(AccessLevel.PACKAGE)
    private final Map<WorkerMetric, Queue<Double>> computedAverageMetrics;
    /**
     * Map of the WorkerMetricStats to its raw values since the last flush to storage was done.
     */
    @Getter(AccessLevel.PACKAGE)
    private final Map<WorkerMetric, Queue<Double>> workerMetricsToRawHighFreqValuesMap;
    /**
     * Frequency for capturing raw WorkerMetricsValues in millis.
     */
    private final long inMemoryStatsCaptureThreadFrequencyMillis;

    private final MetricsFactory metricsFactory;
    private ScheduledFuture<?> managerProcessFuture;

    public WorkerMetricStatsManager(
            final int maxMetricStatsCount,
            final List<WorkerMetric> workerMetricList,
            final MetricsFactory metricsFactory,
            long inMemoryStatsCaptureThreadFrequencyMillis) {
        // Set thread as daemon to not block VM from exit.
        this.scheduledExecutorService = Executors.newScheduledThreadPool(
                1,
                new ThreadFactoryBuilder()
                        .daemonThreads(true)
                        .threadNamePrefix("worker-metrics-manager")
                        .build());
        this.maxMetricStatsCount = maxMetricStatsCount;
        this.workerMetricList = workerMetricList;
        this.computedAverageMetrics = new HashMap<>();
        this.workerMetricsToRawHighFreqValuesMap = new HashMap<>();
        this.metricsFactory = metricsFactory;
        this.inMemoryStatsCaptureThreadFrequencyMillis = inMemoryStatsCaptureThreadFrequencyMillis;
        init();
    }

    /**
     * Allocates one bounded averaged-values queue and one synchronized bounded raw-values
     * queue per configured WorkerMetric.
     */
    private void init() {
        for (final WorkerMetric workerMetric : workerMetricList) {
            computedAverageMetrics.put(workerMetric, EvictingQueue.create(maxMetricStatsCount));
            workerMetricsToRawHighFreqValuesMap.put(
                    workerMetric, Queues.synchronizedQueue(EvictingQueue.create(HIGH_FREQUENCY_STATS_COUNT)));
        }
        log.info(
                "Completed initialization with maxMetricStatsCount : {} and total WorkerMetricStats : {}",
                maxMetricStatsCount,
                workerMetricList.size());
    }

    /**
     * Starts the periodic in-memory stats capture thread.
     */
    public void startManager() {
        managerProcessFuture = scheduledExecutorService.scheduleWithFixedDelay(
                this::recordWorkerMetrics, 0, inMemoryStatsCaptureThreadFrequencyMillis, TimeUnit.MILLISECONDS);
        log.info("Started manager process...");
    }

    /**
     * Stops the capture thread and shuts down the scheduler, forcing shutdown if
     * graceful termination does not complete within the timeout.
     */
    public void stopManager() {
        if (managerProcessFuture != null) {
            managerProcessFuture.cancel(false);
        }
        if (!scheduledExecutorService.isShutdown()) {
            scheduledExecutorService.shutdown();
            try {
                // BUGFIX: shutdownNow must run when awaitTermination TIMES OUT (returns
                // false), not when it already terminated. The original condition was inverted.
                if (!scheduledExecutorService.awaitTermination(SCHEDULER_SHUTDOWN_TIMEOUT_SECONDS, TimeUnit.SECONDS)) {
                    scheduledExecutorService.shutdownNow();
                }
            } catch (final InterruptedException e) {
                Thread.currentThread().interrupt();
                log.warn("Interrupted when shutting down the scheduler, forcing shutdown", e);
                scheduledExecutorService.shutdownNow();
            }
        }
    }

    /**
     * Captures one raw value per WorkerMetric and appends it to that metric's
     * high-frequency in-memory queue. Failed captures are skipped (recorded as metrics).
     */
    private void recordWorkerMetrics() {
        for (final WorkerMetric workerMetric : workerMetricList) {
            final Optional<Double> value = fetchWorkerMetricsValue(workerMetric);
            value.ifPresent(aDouble ->
                    workerMetricsToRawHighFreqValuesMap.get(workerMetric).add(aDouble));
        }
    }

    /**
     * Invokes {@code capture()} on the WorkerMetric; on any failure logs, emits a
     * reporter-failure CloudWatch datum, and returns empty instead of propagating.
     */
    private Optional<Double> fetchWorkerMetricsValue(final WorkerMetric workerMetric) {
        try {
            final Double value = workerMetric.capture().getValue();
            return Optional.of(value);
        } catch (final Throwable throwable) {
            log.error(
                    "WorkerMetricStats {} failure : ",
                    workerMetric.getWorkerMetricType().name(),
                    throwable);
            final MetricsScope scope =
                    MetricsUtil.createMetricsWithOperation(metricsFactory, METRICS_OPERATION_WORKER_STATS_REPORTER);
            try {
                scope.addData(METRICS_IN_MEMORY_REPORTER_FAILURE, 1, StandardUnit.COUNT, MetricsLevel.SUMMARY);
            } finally {
                MetricsUtil.endScope(scope);
            }
            return Optional.empty();
        }
    }

    /**
     * Computes the metric stats for each WorkerMetricStats by averaging the values in inMemoryQueue and returns last
     * {@link WorkerMetricStatsManager#maxMetricStatsCount } averaged values for each WorkerMetricStats.
     *
     * In the case of empty inMemoryQueue, computedStats has -1 value to denote that specific WorkerMetricStats has failed.
     * @return Map of WorkerMetricStats shortName to averaged {@link WorkerMetricStatsManager#maxMetricStatsCount } values.
     */
    public synchronized Map<String, List<Double>> computeMetrics() {
        final Map<String, List<Double>> result = new HashMap<>();
        workerMetricsToRawHighFreqValuesMap.forEach((workerMetrics, statsQueue) -> {
            final List<Double> currentWorkerMetricsStats = drainQueue(statsQueue);
            final Queue<Double> computedMetrics = computedAverageMetrics.get(workerMetrics);
            if (currentWorkerMetricsStats.isEmpty()) {
                // No raw values were captured since the last flush (every capture attempt
                // failed), so record -1 to denote WorkerMetric failure for this interval.
                computedMetrics.add(-1D);
            } else {
                computedMetrics.add(computeAverage(currentWorkerMetricsStats));
            }
            result.put(workerMetrics.getShortName(), new ArrayList<>(computedMetrics));
        });
        return result;
    }

    /**
     * Gets the operating range for each WorkerMetricStats that is registered.
     * @return Map of WorkerMetricStats shortName to a single-element list containing the max utilization.
     */
    public Map<String, List<Long>> getOperatingRange() {
        final Map<String, List<Long>> operatingRange = new HashMap<>();
        workerMetricList.forEach(
                workerMetrics -> operatingRange.put(workerMetrics.getShortName(), ImmutableList.of((long)
                        workerMetrics.getOperatingRange().getMaxUtilization())));
        return operatingRange;
    }

    /**
     * Removes and returns the elements currently in the queue; bounded by the queue
     * size observed at entry so concurrent adds are picked up next time.
     */
    private static List<Double> drainQueue(final Queue<Double> queue) {
        final List<Double> elements = new ArrayList<>();
        final int queueLength = queue.size();
        for (int i = 0; i < queueLength; ++i) {
            elements.add(queue.poll());
        }
        return elements;
    }

    /**
     * Arithmetic mean of the values rounded (HALF_UP) to 6 decimal places; 0 for empty input.
     */
    private Double computeAverage(final List<Double> values) {
        final double average =
                values.stream().mapToDouble(Double::doubleValue).average().orElse(0D);
        return BigDecimal.valueOf(average)
                .setScale(DEFAULT_AVERAGE_VALUES_DIGIT_AFTER_DECIMAL, RoundingMode.HALF_UP)
                .doubleValue();
    }
}

View file

@ -0,0 +1,68 @@
/*
* Copyright 2024 Amazon.com, Inc. or its affiliates.
* Licensed under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package software.amazon.kinesis.worker.metricstats;
import java.time.Instant;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import software.amazon.kinesis.annotations.KinesisClientInternalApi;
import software.amazon.kinesis.metrics.MetricsFactory;
import software.amazon.kinesis.metrics.MetricsLevel;
import software.amazon.kinesis.metrics.MetricsScope;
import software.amazon.kinesis.metrics.MetricsUtil;
/**
* Reporter that is periodically executed to report WorkerMetricStats. It collects
* the in memory metric stats and writes into the DDB WorkerMetricStats table.
*/
@Slf4j
@RequiredArgsConstructor
@KinesisClientInternalApi
public class WorkerMetricStatsReporter implements Runnable {
    private final MetricsFactory metricsFactory;
    private final String workerIdentifier;
    private final WorkerMetricStatsManager workerMetricsManager;
    private final WorkerMetricStatsDAO workerMetricsDAO;

    /**
     * Collects the in-memory metric stats and operating range from the manager and
     * writes them as this worker's entry in the WorkerMetricStats DDB table, emitting
     * success/latency metrics for the attempt.
     */
    @Override
    public void run() {
        final MetricsScope metricsScope =
                MetricsUtil.createMetricsWithOperation(metricsFactory, "WorkerMetricStatsReporter");
        final long reportStartMillis = System.currentTimeMillis();
        boolean reportSucceeded = false;
        try {
            // The operating range is fetched during initialization and does not change
            // afterwards, so re-sending it on every report is redundant in the common case.
            // It is still included each time so that a worker whose entry was cleaned up by
            // LeaseAssignmentManager (after a stretch of failed reports) never re-creates
            // its entry without an operating range.
            final WorkerMetricStats statsEntry = WorkerMetricStats.builder()
                    .workerId(workerIdentifier)
                    .metricStats(workerMetricsManager.computeMetrics())
                    .operatingRange(workerMetricsManager.getOperatingRange())
                    .lastUpdateTime(Instant.now().getEpochSecond())
                    .build();
            workerMetricsDAO.updateMetrics(statsEntry);
            reportSucceeded = true;
        } catch (final Exception e) {
            log.error("Failed to update worker metric stats for worker : {}", workerIdentifier, e);
        } finally {
            MetricsUtil.addWorkerIdentifier(metricsScope, workerIdentifier);
            MetricsUtil.addSuccessAndLatency(metricsScope, reportSucceeded, reportStartMillis, MetricsLevel.SUMMARY);
            MetricsUtil.endScope(metricsScope);
        }
    }
}

View file

@ -0,0 +1,111 @@
package software.amazon.kinesis.worker.platform;
import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.net.HttpURLConnection;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.Optional;
import java.util.stream.Collectors;
import lombok.extern.slf4j.Slf4j;
import org.jetbrains.annotations.VisibleForTesting;
import software.amazon.kinesis.annotations.KinesisClientInternalApi;
import static software.amazon.kinesis.worker.platform.OperatingRangeDataProvider.LINUX_PROC;
/**
 * Provides resource metadata for EC2. Detection probes the EC2 Instance Metadata
 * Service (IMDS) instance-identity document endpoint, authenticating with an
 * IMDSv2 session token.
 */
@KinesisClientInternalApi
@Slf4j
public class Ec2Resource implements ResourceMetadataProvider {
    // https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/retrieve-iid.html
    private static final String IMDS_URL = "http://169.254.169.254/latest/dynamic/instance-identity/document";
    private static final String TOKEN_URL = "http://169.254.169.254/latest/api/token";
    private static final int EC2_INSTANCE_METADATA_TIMEOUT_MILLIS = 5000;

    private final UrlOpener identityDocumentUrl;
    private final UrlOpener tokenUrl;

    @VisibleForTesting
    Ec2Resource(UrlOpener identityDocumentUrl, UrlOpener tokenUrl) {
        this.identityDocumentUrl = identityDocumentUrl;
        this.tokenUrl = tokenUrl;
    }

    /**
     * Factory method to create an instance of Ec2Resource.
     *
     * @return Ec2Resource instance
     */
    public static Ec2Resource create() {
        try {
            return new Ec2Resource(new UrlOpener(new URL(IMDS_URL)), new UrlOpener(new URL(TOKEN_URL)));
        } catch (MalformedURLException e) {
            // It should not throw unless it's unit testing.
            throw new IllegalArgumentException(e);
        }
    }

    /**
     * Checks whether this process is running on EC2 by calling the IMDS
     * instance-identity document endpoint.
     *
     * @return true if the endpoint responds with HTTP 200, false on any other
     *         response or on any connection failure.
     */
    private boolean isEc2() {
        try {
            final HttpURLConnection connection = identityDocumentUrl.openConnection();
            connection.setRequestMethod("GET");
            // IMDS v2 requires IMDS token
            connection.setRequestProperty("X-aws-ec2-metadata-token", fetchImdsToken());
            connection.setConnectTimeout(EC2_INSTANCE_METADATA_TIMEOUT_MILLIS);
            connection.setReadTimeout(EC2_INSTANCE_METADATA_TIMEOUT_MILLIS);
            if (connection.getResponseCode() == 200) {
                return true;
            }
        } catch (Exception e) {
            // TODO: probably need to add retries as well.
            log.error("Unable to retrieve instance metadata", e);
        }
        return false;
    }

    /**
     * Fetches an IMDSv2 session token via HTTP PUT.
     *
     * @return the token, or null when it cannot be retrieved (e.g. not running
     *         on EC2, or the instance only supports IMDSv1).
     */
    private String fetchImdsToken() {
        try {
            final HttpURLConnection connection = tokenUrl.openConnection();
            connection.setRequestMethod("PUT");
            connection.setRequestProperty("X-aws-ec2-metadata-token-ttl-seconds", "600");
            connection.setConnectTimeout(EC2_INSTANCE_METADATA_TIMEOUT_MILLIS);
            connection.setReadTimeout(EC2_INSTANCE_METADATA_TIMEOUT_MILLIS);
            if (connection.getResponseCode() == 200) {
                // try-with-resources so the response stream is always closed
                // (previously the reader was never closed, leaking the connection stream).
                try (BufferedReader reader =
                        new BufferedReader(new InputStreamReader(tokenUrl.getInputStream(connection)))) {
                    return reader.lines().collect(Collectors.joining());
                }
            }
        } catch (Exception e) {
            log.warn(
                    "Unable to retrieve IMDS token. It could mean that the instance is not EC2 or is using IMDS V1", e);
        }
        return null;
    }

    /**
     * {@inheritDoc}
     */
    @Override
    public boolean isOnPlatform() {
        return isEc2();
    }

    /**
     * {@inheritDoc}
     */
    @Override
    public ComputePlatform getPlatform() {
        return ComputePlatform.EC2;
    }

    /**
     * {@inheritDoc}
     */
    @Override
    public Optional<OperatingRangeDataProvider> getOperatingRangeDataProvider() {
        return Optional.of(LINUX_PROC).filter(OperatingRangeDataProvider::isProvider);
    }
}

View file

@ -0,0 +1,59 @@
package software.amazon.kinesis.worker.platform;
import java.util.Map;
import java.util.Optional;
import org.jetbrains.annotations.VisibleForTesting;
import software.amazon.kinesis.annotations.KinesisClientInternalApi;
import static software.amazon.kinesis.worker.platform.OperatingRangeDataProvider.LINUX_ECS_METADATA_KEY_V4;
/**
 * Provides resource metadata for ECS.
 */
@KinesisClientInternalApi
public class EcsResource implements ResourceMetadataProvider {
    static final String ECS_METADATA_KEY_V3 = "ECS_CONTAINER_METADATA_URI";
    static final String ECS_METADATA_KEY_V4 = "ECS_CONTAINER_METADATA_URI_V4";

    private final Map<String, String> sysEnv;

    @VisibleForTesting
    EcsResource(Map<String, String> sysEnv) {
        this.sysEnv = sysEnv;
    }

    /**
     * Factory method to create an instance of EcsResource.
     *
     * @return an instance of EcsResource
     */
    public static EcsResource create() {
        return new EcsResource(System.getenv());
    }

    /**
     * {@inheritDoc}
     */
    @Override
    public boolean isOnPlatform() {
        // Either ECS metadata-endpoint variable being set to a non-empty value marks an ECS container.
        final String v3Endpoint = sysEnv.getOrDefault(ECS_METADATA_KEY_V3, "");
        final String v4Endpoint = sysEnv.getOrDefault(ECS_METADATA_KEY_V4, "");
        return !(v3Endpoint.isEmpty() && v4Endpoint.isEmpty());
    }

    /**
     * {@inheritDoc}
     */
    @Override
    public ComputePlatform getPlatform() {
        return ComputePlatform.ECS;
    }

    /**
     * {@inheritDoc}
     */
    @Override
    public Optional<OperatingRangeDataProvider> getOperatingRangeDataProvider() {
        final OperatingRangeDataProvider candidate = LINUX_ECS_METADATA_KEY_V4;
        return candidate.isProvider() ? Optional.of(candidate) : Optional.empty();
    }
}

View file

@ -0,0 +1,61 @@
package software.amazon.kinesis.worker.platform;
import java.io.File;
import java.util.Optional;
import java.util.stream.Stream;
import org.jetbrains.annotations.VisibleForTesting;
import software.amazon.kinesis.annotations.KinesisClientInternalApi;
import static software.amazon.kinesis.worker.platform.OperatingRangeDataProvider.LINUX_EKS_CGROUP_V1;
import static software.amazon.kinesis.worker.platform.OperatingRangeDataProvider.LINUX_EKS_CGROUP_V2;
/**
 * Provides resource metadata for EKS.
 */
@KinesisClientInternalApi
public class EksResource implements ResourceMetadataProvider {
    private static final String K8S_TOKEN_PATH = "/var/run/secrets/kubernetes.io/serviceaccount/token";

    private final String k8sTokenPath;

    @VisibleForTesting
    EksResource(String k8sTokenPath) {
        this.k8sTokenPath = k8sTokenPath;
    }

    /**
     * Factory method to create an instance of EksResource.
     *
     * @return an instance of EksResource
     */
    public static EksResource create() {
        return new EksResource(K8S_TOKEN_PATH);
    }

    /**
     * {@inheritDoc}
     */
    @Override
    public boolean isOnPlatform() {
        // Presence of the mounted serviceaccount token file marks a Kubernetes (EKS) pod.
        final File tokenFile = new File(this.k8sTokenPath);
        return tokenFile.exists();
    }

    /**
     * {@inheritDoc}
     */
    @Override
    public ComputePlatform getPlatform() {
        return ComputePlatform.EKS;
    }

    /**
     * {@inheritDoc}
     */
    @Override
    public Optional<OperatingRangeDataProvider> getOperatingRangeDataProvider() {
        // It is only possible that either cgroupv1 or cgroupv2 is mounted; v2 is checked first.
        if (LINUX_EKS_CGROUP_V2.isProvider()) {
            return Optional.of(LINUX_EKS_CGROUP_V2);
        }
        if (LINUX_EKS_CGROUP_V1.isProvider()) {
            return Optional.of(LINUX_EKS_CGROUP_V1);
        }
        return Optional.empty();
    }
}

View file

@ -0,0 +1,73 @@
package software.amazon.kinesis.worker.platform;
import java.io.File;
import static software.amazon.kinesis.worker.platform.EcsResource.ECS_METADATA_KEY_V4;
/**
 * Enum representing the different operating range metadata providers.
 * Each constant knows how to detect whether it is usable on the current host.
 */
public enum OperatingRangeDataProvider {
    LINUX_EKS_CGROUP_V1 {
        @Override
        public boolean isProvider() {
            if (!OperatingRangeDataProvider.isLinux()) {
                return false;
            }
            // cgroup v2 mounts expose cgroup.controllers; its presence rules out v1.
            if (new File("/sys/fs/cgroup/cgroup.controllers").exists()) {
                return false;
            }
            // cgroup v1 exposes per-controller directories such as memory/ and cpu/.
            return new File("/sys/fs/cgroup/memory").exists()
                    || new File("/sys/fs/cgroup/cpu").exists();
        }
    },
    LINUX_EKS_CGROUP_V2 {
        @Override
        public boolean isProvider() {
            if (!OperatingRangeDataProvider.isLinux()) {
                return false;
            }
            // cgroup v2 mounts expose the cgroup.controllers file at the unified hierarchy root.
            return new File("/sys/fs/cgroup/cgroup.controllers").exists();
        }
    },
    LINUX_ECS_METADATA_KEY_V4 {
        @Override
        public boolean isProvider() {
            if (!OperatingRangeDataProvider.isLinux()) {
                return false;
            }
            // Available when the ECS agent injects the v4 task-metadata endpoint variable.
            final String endpoint = System.getenv().getOrDefault(ECS_METADATA_KEY_V4, "");
            return !endpoint.isEmpty();
        }
    },
    LINUX_PROC {
        @Override
        public boolean isProvider() {
            if (!OperatingRangeDataProvider.isLinux()) {
                return false;
            }
            // Check if /proc directory exists (common in Linux environments)
            return new File("/proc").exists();
        }
    };

    // Every provider reads Linux-specific paths, so non-Linux hosts are rejected up front.
    private static boolean isLinux() {
        return System.getProperty("os.name").toLowerCase().contains("linux");
    }

    /**
     * Abstract method to check if the provider is supported on the current platform.
     *
     * @return true if the provider is supported, false otherwise.
     */
    public abstract boolean isProvider();
}

View file

@ -0,0 +1,42 @@
package software.amazon.kinesis.worker.platform;
import java.util.Optional;
import software.amazon.kinesis.annotations.KinesisClientInternalApi;
/**
 * Interface for providing resource metadata for worker. Implementations detect
 * a specific compute platform (EC2, ECS, EKS) and expose the operating range
 * data provider usable on that platform.
 */
@KinesisClientInternalApi
public interface ResourceMetadataProvider {
    /**
     * Enum representing the different compute platforms a worker can run on.
     */
    enum ComputePlatform {
        EC2,
        ECS,
        EKS,
        UNKNOWN
    }
    /**
     * Check if the worker is running on the specific platform.
     *
     * @return true if the worker is running on the specific platform, false otherwise.
     */
    boolean isOnPlatform();
    /**
     * Get the name of the compute platform.
     *
     * @return the platform represented by the implementing class.
     */
    ComputePlatform getPlatform();
    /**
     * Get the operating range data provider supported on this platform.
     *
     * @return the operating range data provider, or empty if none is available.
     */
    Optional<OperatingRangeDataProvider> getOperatingRangeDataProvider();
}

View file

@ -0,0 +1,39 @@
package software.amazon.kinesis.worker.platform;
import java.io.IOException;
import java.io.InputStream;
import java.net.HttpURLConnection;
import java.net.URL;
import lombok.RequiredArgsConstructor;
import software.amazon.kinesis.annotations.KinesisClientInternalApi;
/**
 * Utility class to open a URL and get the input stream. Exists so that HTTP
 * access can be stubbed in tests.
 */
@RequiredArgsConstructor
@KinesisClientInternalApi
class UrlOpener {
    private final URL url;

    /**
     * Open the URL and return the connection.
     *
     * @return a HttpURLConnection.
     * @throws IOException if a connection cannot be established.
     */
    public HttpURLConnection openConnection() throws IOException {
        final HttpURLConnection connection = (HttpURLConnection) url.openConnection();
        return connection;
    }

    /**
     * Get the input stream from the connection.
     *
     * @param connection the connection to get the input stream from.
     * @return the InputStream for the data.
     * @throws IOException if an error occurs while getting the input stream.
     */
    public InputStream getInputStream(HttpURLConnection connection) throws IOException {
        return connection.getInputStream();
    }
}

View file

@ -0,0 +1 @@
Sample test ECS metadata for Amazon ECS task metadata v4. For more information, see https://docs.aws.amazon.com/AmazonECS/latest/developerguide/task-metadata-endpoint-v4-examples.html

View file

@ -0,0 +1,44 @@
{
"DockerId": "ea32192c8553fbff06c9340478a2ff089b2bb5646fb718b4ee206641c9086d66",
"Name": "curl",
"DockerName": "ecs-curltest-24-curl-cca48e8dcadd97805600",
"Image": "111122223333.dkr.ecr.us-west-2.amazonaws.com/curltest:latest",
"ImageID": "sha256:d691691e9652791a60114e67b365688d20d19940dde7c4736ea30e660d8d3553",
"Labels": {
"com.amazonaws.ecs.cluster": "default",
"com.amazonaws.ecs.container-name": "curl",
"com.amazonaws.ecs.task-arn": "arn:aws:ecs:us-west-2:111122223333:task/default/8f03e41243824aea923aca126495f665",
"com.amazonaws.ecs.task-definition-family": "curltest",
"com.amazonaws.ecs.task-definition-version": "24"
},
"DesiredStatus": "RUNNING",
"KnownStatus": "RUNNING",
"Limits": {
"CPU": 50,
"Memory": 128
},
"CreatedAt": "2020-10-02T00:15:07.620912337Z",
"StartedAt": "2020-10-02T00:15:08.062559351Z",
"Type": "NORMAL",
"LogDriver": "awslogs",
"LogOptions": {
"awslogs-create-group": "true",
"awslogs-group": "/ecs/metadata",
"awslogs-region": "us-west-2",
"awslogs-stream": "ecs/curl/8f03e41243824aea923aca126495f665"
},
"ContainerARN": "arn:aws:ecs:us-west-2:111122223333:container/0206b271-b33f-47ab-86c6-a0ba208a70a9",
"Networks": [
{
"NetworkMode": "awsvpc",
"IPv4Addresses": [
"10.0.2.100"
],
"AttachmentIndex": 0,
"MACAddress": "0e:9e:32:c7:48:85",
"IPv4SubnetCIDRBlock": "10.0.2.0/24",
"PrivateDNSName": "ip-10-0-2-100.us-west-2.compute.internal",
"SubnetGatewayIpv4Address": "10.0.2.1/24"
}
]
}

View file

@ -0,0 +1,130 @@
{
"read": "2020-10-02T00:61:13.410254284Z",
"preread": "2020-10-02T00:51:12.406202398Z",
"pids_stats": {
"current": 3
},
"blkio_stats": {
"io_service_bytes_recursive": [
],
"io_serviced_recursive": [
],
"io_queue_recursive": [
],
"io_service_time_recursive": [
],
"io_wait_time_recursive": [
],
"io_merged_recursive": [
],
"io_time_recursive": [
],
"sectors_recursive": [
]
},
"num_procs": 0,
"storage_stats": {
},
"cpu_stats": {
"cpu_usage": {
"total_usage": 150000000,
"percpu_usage": [
182359190,
178608875
],
"usage_in_kernelmode": 40000000,
"usage_in_usermode": 290000000
},
"system_cpu_usage": 200000000,
"online_cpus": 2,
"throttling_data": {
"periods": 0,
"throttled_periods": 0,
"throttled_time": 0
}
},
"precpu_stats": {
"cpu_usage": {
"total_usage": 0,
"percpu_usage": [
182359190,
178608875
],
"usage_in_kernelmode": 40000000,
"usage_in_usermode": 290000000
},
"system_cpu_usage": 0,
"online_cpus": 2,
"throttling_data": {
"periods": 0,
"throttled_periods": 0,
"throttled_time": 0
}
},
"memory_stats": {
"usage": 1806336,
"max_usage": 6299648,
"stats": {
"active_anon": 606208,
"active_file": 0,
"cache": 0,
"dirty": 0,
"hierarchical_memory_limit": 134217728,
"hierarchical_memsw_limit": 268435456,
"inactive_anon": 0,
"inactive_file": 0,
"mapped_file": 0,
"pgfault": 4185,
"pgmajfault": 0,
"pgpgin": 2926,
"pgpgout": 2778,
"rss": 606208,
"rss_huge": 0,
"total_active_anon": 606208,
"total_active_file": 0,
"total_cache": 0,
"total_dirty": 0,
"total_inactive_anon": 0,
"total_inactive_file": 0,
"total_mapped_file": 0,
"total_pgfault": 4185,
"total_pgmajfault": 0,
"total_pgpgin": 2926,
"total_pgpgout": 2778,
"total_rss": 606208,
"total_rss_huge": 0,
"total_unevictable": 0,
"total_writeback": 0,
"unevictable": 0,
"writeback": 0
},
"limit": 134217728
},
"name": "/ecs-curltest-26-curl-c2e5f6e0cf91b0bead01",
"id": "5fc21e5b015f899d22618f8aede80b6d70d71b2a75465ea49d9462c8f3d2d3af",
"networks": {
"eth0": {
"rx_bytes": 84,
"rx_packets": 2,
"rx_errors": 0,
"rx_dropped": 0,
"tx_bytes": 84,
"tx_packets": 2,
"tx_errors": 0,
"tx_dropped": 0
}
},
"network_rate_stats": {
"rx_bytes_per_sec": 0,
"tx_bytes_per_sec": 0
}
}

View file

@ -0,0 +1,56 @@
{
"Cluster": "default",
"TaskARN": "arn:aws:ecs:us-west-2:111122223333:task/default/158d1c8083dd49d6b527399fd6414f5c",
"Family": "curltest",
"ServiceName": "MyService",
"Revision": "26",
"DesiredStatus": "RUNNING",
"KnownStatus": "RUNNING",
"Limits": {
"CPU": 4,
"Memory": 128
},
"PullStartedAt": "2020-10-02T00:43:06.202617438Z",
"PullStoppedAt": "2020-10-02T00:43:06.31288465Z",
"AvailabilityZone": "us-west-2d",
"VPCID": "vpc-1234567890abcdef0",
"LaunchType": "EC2",
"Containers": [
{
"DockerId": "ea32192c8553fbff06c9340478a2ff089b2bb5646fb718b4ee206641c9086d66",
"Name": "~internal~ecs~pause",
"DockerName": "ecs-curltest-26-internalecspause-e292d586b6f9dade4a00",
"Image": "amazon/amazon-ecs-pause:0.1.0",
"ImageID": "",
"Labels": {
"com.amazonaws.ecs.cluster": "default",
"com.amazonaws.ecs.container-name": "~internal~ecs~pause",
"com.amazonaws.ecs.task-arn": "arn:aws:ecs:us-west-2:111122223333:task/default/158d1c8083dd49d6b527399fd6414f5c",
"com.amazonaws.ecs.task-definition-family": "curltest",
"com.amazonaws.ecs.task-definition-version": "26"
},
"DesiredStatus": "RESOURCES_PROVISIONED",
"KnownStatus": "RESOURCES_PROVISIONED",
"Limits": {
"CPU": 50,
"Memory": 128
},
"CreatedAt": "2020-10-02T00:43:05.602352471Z",
"StartedAt": "2020-10-02T00:43:06.076707576Z",
"Type": "CNI_PAUSE",
"Networks": [
{
"NetworkMode": "awsvpc",
"IPv4Addresses": [
"10.0.2.61"
],
"AttachmentIndex": 0,
"MACAddress": "0e:10:e2:01:bd:91",
"IPv4SubnetCIDRBlock": "10.0.2.0/24",
"PrivateDNSName": "ip-10-0-2-61.us-west-2.compute.internal",
"SubnetGatewayIpv4Address": "10.0.2.1/24"
}
]
}
]
}

View file

@ -0,0 +1,44 @@
{
"DockerId": "ea32192c8553fbff06c9340478a2ff089b2bb5646fb718b4ee206641c9086d66",
"Name": "curl",
"DockerName": "ecs-curltest-24-curl-cca48e8dcadd97805600",
"Image": "111122223333.dkr.ecr.us-west-2.amazonaws.com/curltest:latest",
"ImageID": "sha256:d691691e9652791a60114e67b365688d20d19940dde7c4736ea30e660d8d3553",
"Labels": {
"com.amazonaws.ecs.cluster": "default",
"com.amazonaws.ecs.container-name": "curl",
"com.amazonaws.ecs.task-arn": "arn:aws:ecs:us-west-2:111122223333:task/default/8f03e41243824aea923aca126495f665",
"com.amazonaws.ecs.task-definition-family": "curltest",
"com.amazonaws.ecs.task-definition-version": "24"
},
"DesiredStatus": "RUNNING",
"KnownStatus": "RUNNING",
"Limits": {
"CPU": 50,
"Memory": 128
},
"CreatedAt": "2020-10-02T00:15:07.620912337Z",
"StartedAt": "2020-10-02T00:15:08.062559351Z",
"Type": "NORMAL",
"LogDriver": "awslogs",
"LogOptions": {
"awslogs-create-group": "true",
"awslogs-group": "/ecs/metadata",
"awslogs-region": "us-west-2",
"awslogs-stream": "ecs/curl/8f03e41243824aea923aca126495f665"
},
"ContainerARN": "arn:aws:ecs:us-west-2:111122223333:container/0206b271-b33f-47ab-86c6-a0ba208a70a9",
"Networks": [
{
"NetworkMode": "awsvpc",
"IPv4Addresses": [
"10.0.2.100"
],
"AttachmentIndex": 0,
"MACAddress": "0e:9e:32:c7:48:85",
"IPv4SubnetCIDRBlock": "10.0.2.0/24",
"PrivateDNSName": "ip-10-0-2-100.us-west-2.compute.internal",
"SubnetGatewayIpv4Address": "10.0.2.1/24"
}
]
}

View file

@ -0,0 +1,130 @@
{
"read": "2020-10-02T00:61:13.410254284Z",
"preread": "2020-10-02T00:51:12.406202398Z",
"pids_stats": {
"current": 3
},
"blkio_stats": {
"io_service_bytes_recursive": [
],
"io_serviced_recursive": [
],
"io_queue_recursive": [
],
"io_service_time_recursive": [
],
"io_wait_time_recursive": [
],
"io_merged_recursive": [
],
"io_time_recursive": [
],
"sectors_recursive": [
]
},
"num_procs": 0,
"storage_stats": {
},
"cpu_stats": {
"cpu_usage": {
"total_usage": 150000000,
"percpu_usage": [
182359190,
178608875
],
"usage_in_kernelmode": 40000000,
"usage_in_usermode": 290000000
},
"system_cpu_usage": 100000000,
"online_cpus": 2,
"throttling_data": {
"periods": 0,
"throttled_periods": 0,
"throttled_time": 0
}
},
"precpu_stats": {
"cpu_usage": {
"total_usage": 100000000,
"percpu_usage": [
182359190,
178608875
],
"usage_in_kernelmode": 40000000,
"usage_in_usermode": 290000000
},
"system_cpu_usage": 100000000,
"online_cpus": 2,
"throttling_data": {
"periods": 0,
"throttled_periods": 0,
"throttled_time": 0
}
},
"memory_stats": {
"usage": 1806336,
"max_usage": 6299648,
"stats": {
"active_anon": 606208,
"active_file": 0,
"cache": 0,
"dirty": 0,
"hierarchical_memory_limit": 134217728,
"hierarchical_memsw_limit": 268435456,
"inactive_anon": 0,
"inactive_file": 0,
"mapped_file": 0,
"pgfault": 4185,
"pgmajfault": 0,
"pgpgin": 2926,
"pgpgout": 2778,
"rss": 606208,
"rss_huge": 0,
"total_active_anon": 606208,
"total_active_file": 0,
"total_cache": 0,
"total_dirty": 0,
"total_inactive_anon": 0,
"total_inactive_file": 0,
"total_mapped_file": 0,
"total_pgfault": 4185,
"total_pgmajfault": 0,
"total_pgpgin": 2926,
"total_pgpgout": 2778,
"total_rss": 606208,
"total_rss_huge": 0,
"total_unevictable": 0,
"total_writeback": 0,
"unevictable": 0,
"writeback": 0
},
"limit": 134217728
},
"name": "/ecs-curltest-26-curl-c2e5f6e0cf91b0bead01",
"id": "5fc21e5b015f899d22618f8aede80b6d70d71b2a75465ea49d9462c8f3d2d3af",
"networks": {
"eth0": {
"rx_bytes": 84,
"rx_packets": 2,
"rx_errors": 0,
"rx_dropped": 0,
"tx_bytes": 84,
"tx_packets": 2,
"tx_errors": 0,
"tx_dropped": 0
}
},
"network_rate_stats": {
"rx_bytes_per_sec": 0,
"tx_bytes_per_sec": 0
}
}

View file

@ -0,0 +1,56 @@
{
"Cluster": "default",
"TaskARN": "arn:aws:ecs:us-west-2:111122223333:task/default/158d1c8083dd49d6b527399fd6414f5c",
"Family": "curltest",
"ServiceName": "MyService",
"Revision": "26",
"DesiredStatus": "RUNNING",
"KnownStatus": "RUNNING",
"Limits": {
"CPU": 4,
"Memory": 128
},
"PullStartedAt": "2020-10-02T00:43:06.202617438Z",
"PullStoppedAt": "2020-10-02T00:43:06.31288465Z",
"AvailabilityZone": "us-west-2d",
"VPCID": "vpc-1234567890abcdef0",
"LaunchType": "EC2",
"Containers": [
{
"DockerId": "ea32192c8553fbff06c9340478a2ff089b2bb5646fb718b4ee206641c9086d66",
"Name": "~internal~ecs~pause",
"DockerName": "ecs-curltest-26-internalecspause-e292d586b6f9dade4a00",
"Image": "amazon/amazon-ecs-pause:0.1.0",
"ImageID": "",
"Labels": {
"com.amazonaws.ecs.cluster": "default",
"com.amazonaws.ecs.container-name": "~internal~ecs~pause",
"com.amazonaws.ecs.task-arn": "arn:aws:ecs:us-west-2:111122223333:task/default/158d1c8083dd49d6b527399fd6414f5c",
"com.amazonaws.ecs.task-definition-family": "curltest",
"com.amazonaws.ecs.task-definition-version": "26"
},
"DesiredStatus": "RESOURCES_PROVISIONED",
"KnownStatus": "RESOURCES_PROVISIONED",
"Limits": {
"CPU": 50,
"Memory": 128
},
"CreatedAt": "2020-10-02T00:43:05.602352471Z",
"StartedAt": "2020-10-02T00:43:06.076707576Z",
"Type": "CNI_PAUSE",
"Networks": [
{
"NetworkMode": "awsvpc",
"IPv4Addresses": [
"10.0.2.61"
],
"AttachmentIndex": 0,
"MACAddress": "0e:10:e2:01:bd:91",
"IPv4SubnetCIDRBlock": "10.0.2.0/24",
"PrivateDNSName": "ip-10-0-2-61.us-west-2.compute.internal",
"SubnetGatewayIpv4Address": "10.0.2.1/24"
}
]
}
]
}

View file

@ -0,0 +1,44 @@
{
"DockerId": "ea32192c8553fbff06c9340478a2ff089b2bb5646fb718b4ee206641c9086d66",
"Name": "curl",
"DockerName": "ecs-curltest-24-curl-cca48e8dcadd97805600",
"Image": "111122223333.dkr.ecr.us-west-2.amazonaws.com/curltest:latest",
"ImageID": "sha256:d691691e9652791a60114e67b365688d20d19940dde7c4736ea30e660d8d3553",
"Labels": {
"com.amazonaws.ecs.cluster": "default",
"com.amazonaws.ecs.container-name": "curl",
"com.amazonaws.ecs.task-arn": "arn:aws:ecs:us-west-2:111122223333:task/default/8f03e41243824aea923aca126495f665",
"com.amazonaws.ecs.task-definition-family": "curltest",
"com.amazonaws.ecs.task-definition-version": "24"
},
"DesiredStatus": "RUNNING",
"KnownStatus": "RUNNING",
"Limits": {
"CPU": 50,
"Memory": 128
},
"CreatedAt": "2020-10-02T00:15:07.620912337Z",
"StartedAt": "2020-10-02T00:15:08.062559351Z",
"Type": "NORMAL",
"LogDriver": "awslogs",
"LogOptions": {
"awslogs-create-group": "true",
"awslogs-group": "/ecs/metadata",
"awslogs-region": "us-west-2",
"awslogs-stream": "ecs/curl/8f03e41243824aea923aca126495f665"
},
"ContainerARN": "arn:aws:ecs:us-west-2:111122223333:container/0206b271-b33f-47ab-86c6-a0ba208a70a9",
"Networks": [
{
"NetworkMode": "awsvpc",
"IPv4Addresses": [
"10.0.2.100"
],
"AttachmentIndex": 0,
"MACAddress": "0e:9e:32:c7:48:85",
"IPv4SubnetCIDRBlock": "10.0.2.0/24",
"PrivateDNSName": "ip-10-0-2-100.us-west-2.compute.internal",
"SubnetGatewayIpv4Address": "10.0.2.1/24"
}
]
}

View file

@ -0,0 +1,130 @@
{
"read": "2020-10-02T00:51:13.410254284Z",
"preread": "2020-10-02T00:51:12.406202398Z",
"pids_stats": {
"current": 3
},
"blkio_stats": {
"io_service_bytes_recursive": [
],
"io_serviced_recursive": [
],
"io_queue_recursive": [
],
"io_service_time_recursive": [
],
"io_wait_time_recursive": [
],
"io_merged_recursive": [
],
"io_time_recursive": [
],
"sectors_recursive": [
]
},
"num_procs": 0,
"storage_stats": {
},
"cpu_stats": {
"cpu_usage": {
"total_usage": 150000000,
"percpu_usage": [
182359190,
178608875
],
"usage_in_kernelmode": 40000000,
"usage_in_usermode": 290000000
},
"system_cpu_usage": 200000000,
"online_cpus": 2,
"throttling_data": {
"periods": 0,
"throttled_periods": 0,
"throttled_time": 0
}
},
"precpu_stats": {
"cpu_usage": {
"total_usage": 100000000,
"percpu_usage": [
182359190,
178608875
],
"usage_in_kernelmode": 40000000,
"usage_in_usermode": 290000000
},
"system_cpu_usage": 100000000,
"online_cpus": 2,
"throttling_data": {
"periods": 0,
"throttled_periods": 0,
"throttled_time": 0
}
},
"memory_stats": {
"usage": 1806336,
"max_usage": 6299648,
"stats": {
"active_anon": 606208,
"active_file": 0,
"cache": 0,
"dirty": 0,
"hierarchical_memory_limit": 134217728,
"hierarchical_memsw_limit": 268435456,
"inactive_anon": 0,
"inactive_file": 0,
"mapped_file": 0,
"pgfault": 4185,
"pgmajfault": 0,
"pgpgin": 2926,
"pgpgout": 2778,
"rss": 606208,
"rss_huge": 0,
"total_active_anon": 606208,
"total_active_file": 0,
"total_cache": 0,
"total_dirty": 0,
"total_inactive_anon": 0,
"total_inactive_file": 0,
"total_mapped_file": 0,
"total_pgfault": 4185,
"total_pgmajfault": 0,
"total_pgpgin": 2926,
"total_pgpgout": 2778,
"total_rss": 606208,
"total_rss_huge": 0,
"total_unevictable": 0,
"total_writeback": 0,
"unevictable": 0,
"writeback": 0
},
"limit": 134217728
},
"name": "/ecs-curltest-26-curl-c2e5f6e0cf91b0bead01",
"id": "5fc21e5b015f899d22618f8aede80b6d70d71b2a75465ea49d9462c8f3d2d3af",
"networks": {
"eth0": {
"rx_bytes": 84,
"rx_packets": 2,
"rx_errors": 0,
"rx_dropped": 0,
"tx_bytes": 84,
"tx_packets": 2,
"tx_errors": 0,
"tx_dropped": 0
}
},
"network_rate_stats": {
"rx_bytes_per_sec": 0,
"tx_bytes_per_sec": 0
}
}

View file

@ -0,0 +1,55 @@
{
"Cluster": "default",
"TaskARN": "arn:aws:ecs:us-west-2:111122223333:task/default/158d1c8083dd49d6b527399fd6414f5c",
"Family": "curltest",
"ServiceName": "MyService",
"Revision": "26",
"DesiredStatus": "RUNNING",
"KnownStatus": "RUNNING",
"Limits": {
"Memory": 128
},
"PullStartedAt": "2020-10-02T00:43:06.202617438Z",
"PullStoppedAt": "2020-10-02T00:43:06.31288465Z",
"AvailabilityZone": "us-west-2d",
"VPCID": "vpc-1234567890abcdef0",
"LaunchType": "EC2",
"Containers": [
{
"DockerId": "ea32192c8553fbff06c9340478a2ff089b2bb5646fb718b4ee206641c9086d66",
"Name": "~internal~ecs~pause",
"DockerName": "ecs-curltest-26-internalecspause-e292d586b6f9dade4a00",
"Image": "amazon/amazon-ecs-pause:0.1.0",
"ImageID": "",
"Labels": {
"com.amazonaws.ecs.cluster": "default",
"com.amazonaws.ecs.container-name": "~internal~ecs~pause",
"com.amazonaws.ecs.task-arn": "arn:aws:ecs:us-west-2:111122223333:task/default/158d1c8083dd49d6b527399fd6414f5c",
"com.amazonaws.ecs.task-definition-family": "curltest",
"com.amazonaws.ecs.task-definition-version": "26"
},
"DesiredStatus": "RESOURCES_PROVISIONED",
"KnownStatus": "RESOURCES_PROVISIONED",
"Limits": {
"CPU": 50,
"Memory": 128
},
"CreatedAt": "2020-10-02T00:43:05.602352471Z",
"StartedAt": "2020-10-02T00:43:06.076707576Z",
"Type": "CNI_PAUSE",
"Networks": [
{
"NetworkMode": "awsvpc",
"IPv4Addresses": [
"10.0.2.61"
],
"AttachmentIndex": 0,
"MACAddress": "0e:10:e2:01:bd:91",
"IPv4SubnetCIDRBlock": "10.0.2.0/24",
"PrivateDNSName": "ip-10-0-2-61.us-west-2.compute.internal",
"SubnetGatewayIpv4Address": "10.0.2.1/24"
}
]
}
]
}

View file

@ -0,0 +1,44 @@
{
"DockerId": "ea32192c8553fbff06c9340478a2ff089b2bb5646fb718b4ee206641c9086d66",
"Name": "curl",
"DockerName": "ecs-curltest-24-curl-cca48e8dcadd97805600",
"Image": "111122223333.dkr.ecr.us-west-2.amazonaws.com/curltest:latest",
"ImageID": "sha256:d691691e9652791a60114e67b365688d20d19940dde7c4736ea30e660d8d3553",
"Labels": {
"com.amazonaws.ecs.cluster": "default",
"com.amazonaws.ecs.container-name": "curl",
"com.amazonaws.ecs.task-arn": "arn:aws:ecs:us-west-2:111122223333:task/default/8f03e41243824aea923aca126495f665",
"com.amazonaws.ecs.task-definition-family": "curltest",
"com.amazonaws.ecs.task-definition-version": "24"
},
"DesiredStatus": "RUNNING",
"KnownStatus": "RUNNING",
"Limits": {
"CPU": 50,
"Memory": 128
},
"CreatedAt": "2020-10-02T00:15:07.620912337Z",
"StartedAt": "2020-10-02T00:15:08.062559351Z",
"Type": "NORMAL",
"LogDriver": "awslogs",
"LogOptions": {
"awslogs-create-group": "true",
"awslogs-group": "/ecs/metadata",
"awslogs-region": "us-west-2",
"awslogs-stream": "ecs/curl/8f03e41243824aea923aca126495f665"
},
"ContainerARN": "arn:aws:ecs:us-west-2:111122223333:container/0206b271-b33f-47ab-86c6-a0ba208a70a9",
"Networks": [
{
"NetworkMode": "awsvpc",
"IPv4Addresses": [
"10.0.2.100"
],
"AttachmentIndex": 0,
"MACAddress": "0e:9e:32:c7:48:85",
"IPv4SubnetCIDRBlock": "10.0.2.0/24",
"PrivateDNSName": "ip-10-0-2-100.us-west-2.compute.internal",
"SubnetGatewayIpv4Address": "10.0.2.1/24"
}
]
}

View file

@ -0,0 +1,130 @@
{
"read": "2020-10-02T00:51:13.410254284Z",
"preread": "2020-10-02T00:51:12.406202398Z",
"pids_stats": {
"current": 3
},
"blkio_stats": {
"io_service_bytes_recursive": [
],
"io_serviced_recursive": [
],
"io_queue_recursive": [
],
"io_service_time_recursive": [
],
"io_wait_time_recursive": [
],
"io_merged_recursive": [
],
"io_time_recursive": [
],
"sectors_recursive": [
]
},
"num_procs": 0,
"storage_stats": {
},
"cpu_stats": {
"cpu_usage": {
"total_usage": 150000000,
"percpu_usage": [
182359190,
178608875
],
"usage_in_kernelmode": 40000000,
"usage_in_usermode": 290000000
},
"system_cpu_usage": 200000000,
"online_cpus": 2,
"throttling_data": {
"periods": 0,
"throttled_periods": 0,
"throttled_time": 0
}
},
"precpu_stats": {
"cpu_usage": {
"total_usage": 100000000,
"percpu_usage": [
182359190,
178608875
],
"usage_in_kernelmode": 40000000,
"usage_in_usermode": 290000000
},
"system_cpu_usage": 100000000,
"online_cpus": 2,
"throttling_data": {
"periods": 0,
"throttled_periods": 0,
"throttled_time": 0
}
},
"memory_stats": {
"usage": 1806336,
"max_usage": 6299648,
"stats": {
"active_anon": 606208,
"active_file": 0,
"cache": 0,
"dirty": 0,
"hierarchical_memory_limit": 134217728,
"hierarchical_memsw_limit": 268435456,
"inactive_anon": 0,
"inactive_file": 0,
"mapped_file": 0,
"pgfault": 4185,
"pgmajfault": 0,
"pgpgin": 2926,
"pgpgout": 2778,
"rss": 606208,
"rss_huge": 0,
"total_active_anon": 606208,
"total_active_file": 0,
"total_cache": 0,
"total_dirty": 0,
"total_inactive_anon": 0,
"total_inactive_file": 0,
"total_mapped_file": 0,
"total_pgfault": 4185,
"total_pgmajfault": 0,
"total_pgpgin": 2926,
"total_pgpgout": 2778,
"total_rss": 606208,
"total_rss_huge": 0,
"total_unevictable": 0,
"total_writeback": 0,
"unevictable": 0,
"writeback": 0
},
"limit": 134217728
},
"name": "/ecs-curltest-26-curl-c2e5f6e0cf91b0bead01",
"id": "5fc21e5b015f899d22618f8aede80b6d70d71b2a75465ea49d9462c8f3d2d3af",
"networks": {
"eth0": {
"rx_bytes": 84,
"rx_packets": 2,
"rx_errors": 0,
"rx_dropped": 0,
"tx_bytes": 84,
"tx_packets": 2,
"tx_errors": 0,
"tx_dropped": 0
}
},
"network_rate_stats": {
"rx_bytes_per_sec": 0,
"tx_bytes_per_sec": 0
}
}

View file

@ -0,0 +1,52 @@
{
"Cluster": "default",
"TaskARN": "arn:aws:ecs:us-west-2:111122223333:task/default/158d1c8083dd49d6b527399fd6414f5c",
"Family": "curltest",
"ServiceName": "MyService",
"Revision": "26",
"DesiredStatus": "RUNNING",
"KnownStatus": "RUNNING",
"PullStartedAt": "2020-10-02T00:43:06.202617438Z",
"PullStoppedAt": "2020-10-02T00:43:06.31288465Z",
"AvailabilityZone": "us-west-2d",
"VPCID": "vpc-1234567890abcdef0",
"LaunchType": "EC2",
"Containers": [
{
"DockerId": "ea32192c8553fbff06c9340478a2ff089b2bb5646fb718b4ee206641c9086d66",
"Name": "~internal~ecs~pause",
"DockerName": "ecs-curltest-26-internalecspause-e292d586b6f9dade4a00",
"Image": "amazon/amazon-ecs-pause:0.1.0",
"ImageID": "",
"Labels": {
"com.amazonaws.ecs.cluster": "default",
"com.amazonaws.ecs.container-name": "~internal~ecs~pause",
"com.amazonaws.ecs.task-arn": "arn:aws:ecs:us-west-2:111122223333:task/default/158d1c8083dd49d6b527399fd6414f5c",
"com.amazonaws.ecs.task-definition-family": "curltest",
"com.amazonaws.ecs.task-definition-version": "26"
},
"DesiredStatus": "RESOURCES_PROVISIONED",
"KnownStatus": "RESOURCES_PROVISIONED",
"Limits": {
"CPU": 50,
"Memory": 128
},
"CreatedAt": "2020-10-02T00:43:05.602352471Z",
"StartedAt": "2020-10-02T00:43:06.076707576Z",
"Type": "CNI_PAUSE",
"Networks": [
{
"NetworkMode": "awsvpc",
"IPv4Addresses": [
"10.0.2.61"
],
"AttachmentIndex": 0,
"MACAddress": "0e:10:e2:01:bd:91",
"IPv4SubnetCIDRBlock": "10.0.2.0/24",
"PrivateDNSName": "ip-10-0-2-61.us-west-2.compute.internal",
"SubnetGatewayIpv4Address": "10.0.2.1/24"
}
]
}
]
}

View file

@ -0,0 +1,44 @@
{
"DockerId": "ea32192c8553fbff06c9340478a2ff089b2bb5646fb718b4ee206641c9086d66",
"Name": "curl",
"DockerName": "ecs-curltest-24-curl-cca48e8dcadd97805600",
"Image": "111122223333.dkr.ecr.us-west-2.amazonaws.com/curltest:latest",
"ImageID": "sha256:d691691e9652791a60114e67b365688d20d19940dde7c4736ea30e660d8d3553",
"Labels": {
"com.amazonaws.ecs.cluster": "default",
"com.amazonaws.ecs.container-name": "curl",
"com.amazonaws.ecs.task-arn": "arn:aws:ecs:us-west-2:111122223333:task/default/8f03e41243824aea923aca126495f665",
"com.amazonaws.ecs.task-definition-family": "curltest",
"com.amazonaws.ecs.task-definition-version": "24"
},
"DesiredStatus": "RUNNING",
"KnownStatus": "RUNNING",
"Limits": {
"CPU": 50,
"Memory": 128
},
"CreatedAt": "2020-10-02T00:15:07.620912337Z",
"StartedAt": "2020-10-02T00:15:08.062559351Z",
"Type": "NORMAL",
"LogDriver": "awslogs",
"LogOptions": {
"awslogs-create-group": "true",
"awslogs-group": "/ecs/metadata",
"awslogs-region": "us-west-2",
"awslogs-stream": "ecs/curl/8f03e41243824aea923aca126495f665"
},
"ContainerARN": "arn:aws:ecs:us-west-2:111122223333:container/0206b271-b33f-47ab-86c6-a0ba208a70a9",
"Networks": [
{
"NetworkMode": "awsvpc",
"IPv4Addresses": [
"10.0.2.100"
],
"AttachmentIndex": 0,
"MACAddress": "0e:9e:32:c7:48:85",
"IPv4SubnetCIDRBlock": "10.0.2.0/24",
"PrivateDNSName": "ip-10-0-2-100.us-west-2.compute.internal",
"SubnetGatewayIpv4Address": "10.0.2.1/24"
}
]
}

Some files were not shown because too many files have changed in this diff Show more