KinesisShardSyncer only creates leases for one level of the ancestry tree (#49)
Co-authored-by: Joshua Kim <kimjos@amazon.com>
parent 3a88a60a4e
commit 0760688375
4 changed files with 1376 additions and 323 deletions
KinesisShardSyncer.java

@@ -31,6 +31,7 @@ import com.amazonaws.services.kinesis.leases.impl.LeaseManager;
 import com.amazonaws.services.kinesis.model.ShardFilter;
 import com.amazonaws.services.kinesis.model.ShardFilterType;
 import com.amazonaws.util.CollectionUtils;
+import lombok.NoArgsConstructor;
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.commons.lang3.StringUtils;
@@ -457,7 +458,7 @@ class KinesisShardSyncer implements ShardSyncer {
     /**
      * Note: Package level access for testing purposes only.
      * Check if this shard is a descendant of a shard that is (or will be) processed.
-     * Create leases for the ancestors of this shard as required.
+     * Create leases for the first ancestor of this shard that needs to be processed, as required.
      * See javadoc of determineNewLeasesToCreate() for rules and example.
      *
      * @param shardId The shardId to check.
@@ -473,9 +474,10 @@ class KinesisShardSyncer implements ShardSyncer {
     static boolean checkIfDescendantAndAddNewLeasesForAncestors(String shardId,
             InitialPositionInStreamExtended initialPosition, Set<String> shardIdsOfCurrentLeases,
             Map<String, Shard> shardIdToShardMapOfAllKinesisShards,
-            Map<String, KinesisClientLease> shardIdToLeaseMapOfNewShards, Map<String, Boolean> memoizationContext) {
-        Boolean previousValue = memoizationContext.get(shardId);
+            Map<String, KinesisClientLease> shardIdToLeaseMapOfNewShards, MemoizationContext memoizationContext) {
+
+        final Boolean previousValue = memoizationContext.isDescendant(shardId);
         if (previousValue != null) {
             return previousValue;
         }
 
@@ -495,10 +497,13 @@ class KinesisShardSyncer implements ShardSyncer {
                 shard = shardIdToShardMapOfAllKinesisShards.get(shardId);
                 parentShardIds = getParentShardIds(shard, shardIdToShardMapOfAllKinesisShards);
                 for (String parentShardId : parentShardIds) {
-                    // Check if the parent is a descendant, and include its ancestors.
-                    if (checkIfDescendantAndAddNewLeasesForAncestors(parentShardId, initialPosition,
-                            shardIdsOfCurrentLeases, shardIdToShardMapOfAllKinesisShards, shardIdToLeaseMapOfNewShards,
-                            memoizationContext)) {
+                    // Check if the parent is a descendant, and include its ancestors. Or, if the parent is NOT a
+                    // descendant but we should create a lease for it anyway (e.g. to include in processing from
+                    // TRIM_HORIZON or AT_TIMESTAMP). If either is true, then we mark the current shard as a descendant.
+                    final boolean isParentDescendant = checkIfDescendantAndAddNewLeasesForAncestors(parentShardId,
+                            initialPosition, shardIdsOfCurrentLeases, shardIdToShardMapOfAllKinesisShards,
+                            shardIdToLeaseMapOfNewShards, memoizationContext);
+                    if (isParentDescendant || memoizationContext.shouldCreateLease(parentShardId)) {
                         isDescendant = true;
                         descendantParentShardIds.add(parentShardId);
                         LOG.debug("Parent shard " + parentShardId + " is a descendant.");
@@ -511,37 +516,76 @@ class KinesisShardSyncer implements ShardSyncer {
                 if (isDescendant) {
                     for (String parentShardId : parentShardIds) {
                         if (!shardIdsOfCurrentLeases.contains(parentShardId)) {
-                            LOG.debug("Need to create a lease for shardId " + parentShardId);
                             KinesisClientLease lease = shardIdToLeaseMapOfNewShards.get(parentShardId);
+
+                            // If the lease for the parent shard does not already exist, there are two cases in which we
+                            // would want to create it:
+                            // - If we have already marked the parentShardId for lease creation in a prior recursive
+                            //   call. This could happen if we are trying to process from TRIM_HORIZON or AT_TIMESTAMP.
+                            // - If the parent shard is not a descendant but the current shard is a descendant, then
+                            //   the parent shard is the oldest shard in the shard hierarchy that does not have an
+                            //   ancestor in the lease table (the adjacent parent is necessarily a descendant, and
+                            //   therefore covered in the lease table). So we should create a lease for the parent.
                             if (lease == null) {
-                                lease = newKCLLease(shardIdToShardMapOfAllKinesisShards.get(parentShardId));
-                                shardIdToLeaseMapOfNewShards.put(parentShardId, lease);
+                                if (memoizationContext.shouldCreateLease(parentShardId) ||
+                                        !descendantParentShardIds.contains(parentShardId)) {
+                                    LOG.debug("Need to create a lease for shardId " + parentShardId);
+                                    lease = newKCLLease(shardIdToShardMapOfAllKinesisShards.get(parentShardId));
+                                    shardIdToLeaseMapOfNewShards.put(parentShardId, lease);
+                                }
                             }
 
-                            if (descendantParentShardIds.contains(parentShardId) && !initialPosition
-                                    .getInitialPositionInStream().equals(InitialPositionInStream.AT_TIMESTAMP)) {
-                                lease.setCheckpoint(ExtendedSequenceNumber.TRIM_HORIZON);
-                            } else {
-                                lease.setCheckpoint(convertToCheckpoint(initialPosition));
-                            }
+                            /**
+                             * If the shard is a descendant and the specified initial position is AT_TIMESTAMP, then the
+                             * checkpoint should be set to AT_TIMESTAMP, else to TRIM_HORIZON. For AT_TIMESTAMP, we will
+                             * add a lease just like we do for TRIM_HORIZON. However we will only return back records
+                             * with server-side timestamp at or after the specified initial position timestamp.
+                             *
+                             * Shard structure (each level depicts a stream segment):
+                             * 0 1 2 3 4 5    - shards till epoch 102
+                             * \ / \ / |  |
+                             *  6   7  4  5   - shards from epoch 103 - 205
+                             *   \ /   |  /\
+                             *    8    4 9 10 - shards from epoch 206 (open - no ending sequenceNumber)
+                             *
+                             * Current leases: (4, 5, 7)
+                             *
+                             * For the above example, suppose the initial position in stream is set to AT_TIMESTAMP with
+                             * timestamp value 206. We will then create new leases for all the shards 0 and 1 (with
+                             * checkpoint set AT_TIMESTAMP), even though these ancestor shards have an epoch less than
+                             * 206. However as we begin processing the ancestor shards, their checkpoints would be
+                             * updated to SHARD_END and their leases would then be deleted since they won't have records
+                             * with server-side timestamp at/after 206. And after that we will begin processing the
+                             * descendant shards with epoch at/after 206 and we will return the records that meet the
+                             * timestamp requirement for these shards.
+                             */
+                            if (lease != null) {
+                                if (descendantParentShardIds.contains(parentShardId) && !initialPosition
+                                        .getInitialPositionInStream().equals(InitialPositionInStream.AT_TIMESTAMP)) {
+                                    lease.setCheckpoint(ExtendedSequenceNumber.TRIM_HORIZON);
+                                } else {
+                                    lease.setCheckpoint(convertToCheckpoint(initialPosition));
+                                }
+                            }
                         }
                     }
                 } else {
-                    // This shard should be included, if the customer wants to process all records in the stream or
-                    // if the initial position is AT_TIMESTAMP. For AT_TIMESTAMP, we will add a lease just like we do
-                    // for TRIM_HORIZON. However we will only return back records with server-side timestamp at or
-                    // after the specified initial position timestamp.
+                    // This shard is not a descendant, but should still be included if the customer wants to process all
+                    // records in the stream or if the initial position is AT_TIMESTAMP. For AT_TIMESTAMP, we will add a
+                    // lease just like we do for TRIM_HORIZON. However we will only return back records with server-side
+                    // timestamp at or after the specified initial position timestamp.
                     if (initialPosition.getInitialPositionInStream().equals(InitialPositionInStream.TRIM_HORIZON)
                             || initialPosition.getInitialPositionInStream()
                                     .equals(InitialPositionInStream.AT_TIMESTAMP)) {
                         isDescendant = true;
+                        memoizationContext.setShouldCreateLease(shardId, true);
                     }
                 }
 
             }
         }
 
-        memoizationContext.put(shardId, isDescendant);
+        memoizationContext.setIsDescendant(shardId, isDescendant);
         return isDescendant;
     }
     // CHECKSTYLE:ON CyclomaticComplexity
@@ -834,4 +878,28 @@ class KinesisShardSyncer implements ShardSyncer {
 
     }
 
+    /**
+     * Helper class to pass around state between recursive traversals of shard hierarchy.
+     */
+    @NoArgsConstructor
+    static class MemoizationContext {
+        private Map<String, Boolean> isDescendantMap = new HashMap<>();
+        private Map<String, Boolean> shouldCreateLeaseMap = new HashMap<>();
+
+        Boolean isDescendant(String shardId) {
+            return isDescendantMap.get(shardId);
+        }
+
+        void setIsDescendant(String shardId, Boolean isDescendant) {
+            isDescendantMap.put(shardId, isDescendant);
+        }
+
+        Boolean shouldCreateLease(String shardId) {
+            return shouldCreateLeaseMap.computeIfAbsent(shardId, x -> Boolean.FALSE);
+        }
+
+        void setShouldCreateLease(String shardId, Boolean shouldCreateLease) {
+            shouldCreateLeaseMap.put(shardId, shouldCreateLease);
+        }
+    }
 }
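To make the new traversal easier to follow, here is a minimal, self-contained sketch of the memoized walk that checkIfDescendantAndAddNewLeasesForAncestors performs with the MemoizationContext above. This is not KCL code: the class and method names (OneLevelLeaseSketch, Memo, walk) are illustrative, the lease table is modelled as a plain Set of shard ids, TRIM_HORIZON and AT_TIMESTAMP are collapsed into a single processFromStart flag, and checkpoints as well as the leases created for non-descendant open shards are ignored.

import java.util.*;

// Minimal, self-contained sketch -- NOT KCL code. All names here are illustrative only.
public class OneLevelLeaseSketch {

    // Stand-in for KinesisShardSyncer.MemoizationContext: two memoized flags per shard.
    static class Memo {
        final Map<String, Boolean> isDescendant = new HashMap<>();
        final Map<String, Boolean> shouldCreateLease = new HashMap<>();
    }

    // Returns true if shardId is (or will be) processed because an ancestor already has a
    // lease. Side effect: collects in newLeases the single level of ancestors that must be
    // leased so processing can eventually reach shardId.
    static boolean walk(String shardId, Map<String, List<String>> parents, Set<String> leased,
                        boolean processFromStart, Memo memo, Set<String> newLeases) {
        Boolean cached = memo.isDescendant.get(shardId);
        if (cached != null) {
            return cached;
        }

        boolean descendant = false;
        List<String> myParents = parents.getOrDefault(shardId, Collections.emptyList());
        if (leased.contains(shardId)) {
            descendant = true;
        } else {
            Set<String> descendantParents = new HashSet<>();
            for (String parent : myParents) {
                boolean parentIsDescendant =
                        walk(parent, parents, leased, processFromStart, memo, newLeases);
                if (parentIsDescendant || memo.shouldCreateLease.getOrDefault(parent, false)) {
                    descendant = true;
                    descendantParents.add(parent);
                }
            }
            if (descendant) {
                // Only the first ancestor that is not itself covered by the lease table gets
                // a new lease ("one level" of lease creation); deeper ancestors are skipped.
                for (String parent : myParents) {
                    if (!leased.contains(parent) && !newLeases.contains(parent)
                            && (memo.shouldCreateLease.getOrDefault(parent, false)
                                || !descendantParents.contains(parent))) {
                        newLeases.add(parent);
                    }
                }
            } else if (processFromStart) {
                // Not a descendant, but TRIM_HORIZON / AT_TIMESTAMP wants this shard too;
                // defer lease creation to the child whose traversal reaches us.
                descendant = true;
                memo.shouldCreateLease.put(shardId, true);
            }
        }

        memo.isDescendant.put(shardId, descendant);
        return descendant;
    }

    public static void main(String[] args) {
        // Shard tree from the javadoc example: 6 = merge(0,1), 7 = merge(2,3),
        // 8 = merge(6,7), 9 and 10 = split(5); shard 4 stays open; leases exist for 4, 5, 7.
        Map<String, List<String>> parents = new HashMap<>();
        parents.put("6", Arrays.asList("0", "1"));
        parents.put("7", Arrays.asList("2", "3"));
        parents.put("8", Arrays.asList("6", "7"));
        parents.put("9", Collections.singletonList("5"));
        parents.put("10", Collections.singletonList("5"));
        Set<String> leased = new HashSet<>(Arrays.asList("4", "5", "7"));
        List<String> openShardsWithoutLease = Arrays.asList("8", "9", "10");

        for (boolean processFromStart : new boolean[] {false, true}) {
            Memo memo = new Memo();
            Set<String> newLeases = new TreeSet<>();
            for (String shardId : openShardsWithoutLease) {
                walk(shardId, parents, leased, processFromStart, memo, newLeases);
            }
            System.out.println((processFromStart ? "TRIM_HORIZON / AT_TIMESTAMP" : "LATEST")
                    + " -> new ancestor leases: " + newLeases);
        }
    }
}

Run against the shard tree used in the javadocs (current leases 4, 5, 7), this prints new ancestor leases [6] for LATEST and [0, 1] for TRIM_HORIZON / AT_TIMESTAMP, matching the example below: only one level of ancestors is leased, and shard 6 is skipped once its own parents 0 and 1 are being leased.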
NonEmptyLeaseTableSynchronizer.java

@@ -47,8 +47,10 @@ class NonEmptyLeaseTableSynchronizer implements LeaseSynchronizer {
      * Note: Package level access only for testing purposes.
      *
      * For each open (no ending sequence number) shard without open parents that doesn't already have a lease,
-     * determine if it is a descendent of any shard which is or will be processed (e.g. for which a lease exists):
-     * If so, set checkpoint of the shard to TrimHorizon and also create leases for ancestors if needed.
+     * determine if it is a descendant of any shard which is or will be processed (e.g. for which a lease exists):
+     * If so, create a lease for the first ancestor that needs to be processed (if needed). We will create leases
+     * for no more than one level in the ancestry tree. Once we find the first ancestor that needs to be processed,
+     * we will avoid creating leases for further descendants of that ancestor.
      * If not, set checkpoint of the shard to the initial position specified by the client.
      * To check if we need to create leases for ancestors, we use the following rules:
      * * If we began (or will begin) processing data for a shard, then we must reach end of that shard before
@@ -67,10 +69,17 @@ class NonEmptyLeaseTableSynchronizer implements LeaseSynchronizer {
      * 0 1 2 3 4 5    - shards till epoch 102
      * \ / \ / |  |
      *  6   7  4  5   - shards from epoch 103 - 205
-     *   \ /   |  / \
-     *    8    4 9  10 - shards from epoch 206 (open - no ending sequenceNumber)
-     * Current leases: (3, 4, 5)
-     * New leases to create: (2, 6, 7, 8, 9, 10)
+     *   \ /   |  / \
+     *    8    4 9  10 - shards from epoch 206 (open - no ending sequenceNumber)
+     *
+     * Current leases: (4, 5, 7)
+     *
+     * If initial position is LATEST:
+     *   - New leases to create: (6)
+     * If initial position is TRIM_HORIZON:
+     *   - New leases to create: (0, 1)
+     * If initial position is AT_TIMESTAMP(epoch=200):
+     *   - New leases to create: (0, 1)
      *
      * The leases returned are sorted by the starting sequence number - following the same order
      * when persisting the leases in DynamoDB will ensure that we recover gracefully if we fail
@@ -104,7 +113,8 @@ class NonEmptyLeaseTableSynchronizer implements LeaseSynchronizer {
         }
 
         List<Shard> openShards = KinesisShardSyncer.getOpenShards(shards);
-        Map<String, Boolean> memoizationContext = new HashMap<>();
+        final KinesisShardSyncer.MemoizationContext memoizationContext = new KinesisShardSyncer.MemoizationContext();
+
 
         // Iterate over the open shards and find those that don't have any lease entries.
         for (Shard shard : openShards) {
@@ -115,43 +125,30 @@ class NonEmptyLeaseTableSynchronizer implements LeaseSynchronizer {
             } else if (inconsistentShardIds.contains(shardId)) {
                 LOG.info("shardId " + shardId + " is an inconsistent child. Not creating a lease");
             } else {
-                LOG.debug("Need to create a lease for shardId " + shardId);
-                KinesisClientLease newLease = KinesisShardSyncer.newKCLLease(shard);
+                LOG.debug("Beginning traversal of ancestry tree for shardId " + shardId);
+
+                // A shard is a descendant if at least one if its ancestors exists in the lease table.
+                // We will create leases for only one level in the ancestry tree. Once we find the first ancestor
+                // that needs to be processed in order to complete the hash range, we will not create leases for
+                // further descendants of that ancestor.
                 boolean isDescendant = KinesisShardSyncer.checkIfDescendantAndAddNewLeasesForAncestors(shardId,
                         initialPosition, shardIdsOfCurrentLeases, shardIdToShardMapOfAllKinesisShards,
                         shardIdToNewLeaseMap, memoizationContext);
 
-                /**
-                 * If the shard is a descendant and the specified initial position is AT_TIMESTAMP, then the
-                 * checkpoint should be set to AT_TIMESTAMP, else to TRIM_HORIZON. For AT_TIMESTAMP, we will add a
-                 * lease just like we do for TRIM_HORIZON. However we will only return back records with server-side
-                 * timestamp at or after the specified initial position timestamp.
-                 *
-                 * Shard structure (each level depicts a stream segment):
-                 * 0 1 2 3 4 5    - shards till epoch 102
-                 * \ / \ / |  |
-                 *  6   7  4  5   - shards from epoch 103 - 205
-                 *   \ /   |  /\
-                 *    8    4 9 10 - shards from epoch 206 (open - no ending sequenceNumber)
-                 *
-                 * Current leases: empty set
-                 *
-                 * For the above example, suppose the initial position in stream is set to AT_TIMESTAMP with
-                 * timestamp value 206. We will then create new leases for all the shards (with checkpoint set to
-                 * AT_TIMESTAMP), including the ancestor shards with epoch less than 206. However as we begin
-                 * processing the ancestor shards, their checkpoints would be updated to SHARD_END and their leases
-                 * would then be deleted since they won't have records with server-side timestamp at/after 206. And
-                 * after that we will begin processing the descendant shards with epoch at/after 206 and we will
-                 * return the records that meet the timestamp requirement for these shards.
-                 */
-                if (isDescendant && !initialPosition.getInitialPositionInStream()
-                        .equals(InitialPositionInStream.AT_TIMESTAMP)) {
-                    newLease.setCheckpoint(ExtendedSequenceNumber.TRIM_HORIZON);
-                } else {
-                    newLease.setCheckpoint(KinesisShardSyncer.convertToCheckpoint(initialPosition));
-                }
-                LOG.debug("Set checkpoint of " + newLease.getLeaseKey() + " to " + newLease.getCheckpoint());
-                shardIdToNewLeaseMap.put(shardId, newLease);
+                // If shard is a descendant, the leases for its ancestors were already created above. Open shards
+                // that are NOT descendants will not have leases yet, so we create them here. We will not create
+                // leases for open shards that ARE descendants yet - leases for these shards will be created upon
+                // SHARD_END of their parents.
+                if (!isDescendant) {
+                    LOG.debug("ShardId " + shardId + " has no ancestors. Creating a lease.");
+                    final KinesisClientLease newLease = KinesisShardSyncer.newKCLLease(shard);
+                    newLease.setCheckpoint(KinesisShardSyncer.convertToCheckpoint(initialPosition));
+                    LOG.debug("Set checkpoint of " + newLease.getLeaseKey() + " to " + newLease.getCheckpoint());
+                    shardIdToNewLeaseMap.put(shardId, newLease);
+                } else {
+                    LOG.debug("ShardId " + shardId + " is a descendant whose ancestors should already have leases. " +
+                            "Not creating a lease.");
+                }
             }
         }
 
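The TRIM_HORIZON-versus-initial-position checkpoint rule removed from this class now lives in KinesisShardSyncer, where the ancestor leases are actually created. Below is a tiny, self-contained sketch of that rule only; the class name CheckpointRuleSketch is hypothetical, and the real code uses ExtendedSequenceNumber, InitialPositionInStreamExtended and convertToCheckpoint rather than strings and a local enum.

// Tiny self-contained sketch of the checkpoint rule described above (hypothetical names).
public class CheckpointRuleSketch {

    enum InitialPosition { LATEST, TRIM_HORIZON, AT_TIMESTAMP }

    // A shard that descends from an already-leased shard is re-read from TRIM_HORIZON so no
    // records are skipped, unless the stream is consumed from AT_TIMESTAMP, in which case the
    // requested position is kept and the server-side timestamp filter decides what is returned.
    static String checkpointFor(boolean isDescendant, InitialPosition initialPosition) {
        if (isDescendant && initialPosition != InitialPosition.AT_TIMESTAMP) {
            return "TRIM_HORIZON";
        }
        return initialPosition.name();
    }

    public static void main(String[] args) {
        System.out.println(checkpointFor(true, InitialPosition.LATEST));        // TRIM_HORIZON
        System.out.println(checkpointFor(true, InitialPosition.AT_TIMESTAMP));  // AT_TIMESTAMP
        System.out.println(checkpointFor(false, InitialPosition.LATEST));       // LATEST
    }
}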
ExceptionThrowingLeaseManager.java

@@ -225,7 +225,7 @@ class ExceptionThrowingLeaseManager implements ILeaseManager<KinesisClientLease>
     @Override
     public boolean isLeaseTableEmpty() throws DependencyException,
             InvalidStateException, ProvisionedThroughputException {
-        return false;
+        return leaseManager.listLeases().isEmpty();
     }
 
 }
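The change above makes the ExceptionThrowingLeaseManager test double answer isLeaseTableEmpty() from its wrapped lease manager instead of a hard-coded false. A minimal sketch of the same idea with hypothetical types (FakeLeaseStore is not the real ILeaseManager<KinesisClientLease> interface):

import java.util.ArrayList;
import java.util.List;

// Hypothetical illustration only: report emptiness from the underlying state rather than a
// constant, so tests that depend on an actually empty lease table behave like production code.
class FakeLeaseStore {
    private final List<String> leases = new ArrayList<>();

    void createLease(String leaseKey) {
        leases.add(leaseKey);
    }

    List<String> listLeases() {
        return new ArrayList<>(leases);
    }

    // Mirrors the delegation pattern of the change above: ask the store, don't assume.
    boolean isLeaseTableEmpty() {
        return listLeases().isEmpty();
    }
}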
File diff suppressed because it is too large