Making LeaseCleanupManager more verbose about lease cleanup failures

This commit is contained in:
Ashwin Giridharan 2021-01-24 16:10:25 -08:00
parent 9cb5020022
commit 76234d172c

View file

@ -176,6 +176,7 @@ public class LeaseCleanupManager {
boolean alreadyCheckedForGarbageCollection = false; boolean alreadyCheckedForGarbageCollection = false;
boolean wereChildShardsPresent = false; boolean wereChildShardsPresent = false;
boolean wasResourceNotFound = false; boolean wasResourceNotFound = false;
String cleanupFailureReason = "";
try { try {
if (cleanupLeasesUponShardCompletion && timeToCheckForCompletedShard) { if (cleanupLeasesUponShardCompletion && timeToCheckForCompletedShard) {
@ -184,53 +185,55 @@ public class LeaseCleanupManager {
Set<String> childShardKeys = leaseFromDDB.getChildShardIds(); Set<String> childShardKeys = leaseFromDDB.getChildShardIds();
if (CollectionUtils.isNullOrEmpty(childShardKeys)) { if (CollectionUtils.isNullOrEmpty(childShardKeys)) {
try { try {
// throws ResourceNotFoundException
childShardKeys = getChildShardsFromService(shardInfo); childShardKeys = getChildShardsFromService(shardInfo);
if (CollectionUtils.isNullOrEmpty(childShardKeys)) { if (CollectionUtils.isNullOrEmpty(childShardKeys)) {
LOG.error("No child shards returned from service for shard " + shardInfo.getShardId()); LOG.error("No child shards returned from service for shard " + shardInfo.getShardId());
// If no children shard is found in DDB and from service, then do not delete the lease // If no children shard is found in DDB and from service, then do not delete the lease
throw new InvalidStateException("No child shards found for this supposedly " + throw new InvalidStateException("No child shards found for this supposedly " +
"closed shard in both local DDB and in service " + shardInfo.getShardId()); "closed shard in both local DDB and in service " + shardInfo.getShardId());
} else { } else {
wereChildShardsPresent = true; wereChildShardsPresent = true;
updateLeaseWithChildShards(leasePendingDeletion, childShardKeys); updateLeaseWithChildShards(leasePendingDeletion, childShardKeys);
} }
} catch (ResourceNotFoundException e) {
throw e;
} finally { } finally {
// We rely on resource presence in service for garbage collection. Since we already
// made a call to getChildShardsFromService we would be coming to know if the resource
// is present of not. In latter case, we would throw ResourceNotFoundException, which is
// handled in catch block.
alreadyCheckedForGarbageCollection = true; alreadyCheckedForGarbageCollection = true;
} }
} else { } else {
wereChildShardsPresent = true; wereChildShardsPresent = true;
} }
try { try {
cleanedUpCompletedLease = cleanupLeaseForCompletedShard(lease, childShardKeys); final CompletedShardResult completedShardResult = cleanupLeaseForCompletedShard(lease, childShardKeys);
cleanedUpCompletedLease = completedShardResult.cleanedUp();
cleanupFailureReason = completedShardResult.failureMsg();
} catch (Exception e) { } catch (Exception e) {
// Suppressing the exception here, so that we can attempt for garbage cleanup. // Suppressing the exception here, so that we can attempt for garbage cleanup.
LOG.warn("Unable to cleanup lease for shard " + shardInfo.getShardId()); LOG.warn("Unable to cleanup lease for shard " + shardInfo.getShardId() + " due to " + e.getMessage());
} }
} else { } else {
LOG.info("Lease not present in lease table while cleaning the shard " + shardInfo.getShardId()); LOG.info("Lease not present in lease table while cleaning the shard " + shardInfo.getShardId());
cleanedUpCompletedLease = true; cleanedUpCompletedLease = true;
} }
} }
if (!cleanedUpCompletedLease && !alreadyCheckedForGarbageCollection && timeToCheckForGarbageShard) {
if (!alreadyCheckedForGarbageCollection && timeToCheckForGarbageShard) { // throws ResourceNotFoundException
try { wereChildShardsPresent = !CollectionUtils
wereChildShardsPresent = !CollectionUtils
.isNullOrEmpty(getChildShardsFromService(shardInfo)); .isNullOrEmpty(getChildShardsFromService(shardInfo));
} catch (ResourceNotFoundException e) {
throw e;
}
} }
} catch (ResourceNotFoundException e) { } catch (ResourceNotFoundException e) {
wasResourceNotFound = true; wasResourceNotFound = true;
cleanedUpGarbageLease = cleanupLeaseForGarbageShard(lease); cleanedUpGarbageLease = cleanupLeaseForGarbageShard(lease);
cleanupFailureReason = cleanedUpGarbageLease ? "" : "DDB Lease Deletion Failed";
} catch (Exception e) {
LOG.warn("Unable to cleanup lease for shard " + shardInfo.getShardId() + " : " + e.getMessage());
cleanupFailureReason = e.getMessage();
} }
return new LeaseCleanupResult(cleanedUpCompletedLease, cleanedUpGarbageLease, wereChildShardsPresent, return new LeaseCleanupResult(cleanedUpCompletedLease, cleanedUpGarbageLease, wereChildShardsPresent,
wasResourceNotFound); wasResourceNotFound, cleanupFailureReason);
} }
private Set<String> getChildShardsFromService(ShardInfo shardInfo) { private Set<String> getChildShardsFromService(ShardInfo shardInfo) {
@ -238,12 +241,16 @@ public class LeaseCleanupManager {
return kinesisProxy.get(iterator, maxRecords).getChildShards().stream().map(c -> c.getShardId()).collect(Collectors.toSet()); return kinesisProxy.get(iterator, maxRecords).getChildShards().stream().map(c -> c.getShardId()).collect(Collectors.toSet());
} }
// A lease that ended with SHARD_END from ResourceNotFoundException is safe to delete if it no longer exists in the // A lease that ended with SHARD_END from ResourceNotFoundException is safe to delete if it no longer exists in the
// stream (known explicitly from ResourceNotFound being thrown when processing this shard), // stream (known explicitly from ResourceNotFound being thrown when processing this shard),
private boolean cleanupLeaseForGarbageShard(KinesisClientLease lease) throws DependencyException, ProvisionedThroughputException, InvalidStateException { private boolean cleanupLeaseForGarbageShard(KinesisClientLease lease) throws DependencyException, ProvisionedThroughputException, InvalidStateException {
LOG.info("Deleting lease " + lease.getLeaseKey() + " as it is not present in the stream."); LOG.info("Deleting lease " + lease.getLeaseKey() + " as it is not present in the stream.");
leaseManager.deleteLease(lease); try {
leaseManager.deleteLease(lease);
} catch (Exception e) {
LOG.warn("Lease deletion failed for " + lease.getLeaseKey() + " due to " + e.getMessage());
return false;
}
return true; return true;
} }
@ -263,8 +270,9 @@ public class LeaseCleanupManager {
// We should only be deleting the current shard's lease if // We should only be deleting the current shard's lease if
// 1. All of its children are currently being processed, i.e their checkpoint is not TRIM_HORIZON or AT_TIMESTAMP. // 1. All of its children are currently being processed, i.e their checkpoint is not TRIM_HORIZON or AT_TIMESTAMP.
// 2. Its parent shard lease(s) have already been deleted. // 2. Its parent shard lease(s) have already been deleted.
private boolean cleanupLeaseForCompletedShard(KinesisClientLease lease, Set<String> childShardLeaseKeys) private CompletedShardResult cleanupLeaseForCompletedShard(KinesisClientLease lease, Set<String> childShardLeaseKeys)
throws DependencyException, ProvisionedThroughputException, InvalidStateException, IllegalStateException { throws DependencyException, ProvisionedThroughputException, InvalidStateException, IllegalStateException {
final Set<String> processedChildShardLeaseKeys = new HashSet<>(); final Set<String> processedChildShardLeaseKeys = new HashSet<>();
for (String childShardLeaseKey : childShardLeaseKeys) { for (String childShardLeaseKey : childShardLeaseKeys) {
@ -280,14 +288,17 @@ public class LeaseCleanupManager {
} }
} }
if (!allParentShardLeasesDeleted(lease) || !Objects.equals(childShardLeaseKeys, processedChildShardLeaseKeys)) { boolean parentShardsDeleted = allParentShardLeasesDeleted(lease);
return false; boolean childrenStartedProcessing = Objects.equals(childShardLeaseKeys, processedChildShardLeaseKeys);
if (!parentShardsDeleted || !childrenStartedProcessing) {
return new CompletedShardResult(false, !parentShardsDeleted ? "Parent shard(s) not deleted yet" : "Child shard(s) yet to begin processing");
} }
LOG.info("Deleting lease " + lease.getLeaseKey() + " as it has been completely processed and processing of child shard(s) has begun."); LOG.info("Deleting lease " + lease.getLeaseKey() + " as it has been completely processed and processing of child shard(s) has begun.");
leaseManager.deleteLease(lease); leaseManager.deleteLease(lease);
return true; return new CompletedShardResult(true, "");
} }
private void updateLeaseWithChildShards(LeasePendingDeletion leasePendingDeletion, Set<String> childShardKeys) private void updateLeaseWithChildShards(LeasePendingDeletion leasePendingDeletion, Set<String> childShardKeys)
@ -363,9 +374,17 @@ public class LeaseCleanupManager {
boolean cleanedUpGarbageLease; boolean cleanedUpGarbageLease;
boolean wereChildShardsPresent; boolean wereChildShardsPresent;
boolean wasResourceNotFound; boolean wasResourceNotFound;
String cleanupFailureReason;
public boolean leaseCleanedUp() { public boolean leaseCleanedUp() {
return cleanedUpCompletedLease | cleanedUpGarbageLease; return cleanedUpCompletedLease | cleanedUpGarbageLease;
} }
} }
@Value
@Accessors(fluent = true)
private static class CompletedShardResult {
boolean cleanedUp;
String failureMsg;
}
} }