Minor refactor, addressing feedback

This commit is contained in:
Jegosh John 2020-03-03 16:02:00 -08:00
parent 62e13ff3a1
commit 8511475868
3 changed files with 188 additions and 34 deletions

View file

@ -18,6 +18,7 @@ import lombok.EqualsAndHashCode;
import lombok.Getter;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.lang3.Validate;
import software.amazon.kinesis.exceptions.internal.KinesisClientLibIOException;
import software.amazon.kinesis.leases.ShardSyncTask;
import software.amazon.kinesis.lifecycle.ConsumerTask;
import software.amazon.kinesis.lifecycle.TaskResult;
@ -36,8 +37,8 @@ import java.util.concurrent.TimeUnit;
@EqualsAndHashCode
@Slf4j
class PeriodicShardSyncManager {
private static final long INITIAL_DELAY = 0;
private static final long PERIODIC_SHARD_SYNC_INTERVAL_MILLIS = 5 * 60 * 1000;
private static final long INITIAL_DELAY = 60 * 1000L;
private static final long PERIODIC_SHARD_SYNC_INTERVAL_MILLIS = 5 * 60 * 1000L;
private final String workerId;
private final LeaderDecider leaderDecider;
@ -61,14 +62,34 @@ class PeriodicShardSyncManager {
public synchronized TaskResult start() {
if (!isRunning) {
shardSyncThreadPool
.scheduleWithFixedDelay(this::runShardSync, INITIAL_DELAY, PERIODIC_SHARD_SYNC_INTERVAL_MILLIS,
TimeUnit.MILLISECONDS);
final Runnable periodicShardSyncer = () -> {
try {
runShardSync();
} catch (Throwable t) {
log.error("Error during runShardSync.", t);
}
};
shardSyncThreadPool.scheduleWithFixedDelay(periodicShardSyncer, INITIAL_DELAY, PERIODIC_SHARD_SYNC_INTERVAL_MILLIS,
TimeUnit.MILLISECONDS);
isRunning = true;
}
return new TaskResult(null);
}
public synchronized TaskResult syncShardsOnce() {
Exception lastException = null;
try {
if (!isRunning) {
runShardSync();
}
} catch (Exception e) {
lastException = e;
}
return new TaskResult(lastException);
}
public void stop() {
if (isRunning) {
log.info(String.format("Shutting down leader decider on worker %s", workerId));
@ -80,15 +101,23 @@ class PeriodicShardSyncManager {
}
private void runShardSync() {
try {
if (leaderDecider.isLeader(workerId)) {
log.info(String.format("WorkerId %s is a leader, running the shard sync task", workerId));
metricsEmittingShardSyncTask.call();
} else {
log.debug(String.format("WorkerId %s is not a leader, not running the shard sync task", workerId));
if (leaderDecider.isLeader(workerId)) {
log.info(String.format("WorkerId %s is a leader, running the shard sync task", workerId));
final TaskResult taskResult = metricsEmittingShardSyncTask.call();
if (taskResult != null && taskResult.getException() != null) {
throw new KinesisClientLibIOException("Failed to sync shards", taskResult.getException());
}
} catch (Throwable t) {
log.error("Error during runShardSync.", t);
} else {
log.debug(String.format("WorkerId %s is not a leader, not running the shard sync task", workerId));
}
}
/**
* Checks if the entire hash range is covered
* @return true if covered, false otherwise
*/
public boolean hashRangeCovered() {
// TODO: Implement method
return true;
}
}

View file

@ -46,7 +46,6 @@ import software.amazon.kinesis.lifecycle.ShutdownNotification;
import software.amazon.kinesis.lifecycle.ShutdownReason;
import software.amazon.kinesis.lifecycle.TaskResult;
import software.amazon.kinesis.metrics.CloudWatchMetricsFactory;
import software.amazon.kinesis.metrics.MetricsCollectingTaskDecorator;
import software.amazon.kinesis.metrics.MetricsConfig;
import software.amazon.kinesis.metrics.MetricsFactory;
import software.amazon.kinesis.processor.Checkpointer;
@ -68,6 +67,7 @@ import java.util.concurrent.CountDownLatch;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import java.util.concurrent.ThreadLocalRandom;
import java.util.concurrent.TimeUnit;
/**
@ -102,7 +102,7 @@ public class Scheduler implements Runnable {
// private final GetRecordsRetrievalStrategy getRecordsRetrievalStrategy;
private final LeaseCoordinator leaseCoordinator;
private final ShardSyncTaskManager shardSyncTaskManager;
private final PeriodicShardSyncManager periodicShardSyncManager;
private final PeriodicShardSyncManager leaderElectedPeriodicShardSyncManager;
private final ShardPrioritization shardPrioritization;
private final boolean cleanupLeasesUponShardCompletion;
private final boolean skipShardSyncAtWorkerInitializationIfLeasesExist;
@ -121,7 +121,7 @@ public class Scheduler implements Runnable {
private final AggregatorUtil aggregatorUtil;
private final HierarchicalShardSyncer hierarchicalShardSyncer;
private final long schedulerInitializationBackoffTimeMillis;
private LeaderDecider leaderDecider;
private final LeaderDecider leaderDecider;
// Holds consumers for shards the worker is currently tracking. Key is shard
// info, value is ShardConsumer.
@ -212,10 +212,8 @@ public class Scheduler implements Runnable {
this.workerStateChangeListener = this.coordinatorConfig.coordinatorFactory()
.createWorkerStateChangeListener();
}
if (leaderDecider == null) {
leaderDecider = new DeterministicShuffleShardSyncLeaderDecider(leaseRefresher,
this.leaderDecider = new DeterministicShuffleShardSyncLeaderDecider(leaseRefresher,
Executors.newSingleThreadScheduledExecutor(), PERIODIC_SHARD_SYNC_MAX_WORKERS_DEFAULT);
}
this.initialPosition = retrievalConfig.initialPositionInStreamExtended();
this.failoverTimeMillis = this.leaseManagementConfig.failoverTimeMillis();
this.taskBackoffTimeMillis = this.lifecycleConfig.taskBackoffTimeMillis();
@ -232,7 +230,7 @@ public class Scheduler implements Runnable {
ShardSyncTask shardSyncTask = new ShardSyncTask(shardDetector, leaseRefresher, initialPosition,
cleanupLeasesUponShardCompletion, ignoreUnexpetedChildShards, 0L, hierarchicalShardSyncer,
metricsFactory);
this.periodicShardSyncManager = new PeriodicShardSyncManager(leaseManagementConfig.workerIdentifier(),
this.leaderElectedPeriodicShardSyncManager = new PeriodicShardSyncManager(leaseManagementConfig.workerIdentifier(),
leaderDecider, shardSyncTask, metricsFactory);
}
@ -247,12 +245,7 @@ public class Scheduler implements Runnable {
try {
initialize();
log.info("Initialization complete. Scheduling periodicShardSync..");
periodicShardSyncManager.start();
log.info("Scheduled periodicShardSync tasks. Starting worker loop.");
log.info("Initialization complete. Starting worker loop.");
} catch (RuntimeException e) {
log.error("Unable to initialize after {} attempts. Shutting down.", maxInitializationAttempts, e);
workerStateChangeListener.onAllInitializationAttemptsFailed(e);
@ -282,11 +275,14 @@ public class Scheduler implements Runnable {
TaskResult result = null;
if (!skipShardSyncAtWorkerInitializationIfLeasesExist || leaseRefresher.isLeaseTableEmpty()) {
for (int j = 0; j < 10 && leaseRefresher.isLeaseTableEmpty(); j++) {
// check every 1-5 seconds if lease table is still empty,
// to minimize contention between all workers bootstrapping at the same time
long waitTime = ThreadLocalRandom.current().nextLong(1000L, 5000L);
Thread.sleep(waitTime);
}
log.info("Syncing Kinesis shard info");
ShardSyncTask shardSyncTask = new ShardSyncTask(shardDetector, leaseRefresher, initialPosition,
cleanupLeasesUponShardCompletion, ignoreUnexpetedChildShards, 0L, hierarchicalShardSyncer,
metricsFactory);
result = new MetricsCollectingTaskDecorator(shardSyncTask, metricsFactory).call();
result = leaderElectedPeriodicShardSyncManager.syncShardsOnce();
} else {
log.info("Skipping shard sync per configuration setting (and lease table is not empty)");
}
@ -298,6 +294,9 @@ public class Scheduler implements Runnable {
} else {
log.info("LeaseCoordinator is already running. No need to start it.");
}
log.info("Scheduling periodicShardSync)");
// leaderElectedPeriodicShardSyncManager.start();
// TODO: enable periodicShardSync after https://github.com/jushkem/amazon-kinesis-client/pull/2 is merged
isDone = true;
} else {
lastException = result.getException();
@ -309,9 +308,10 @@ public class Scheduler implements Runnable {
lastException = e;
}
if (!isDone) {
if (!isDone || !leaderElectedPeriodicShardSyncManager.hashRangeCovered()) {
try {
Thread.sleep(schedulerInitializationBackoffTimeMillis);
leaderElectedPeriodicShardSyncManager.stop();
} catch (InterruptedException e) {
log.debug("Sleep interrupted while initializing worker.");
}
@ -532,7 +532,7 @@ public class Scheduler implements Runnable {
// Lost leases will force Worker to begin shutdown process for all shard consumers in
// Worker.run().
leaseCoordinator.stop();
periodicShardSyncManager.stop();
leaderElectedPeriodicShardSyncManager.stop();
workerStateChangeListener.onWorkerStateChange(WorkerStateChangeListener.WorkerState.SHUT_DOWN);
}
}
@ -630,12 +630,12 @@ public class Scheduler implements Runnable {
hierarchicalShardSyncer,
metricsFactory);
return new ShardConsumer(cache, executorService, shardInfo, lifecycleConfig.logWarningForTaskAfterMillis(),
argument, lifecycleConfig.taskExecutionListener(),lifecycleConfig.readTimeoutsToIgnoreBeforeWarning());
argument, lifecycleConfig.taskExecutionListener(), lifecycleConfig.readTimeoutsToIgnoreBeforeWarning());
}
/**
* NOTE: This method is internal/private to the Worker class. It has package access solely for testing.
*
* <p>
* This method relies on ShardInfo.equals() method returning true for ShardInfo objects which may have been
* instantiated with parentShardIds in a different order (and rest of the fields being the equal). For example
* shardInfo1.equals(shardInfo2) should return true with shardInfo1 and shardInfo2 defined as follows. ShardInfo

View file

@ -0,0 +1,125 @@
/*
* Copyright 2020 Amazon.com, Inc. or its affiliates.
* Licensed under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package software.amazon.kinesis.coordinator;
import org.junit.Before;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.mockito.Mock;
import org.mockito.runners.MockitoJUnitRunner;
import software.amazon.kinesis.leases.Lease;
import software.amazon.kinesis.leases.LeaseRefresher;
import software.amazon.kinesis.retrieval.kpl.ExtendedSequenceNumber;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Random;
import java.util.Set;
import java.util.concurrent.ScheduledExecutorService;
import java.util.stream.Collectors;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertTrue;
import static org.mockito.Mockito.when;
import static software.amazon.kinesis.coordinator.DeterministicShuffleShardSyncLeaderDecider.DETERMINISTIC_SHUFFLE_SEED;
@RunWith(MockitoJUnitRunner.class)
public class DeterministicShuffleShardSyncLeaderDeciderTest {
private static final String LEASE_KEY = "lease_key";
private static final String LEASE_OWNER = "lease_owner";
private static final String WORKER_ID = "worker-id";
private DeterministicShuffleShardSyncLeaderDecider leaderDecider;
@Mock
private LeaseRefresher leaseRefresher;
@Mock
private ScheduledExecutorService scheduledExecutorService;
private int numShardSyncWorkers;
@Before
public void setup() {
numShardSyncWorkers = 1;
leaderDecider = new DeterministicShuffleShardSyncLeaderDecider(leaseRefresher, scheduledExecutorService, numShardSyncWorkers);
}
@Test
public void testLeaderElectionWithNullLeases() {
boolean isLeader = leaderDecider.isLeader(WORKER_ID);
assertTrue("IsLeader should return true if leaders is null", isLeader);
}
@Test
public void testLeaderElectionWithEmptyLeases() throws Exception {
when(leaseRefresher.listLeases()).thenReturn(new ArrayList<>());
boolean isLeader = leaderDecider.isLeader(WORKER_ID);
assertTrue("IsLeader should return true if no leases are returned", isLeader);
}
@Test
public void testElectedLeadersAsPerExpectedShufflingOrder()
throws Exception {
List<Lease> leases = getLeases(5, false /* duplicateLeaseOwner */, true /* activeLeases */);
when(leaseRefresher.listLeases()).thenReturn(leases);
Set<String> expectedLeaders = getExpectedLeaders(leases);
for (String leader : expectedLeaders) {
assertTrue(leaderDecider.isLeader(leader));
}
for (Lease lease : leases) {
if (!expectedLeaders.contains(lease.leaseOwner())) {
assertFalse(leaderDecider.isLeader(lease.leaseOwner()));
}
}
}
@Test
public void testElectedLeadersAsPerExpectedShufflingOrderWhenUniqueWorkersLessThanMaxLeaders() {
this.numShardSyncWorkers = 5; // More than number of unique lease owners
leaderDecider = new DeterministicShuffleShardSyncLeaderDecider(leaseRefresher, scheduledExecutorService, numShardSyncWorkers);
List<Lease> leases = getLeases(3, false /* duplicateLeaseOwner */, true /* activeLeases */);
Set<String> expectedLeaders = getExpectedLeaders(leases);
// All lease owners should be present in expected leaders set, and they should all be leaders.
for (Lease lease : leases) {
assertTrue(leaderDecider.isLeader(lease.leaseOwner()));
assertTrue(expectedLeaders.contains(lease.leaseOwner()));
}
}
private List<Lease> getLeases(int count, boolean duplicateLeaseOwner, boolean activeLeases) {
List<Lease> leases = new ArrayList<>();
for (int i = 0; i < count; i++) {
Lease lease = new Lease();
lease.leaseKey(LEASE_KEY + i);
lease.checkpoint(activeLeases ? ExtendedSequenceNumber.LATEST : ExtendedSequenceNumber.SHARD_END);
lease.leaseCounter(new Random().nextLong());
lease.leaseOwner(LEASE_OWNER + (duplicateLeaseOwner ? "" : i));
leases.add(lease);
}
return leases;
}
private Set<String> getExpectedLeaders(List<Lease> leases) {
List<String> uniqueHosts = leases.stream().filter(lease -> lease.leaseOwner() != null)
.map(Lease::leaseOwner).distinct().sorted().collect(Collectors.toList());
Collections.shuffle(uniqueHosts, new Random(DETERMINISTIC_SHUFFLE_SEED));
int numWorkers = Math.min(uniqueHosts.size(), this.numShardSyncWorkers);
return new HashSet<>(uniqueHosts.subList(0, numWorkers));
}
}