Add support for lease stealing (#78)

Fixes #4

Signed-off-by: Connor McKelvey <connormckelvey@gmail.com>
Signed-off-by: Ali Hobbs <alisuehobbs@gmail.com>
Co-authored-by: Ali Hobbs <alisuehobbs@gmail.com>

parent 4a642bfa2f
commit 7de4607b71

18 changed files with 1233 additions and 50 deletions
@@ -8,8 +8,8 @@ targets:
   rebuild-toolchain:
     description: build toolchain image
     watches:
-      - support/docker/toolchain
-    build: support/docker/toolchain
+      - support/toolchain/docker
+    build: support/toolchain/docker

   toolchain:
     description: placeholder for additional toolchain dependencies
@@ -40,9 +40,13 @@ const (
     LeaseTimeoutKey   = "LeaseTimeout"
     SequenceNumberKey = "Checkpoint"
     ParentShardIdKey  = "ParentShardId"
+    ClaimRequestKey   = "ClaimRequest"

     // We've completely processed all records in this shard.
     ShardEnd = "SHARD_END"
+
+    // ErrShardClaimed is returned when shard is claimed
+    ErrShardClaimed = "Shard is already claimed by another node"
 )

 type ErrLeaseNotAcquired struct {
@@ -72,7 +76,17 @@ type Checkpointer interface {
     // RemoveLeaseOwner to remove lease owner for the shard entry to make the shard available for reassignment
     RemoveLeaseOwner(string) error

+    // New Lease Stealing Methods
+    // ListActiveWorkers returns active workers and their shards
+    ListActiveWorkers(map[string]*par.ShardStatus) (map[string][]*par.ShardStatus, error)
+
+    // ClaimShard claims a shard for stealing
+    ClaimShard(*par.ShardStatus, string) error
 }

 // ErrSequenceIDNotFound is returned by FetchCheckpoint when no SequenceID is found
 var ErrSequenceIDNotFound = errors.New("SequenceIDNotFoundForShard")
+
+// ErrShardNotAssigned is returned by ListActiveWorkers when no AssignedTo is found
+var ErrShardNotAssigned = errors.New("AssignedToNotFoundForShard")
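For orientation, a lease entry in the DynamoDB table now carries the following attributes. This is an illustrative sketch only; the attribute names come from the constants above, and the sample values mirror the test fixtures later in this diff:

    ShardID       "0001"        (LeaseKeyKey, lease table hash key)
    AssignedTo    "abcd-efgh"   (LeaseOwnerKey, current lease owner)
    LeaseTimeout  RFC3339 timestamp of the current lease expiry
    Checkpoint    "deadbeef"    (SequenceNumberKey, last checkpointed sequence number)
    ClaimRequest  "ijkl-mnop"   (ClaimRequestKey, new: the worker attempting to steal the lease)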
@@ -28,6 +28,8 @@
 package checkpoint

 import (
+    "errors"
+    "fmt"
     "time"

     "github.com/aws/aws-sdk-go/aws"
@@ -61,6 +63,7 @@ type DynamoCheckpoint struct {
     svc           dynamodbiface.DynamoDBAPI
     kclConfig     *config.KinesisClientLibConfiguration
     Retries       int
+    lastLeaseSync time.Time
 }

 func NewDynamoCheckpoint(kclConfig *config.KinesisClientLibConfiguration) *DynamoCheckpoint {
@@ -124,8 +127,22 @@ func (checkpointer *DynamoCheckpoint) GetLease(shard *par.ShardStatus, newAssign
         return err
     }

+    isClaimRequestExpired := shard.IsClaimRequestExpired(checkpointer.kclConfig)
+
+    var claimRequest string
+    if checkpointer.kclConfig.EnableLeaseStealing {
+        if currentCheckpointClaimRequest, ok := currentCheckpoint[ClaimRequestKey]; ok && currentCheckpointClaimRequest.S != nil {
+            claimRequest = *currentCheckpointClaimRequest.S
+            if newAssignTo != claimRequest && !isClaimRequestExpired {
+                checkpointer.log.Debugf("another worker: %s has a claim on this shard. Not going to renew the lease", claimRequest)
+                return errors.New(ErrShardClaimed)
+            }
+        }
+    }
+
     assignedVar, assignedToOk := currentCheckpoint[LeaseOwnerKey]
     leaseVar, leaseTimeoutOk := currentCheckpoint[LeaseTimeoutKey]

     var conditionalExpression string
     var expressionAttributeValues map[string]*dynamodb.AttributeValue
@@ -140,8 +157,14 @@ func (checkpointer *DynamoCheckpoint) GetLease(shard *par.ShardStatus, newAssign
             return err
         }

-        if time.Now().UTC().Before(currentLeaseTimeout) && assignedTo != newAssignTo {
-            return ErrLeaseNotAcquired{"current lease timeout not yet expired"}
+        if checkpointer.kclConfig.EnableLeaseStealing {
+            if time.Now().UTC().Before(currentLeaseTimeout) && assignedTo != newAssignTo && !isClaimRequestExpired {
+                return ErrLeaseNotAcquired{"current lease timeout not yet expired"}
+            }
+        } else {
+            if time.Now().UTC().Before(currentLeaseTimeout) && assignedTo != newAssignTo {
+                return ErrLeaseNotAcquired{"current lease timeout not yet expired"}
+            }
         }

         checkpointer.log.Debugf("Attempting to get a lock for shard: %s, leaseTimeout: %s, assignedTo: %s, newAssignedTo: %s", shard.ID, currentLeaseTimeout, assignedTo, newAssignTo)
@@ -175,9 +198,21 @@ func (checkpointer *DynamoCheckpoint) GetLease(shard *par.ShardStatus, newAssign
         marshalledCheckpoint[ParentShardIdKey] = &dynamodb.AttributeValue{S: aws.String(shard.ParentShardId)}
     }

-    if shard.GetCheckpoint() != "" {
+    if checkpoint := shard.GetCheckpoint(); checkpoint != "" {
         marshalledCheckpoint[SequenceNumberKey] = &dynamodb.AttributeValue{
-            S: aws.String(shard.GetCheckpoint()),
+            S: aws.String(checkpoint),
         }
     }
+
+    if checkpointer.kclConfig.EnableLeaseStealing {
+        if claimRequest != "" && claimRequest == newAssignTo && !isClaimRequestExpired {
+            if expressionAttributeValues == nil {
+                expressionAttributeValues = make(map[string]*dynamodb.AttributeValue)
+            }
+            conditionalExpression = conditionalExpression + " AND ClaimRequest = :claim_request"
+            expressionAttributeValues[":claim_request"] = &dynamodb.AttributeValue{
+                S: &claimRequest,
+            }
+        }
+    }
@@ -199,7 +234,7 @@ func (checkpointer *DynamoCheckpoint) GetLease(shard *par.ShardStatus, newAssign
 // CheckpointSequence writes a checkpoint at the designated sequence ID
 func (checkpointer *DynamoCheckpoint) CheckpointSequence(shard *par.ShardStatus) error {
-    leaseTimeout := shard.LeaseTimeout.UTC().Format(time.RFC3339)
+    leaseTimeout := shard.GetLeaseTimeout().UTC().Format(time.RFC3339)
     marshalledCheckpoint := map[string]*dynamodb.AttributeValue{
         LeaseKeyKey: {
             S: aws.String(shard.ID),
@@ -208,7 +243,7 @@ func (checkpointer *DynamoCheckpoint) CheckpointSequence(shard *par.ShardStatus)
             S: aws.String(shard.GetCheckpoint()),
         },
         LeaseOwnerKey: {
-            S: aws.String(shard.AssignedTo),
+            S: aws.String(shard.GetLeaseOwner()),
         },
         LeaseTimeoutKey: {
             S: aws.String(leaseTimeout),
@@ -239,6 +274,16 @@ func (checkpointer *DynamoCheckpoint) FetchCheckpoint(shard *par.ShardStatus) er
     if assignedTo, ok := checkpoint[LeaseOwnerKey]; ok {
         shard.SetLeaseOwner(aws.StringValue(assignedTo.S))
     }
+
+    // Use up-to-date leaseTimeout to avoid ConditionalCheckFailedException when claiming
+    if leaseTimeout, ok := checkpoint[LeaseTimeoutKey]; ok && leaseTimeout.S != nil {
+        currentLeaseTimeout, err := time.Parse(time.RFC3339, aws.StringValue(leaseTimeout.S))
+        if err != nil {
+            return err
+        }
+        shard.LeaseTimeout = currentLeaseTimeout
+    }

     return nil
 }
@@ -265,6 +310,12 @@ func (checkpointer *DynamoCheckpoint) RemoveLeaseOwner(shardID string) error {
             },
         },
         UpdateExpression: aws.String("remove " + LeaseOwnerKey),
+        ExpressionAttributeValues: map[string]*dynamodb.AttributeValue{
+            ":assigned_to": {
+                S: aws.String(checkpointer.kclConfig.WorkerID),
+            },
+        },
+        ConditionExpression: aws.String("AssignedTo = :assigned_to"),
     }

     _, err := checkpointer.svc.UpdateItem(input)
@@ -272,6 +323,135 @@ func (checkpointer *DynamoCheckpoint) RemoveLeaseOwner(shardID string) error {
     return err
 }

+// ListActiveWorkers returns a map of workers and their shards
+func (checkpointer *DynamoCheckpoint) ListActiveWorkers(shardStatus map[string]*par.ShardStatus) (map[string][]*par.ShardStatus, error) {
+    err := checkpointer.syncLeases(shardStatus)
+    if err != nil {
+        return nil, err
+    }
+
+    workers := map[string][]*par.ShardStatus{}
+    for _, shard := range shardStatus {
+        if shard.GetCheckpoint() == ShardEnd {
+            continue
+        }
+
+        leaseOwner := shard.GetLeaseOwner()
+        if leaseOwner == "" {
+            checkpointer.log.Debugf("Shard Not Assigned Error. ShardID: %s, WorkerID: %s", shard.ID, checkpointer.kclConfig.WorkerID)
+            return nil, ErrShardNotAssigned
+        }
+        if w, ok := workers[leaseOwner]; ok {
+            workers[leaseOwner] = append(w, shard)
+        } else {
+            workers[leaseOwner] = []*par.ShardStatus{shard}
+        }
+    }
+    return workers, nil
+}
+
+// ClaimShard places a claim request on a shard to signal a steal attempt
+func (checkpointer *DynamoCheckpoint) ClaimShard(shard *par.ShardStatus, claimID string) error {
+    err := checkpointer.FetchCheckpoint(shard)
+    if err != nil && err != ErrSequenceIDNotFound {
+        return err
+    }
+    leaseTimeoutString := shard.GetLeaseTimeout().Format(time.RFC3339)
+
+    conditionalExpression := `ShardID = :id AND LeaseTimeout = :lease_timeout AND attribute_not_exists(ClaimRequest)`
+    expressionAttributeValues := map[string]*dynamodb.AttributeValue{
+        ":id": {
+            S: aws.String(shard.ID),
+        },
+        ":lease_timeout": {
+            S: aws.String(leaseTimeoutString),
+        },
+    }
+
+    marshalledCheckpoint := map[string]*dynamodb.AttributeValue{
+        LeaseKeyKey: {
+            S: &shard.ID,
+        },
+        LeaseTimeoutKey: {
+            S: &leaseTimeoutString,
+        },
+        SequenceNumberKey: {
+            S: &shard.Checkpoint,
+        },
+        ClaimRequestKey: {
+            S: &claimID,
+        },
+    }
+
+    if leaseOwner := shard.GetLeaseOwner(); leaseOwner == "" {
+        conditionalExpression += " AND attribute_not_exists(AssignedTo)"
+    } else {
+        marshalledCheckpoint[LeaseOwnerKey] = &dynamodb.AttributeValue{S: &leaseOwner}
+        conditionalExpression += "AND AssignedTo = :assigned_to"
+        expressionAttributeValues[":assigned_to"] = &dynamodb.AttributeValue{S: &leaseOwner}
+    }
+
+    if checkpoint := shard.GetCheckpoint(); checkpoint == "" {
+        conditionalExpression += " AND attribute_not_exists(Checkpoint)"
+    } else if checkpoint == ShardEnd {
+        conditionalExpression += " AND Checkpoint <> :checkpoint"
+        expressionAttributeValues[":checkpoint"] = &dynamodb.AttributeValue{S: aws.String(ShardEnd)}
+    } else {
+        conditionalExpression += " AND Checkpoint = :checkpoint"
+        expressionAttributeValues[":checkpoint"] = &dynamodb.AttributeValue{S: &checkpoint}
+    }
+
+    if shard.ParentShardId == "" {
+        conditionalExpression += " AND attribute_not_exists(ParentShardId)"
+    } else {
+        marshalledCheckpoint[ParentShardIdKey] = &dynamodb.AttributeValue{S: aws.String(shard.ParentShardId)}
+        conditionalExpression += " AND ParentShardId = :parent_shard"
+        expressionAttributeValues[":parent_shard"] = &dynamodb.AttributeValue{S: &shard.ParentShardId}
+    }
+
+    return checkpointer.conditionalUpdate(conditionalExpression, expressionAttributeValues, marshalledCheckpoint)
+}
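To make the conditional write concrete: for a shard that currently has no owner, a regular checkpoint, and no parent shard, the expression assembled by ClaimShard above comes out as follows (an illustrative reconstruction from the string fragments in the code, not captured output):

    ShardID = :id AND LeaseTimeout = :lease_timeout AND attribute_not_exists(ClaimRequest) AND attribute_not_exists(AssignedTo) AND Checkpoint = :checkpoint AND attribute_not_exists(ParentShardId)

with :id, :lease_timeout, and :checkpoint bound to the shard ID, the RFC3339 lease timeout fetched just before the update, and the current checkpoint value. The LeaseTimeout equality is what keeps a claim race-safe: if the owner renews its lease in the meantime, the stored timeout changes and the conditional update fails.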
+func (checkpointer *DynamoCheckpoint) syncLeases(shardStatus map[string]*par.ShardStatus) error {
+    log := checkpointer.kclConfig.Logger
+
+    if (checkpointer.lastLeaseSync.Add(time.Duration(checkpointer.kclConfig.LeaseSyncingTimeIntervalMillis) * time.Millisecond)).After(time.Now()) {
+        return nil
+    }
+
+    checkpointer.lastLeaseSync = time.Now()
+    input := &dynamodb.ScanInput{
+        ProjectionExpression: aws.String(fmt.Sprintf("%s,%s,%s", LeaseKeyKey, LeaseOwnerKey, SequenceNumberKey)),
+        Select:               aws.String("SPECIFIC_ATTRIBUTES"),
+        TableName:            aws.String(checkpointer.kclConfig.TableName),
+    }
+
+    err := checkpointer.svc.ScanPages(input,
+        func(pages *dynamodb.ScanOutput, lastPage bool) bool {
+            results := pages.Items
+            for _, result := range results {
+                shardId, foundShardId := result[LeaseKeyKey]
+                assignedTo, foundAssignedTo := result[LeaseOwnerKey]
+                checkpoint, foundCheckpoint := result[SequenceNumberKey]
+                if !foundShardId || !foundAssignedTo || !foundCheckpoint {
+                    continue
+                }
+                if shard, ok := shardStatus[aws.StringValue(shardId.S)]; ok {
+                    shard.SetLeaseOwner(aws.StringValue(assignedTo.S))
+                    shard.SetCheckpoint(aws.StringValue(checkpoint.S))
+                }
+            }
+            return !lastPage
+        })
+
+    if err != nil {
+        log.Debugf("Error performing SyncLeases. Error: %+v ", err)
+        return err
+    }
+    log.Debugf("Lease sync completed. Next lease sync will occur in %s", time.Duration(checkpointer.kclConfig.LeaseSyncingTimeIntervalMillis)*time.Millisecond)
+    return nil
+}
+
 func (checkpointer *DynamoCheckpoint) createTable() error {
     input := &dynamodb.CreateTableInput{
         AttributeDefinitions: []*dynamodb.AttributeDefinition{
@@ -85,6 +85,7 @@ func TestGetLeaseNotAquired(t *testing.T) {
         Checkpoint: "",
         Mux:        &sync.RWMutex{},
     }, "ijkl-mnop")
     if err == nil || !errors.As(err, &ErrLeaseNotAcquired{}) {
         t.Errorf("Got a lease when it was already held by abcd-efgh: %s", err)
     }
@@ -102,16 +103,16 @@ func TestGetLeaseAquired(t *testing.T) {
     checkpoint := NewDynamoCheckpoint(kclConfig).WithDynamoDB(svc)
     checkpoint.Init()
     marshalledCheckpoint := map[string]*dynamodb.AttributeValue{
-        "ShardID": {
+        LeaseKeyKey: {
             S: aws.String("0001"),
         },
-        "AssignedTo": {
+        LeaseOwnerKey: {
             S: aws.String("abcd-efgh"),
         },
-        "LeaseTimeout": {
+        LeaseTimeoutKey: {
             S: aws.String(time.Now().AddDate(0, -1, 0).UTC().Format(time.RFC3339)),
         },
-        "SequenceID": {
+        SequenceNumberKey: {
             S: aws.String("deadbeef"),
         },
     }
@@ -156,10 +157,221 @@ func TestGetLeaseAquired(t *testing.T) {
     assert.Equal(t, "", status.GetLeaseOwner())
 }

+func TestGetLeaseShardClaimed(t *testing.T) {
+    leaseTimeout := time.Now().Add(-100 * time.Second).UTC()
+    svc := &mockDynamoDB{
+        tableExist: true,
+        item: map[string]*dynamodb.AttributeValue{
+            ClaimRequestKey: {S: aws.String("ijkl-mnop")},
+            LeaseTimeoutKey: {S: aws.String(leaseTimeout.Format(time.RFC3339))},
+        },
+    }
+    kclConfig := cfg.NewKinesisClientLibConfig("appName", "test", "us-west-2", "abc").
+        WithInitialPositionInStream(cfg.LATEST).
+        WithMaxRecords(10).
+        WithMaxLeasesForWorker(1).
+        WithShardSyncIntervalMillis(5000).
+        WithFailoverTimeMillis(300000).
+        WithLeaseStealing(true)
+
+    checkpoint := NewDynamoCheckpoint(kclConfig).WithDynamoDB(svc)
+    checkpoint.Init()
+    err := checkpoint.GetLease(&par.ShardStatus{
+        ID:           "0001",
+        Checkpoint:   "",
+        LeaseTimeout: leaseTimeout,
+        Mux:          &sync.RWMutex{},
+    }, "abcd-efgh")
+    if err == nil || err.Error() != ErrShardClaimed {
+        t.Errorf("Got a lease when it was already claimed by ijkl-mnop: %s", err)
+    }
+
+    err = checkpoint.GetLease(&par.ShardStatus{
+        ID:           "0001",
+        Checkpoint:   "",
+        LeaseTimeout: leaseTimeout,
+        Mux:          &sync.RWMutex{},
+    }, "ijkl-mnop")
+    if err != nil {
+        t.Errorf("Error getting lease %s", err)
+    }
+}
+
+func TestGetLeaseClaimRequestExpiredOwner(t *testing.T) {
+    kclConfig := cfg.NewKinesisClientLibConfig("appName", "test", "us-west-2", "abc").
+        WithInitialPositionInStream(cfg.LATEST).
+        WithMaxRecords(10).
+        WithMaxLeasesForWorker(1).
+        WithShardSyncIntervalMillis(5000).
+        WithFailoverTimeMillis(300000).
+        WithLeaseStealing(true)
+
+    // Not expired
+    leaseTimeout := time.Now().
+        Add(-time.Duration(kclConfig.LeaseStealingClaimTimeoutMillis) * time.Millisecond).
+        Add(1 * time.Second).
+        UTC()
+
+    svc := &mockDynamoDB{
+        tableExist: true,
+        item: map[string]*dynamodb.AttributeValue{
+            LeaseOwnerKey:   {S: aws.String("abcd-efgh")},
+            ClaimRequestKey: {S: aws.String("ijkl-mnop")},
+            LeaseTimeoutKey: {S: aws.String(leaseTimeout.Format(time.RFC3339))},
+        },
+    }
+
+    checkpoint := NewDynamoCheckpoint(kclConfig).WithDynamoDB(svc)
+    checkpoint.Init()
+    err := checkpoint.GetLease(&par.ShardStatus{
+        ID:           "0001",
+        Checkpoint:   "",
+        LeaseTimeout: leaseTimeout,
+        Mux:          &sync.RWMutex{},
+    }, "abcd-efgh")
+    if err == nil || err.Error() != ErrShardClaimed {
+        t.Errorf("Got a lease when it was already claimed by ijkl-mnop: %s", err)
+    }
+}
+
+func TestGetLeaseClaimRequestExpiredClaimer(t *testing.T) {
+    kclConfig := cfg.NewKinesisClientLibConfig("appName", "test", "us-west-2", "abc").
+        WithInitialPositionInStream(cfg.LATEST).
+        WithMaxRecords(10).
+        WithMaxLeasesForWorker(1).
+        WithShardSyncIntervalMillis(5000).
+        WithFailoverTimeMillis(300000).
+        WithLeaseStealing(true)
+
+    // Not expired
+    leaseTimeout := time.Now().
+        Add(-time.Duration(kclConfig.LeaseStealingClaimTimeoutMillis) * time.Millisecond).
+        Add(121 * time.Second).
+        UTC()
+
+    svc := &mockDynamoDB{
+        tableExist: true,
+        item: map[string]*dynamodb.AttributeValue{
+            LeaseOwnerKey:   {S: aws.String("abcd-efgh")},
+            ClaimRequestKey: {S: aws.String("ijkl-mnop")},
+            LeaseTimeoutKey: {S: aws.String(leaseTimeout.Format(time.RFC3339))},
+        },
+    }
+
+    checkpoint := NewDynamoCheckpoint(kclConfig).WithDynamoDB(svc)
+    checkpoint.Init()
+    err := checkpoint.GetLease(&par.ShardStatus{
+        ID:           "0001",
+        Checkpoint:   "",
+        LeaseTimeout: leaseTimeout,
+        Mux:          &sync.RWMutex{},
+    }, "ijkl-mnop")
+    if err == nil || !errors.As(err, &ErrLeaseNotAcquired{}) {
+        t.Errorf("Got a lease when it was already claimed by ijkl-mnop: %s", err)
+    }
+}
+
+func TestFetchCheckpointWithStealing(t *testing.T) {
+    future := time.Now().AddDate(0, 1, 0)
+
+    svc := &mockDynamoDB{
+        tableExist: true,
+        item: map[string]*dynamodb.AttributeValue{
+            SequenceNumberKey: {S: aws.String("deadbeef")},
+            LeaseOwnerKey:     {S: aws.String("abcd-efgh")},
+            LeaseTimeoutKey: {
+                S: aws.String(future.Format(time.RFC3339)),
+            },
+        },
+    }
+
+    kclConfig := cfg.NewKinesisClientLibConfig("appName", "test", "us-west-2", "abc").
+        WithInitialPositionInStream(cfg.LATEST).
+        WithMaxRecords(10).
+        WithMaxLeasesForWorker(1).
+        WithShardSyncIntervalMillis(5000).
+        WithFailoverTimeMillis(300000).
+        WithLeaseStealing(true)
+
+    checkpoint := NewDynamoCheckpoint(kclConfig).WithDynamoDB(svc)
+    checkpoint.Init()
+
+    status := &par.ShardStatus{
+        ID:           "0001",
+        Checkpoint:   "",
+        LeaseTimeout: time.Now(),
+        Mux:          &sync.RWMutex{},
+    }
+
+    checkpoint.FetchCheckpoint(status)
+
+    leaseTimeout, _ := time.Parse(time.RFC3339, *svc.item[LeaseTimeoutKey].S)
+    assert.Equal(t, leaseTimeout, status.LeaseTimeout)
+}
+
+func TestGetLeaseConditional(t *testing.T) {
+    svc := &mockDynamoDB{tableExist: true, item: map[string]*dynamodb.AttributeValue{}}
+    kclConfig := cfg.NewKinesisClientLibConfig("appName", "test", "us-west-2", "abc").
+        WithInitialPositionInStream(cfg.LATEST).
+        WithMaxRecords(10).
+        WithMaxLeasesForWorker(1).
+        WithShardSyncIntervalMillis(5000).
+        WithFailoverTimeMillis(300000).
+        WithLeaseStealing(true)
+
+    checkpoint := NewDynamoCheckpoint(kclConfig).WithDynamoDB(svc)
+    checkpoint.Init()
+    marshalledCheckpoint := map[string]*dynamodb.AttributeValue{
+        LeaseKeyKey: {
+            S: aws.String("0001"),
+        },
+        LeaseOwnerKey: {
+            S: aws.String("abcd-efgh"),
+        },
+        LeaseTimeoutKey: {
+            S: aws.String(time.Now().Add(-1 * time.Second).UTC().Format(time.RFC3339)),
+        },
+        SequenceNumberKey: {
+            S: aws.String("deadbeef"),
+        },
+        ClaimRequestKey: {
+            S: aws.String("ijkl-mnop"),
+        },
+    }
+    input := &dynamodb.PutItemInput{
+        TableName: aws.String("TableName"),
+        Item:      marshalledCheckpoint,
+    }
+    checkpoint.svc.PutItem(input)
+    shard := &par.ShardStatus{
+        ID:           "0001",
+        Checkpoint:   "deadbeef",
+        ClaimRequest: "ijkl-mnop",
+        Mux:          &sync.RWMutex{},
+    }
+    err := checkpoint.FetchCheckpoint(shard)
+    if err != nil {
+        t.Errorf("Could not fetch checkpoint %s", err)
+    }
+
+    err = checkpoint.GetLease(shard, "ijkl-mnop")
+    if err != nil {
+        t.Errorf("Lease not acquired after timeout %s", err)
+    }
+    assert.Equal(t, *svc.expressionAttributeValues[":claim_request"].S, "ijkl-mnop")
+    assert.Contains(t, svc.conditionalExpression, " AND ClaimRequest = :claim_request")
+}
+
 type mockDynamoDB struct {
     dynamodbiface.DynamoDBAPI
     tableExist bool
     item       map[string]*dynamodb.AttributeValue
+    conditionalExpression     string
+    expressionAttributeValues map[string]*dynamodb.AttributeValue
+}
+
+func (m *mockDynamoDB) ScanPages(*dynamodb.ScanInput, func(*dynamodb.ScanOutput, bool) bool) error {
+    return nil
 }

 func (m *mockDynamoDB) DescribeTable(*dynamodb.DescribeTableInput) (*dynamodb.DescribeTableOutput, error) {
@@ -192,6 +404,16 @@ func (m *mockDynamoDB) PutItem(input *dynamodb.PutItemInput) (*dynamodb.PutItemO
     m.item[ParentShardIdKey] = parent
 }

+    if claimRequest, ok := item[ClaimRequestKey]; ok {
+        m.item[ClaimRequestKey] = claimRequest
+    }
+
+    if input.ConditionExpression != nil {
+        m.conditionalExpression = *input.ConditionExpression
+    }
+
+    m.expressionAttributeValues = input.ExpressionAttributeValues
+
     return nil, nil
 }
@@ -214,3 +436,124 @@ func (m *mockDynamoDB) UpdateItem(input *dynamodb.UpdateItemInput) (*dynamodb.Up
 func (m *mockDynamoDB) CreateTable(input *dynamodb.CreateTableInput) (*dynamodb.CreateTableOutput, error) {
     return &dynamodb.CreateTableOutput{}, nil
 }
+
+func TestListActiveWorkers(t *testing.T) {
+    svc := &mockDynamoDB{tableExist: true, item: map[string]*dynamodb.AttributeValue{}}
+    kclConfig := cfg.NewKinesisClientLibConfig("appName", "test", "us-west-2", "abc").
+        WithLeaseStealing(true)
+
+    checkpoint := NewDynamoCheckpoint(kclConfig).WithDynamoDB(svc)
+    err := checkpoint.Init()
+    if err != nil {
+        t.Errorf("Checkpoint initialization failed: %+v", err)
+    }
+
+    shardStatus := map[string]*par.ShardStatus{
+        "0000": {ID: "0000", AssignedTo: "worker_1", Checkpoint: "", Mux: &sync.RWMutex{}},
+        "0001": {ID: "0001", AssignedTo: "worker_2", Checkpoint: "", Mux: &sync.RWMutex{}},
+        "0002": {ID: "0002", AssignedTo: "worker_4", Checkpoint: "", Mux: &sync.RWMutex{}},
+        "0003": {ID: "0003", AssignedTo: "worker_0", Checkpoint: "", Mux: &sync.RWMutex{}},
+        "0004": {ID: "0004", AssignedTo: "worker_1", Checkpoint: "", Mux: &sync.RWMutex{}},
+        "0005": {ID: "0005", AssignedTo: "worker_3", Checkpoint: "", Mux: &sync.RWMutex{}},
+        "0006": {ID: "0006", AssignedTo: "worker_3", Checkpoint: "", Mux: &sync.RWMutex{}},
+        "0007": {ID: "0007", AssignedTo: "worker_0", Checkpoint: "", Mux: &sync.RWMutex{}},
+        "0008": {ID: "0008", AssignedTo: "worker_4", Checkpoint: "", Mux: &sync.RWMutex{}},
+        "0009": {ID: "0009", AssignedTo: "worker_2", Checkpoint: "", Mux: &sync.RWMutex{}},
+        "0010": {ID: "0010", AssignedTo: "worker_0", Checkpoint: ShardEnd, Mux: &sync.RWMutex{}},
+    }
+
+    workers, err := checkpoint.ListActiveWorkers(shardStatus)
+    if err != nil {
+        t.Error(err)
+    }
+
+    for workerID, shards := range workers {
+        assert.Equal(t, 2, len(shards))
+        for _, shard := range shards {
+            assert.Equal(t, workerID, shard.AssignedTo)
+        }
+    }
+}
+
+func TestListActiveWorkersErrShardNotAssigned(t *testing.T) {
+    svc := &mockDynamoDB{tableExist: true, item: map[string]*dynamodb.AttributeValue{}}
+    kclConfig := cfg.NewKinesisClientLibConfig("appName", "test", "us-west-2", "abc").
+        WithLeaseStealing(true)
+
+    checkpoint := NewDynamoCheckpoint(kclConfig).WithDynamoDB(svc)
+    err := checkpoint.Init()
+    if err != nil {
+        t.Errorf("Checkpoint initialization failed: %+v", err)
+    }
+
+    shardStatus := map[string]*par.ShardStatus{
+        "0000": {ID: "0000", Mux: &sync.RWMutex{}},
+    }
+
+    _, err = checkpoint.ListActiveWorkers(shardStatus)
+    if err != ErrShardNotAssigned {
+        t.Error("Expected ErrShardNotAssigned when shard is missing AssignedTo value")
+    }
+}
+
+func TestClaimShard(t *testing.T) {
+    svc := &mockDynamoDB{tableExist: true, item: map[string]*dynamodb.AttributeValue{}}
+    kclConfig := cfg.NewKinesisClientLibConfig("appName", "test", "us-west-2", "abc").
+        WithInitialPositionInStream(cfg.LATEST).
+        WithMaxRecords(10).
+        WithMaxLeasesForWorker(1).
+        WithShardSyncIntervalMillis(5000).
+        WithFailoverTimeMillis(300000).
+        WithLeaseStealing(true)
+
+    checkpoint := NewDynamoCheckpoint(kclConfig).WithDynamoDB(svc)
+    checkpoint.Init()
+
+    marshalledCheckpoint := map[string]*dynamodb.AttributeValue{
+        "ShardID": {
+            S: aws.String("0001"),
+        },
+        "AssignedTo": {
+            S: aws.String("abcd-efgh"),
+        },
+        "LeaseTimeout": {
+            S: aws.String(time.Now().AddDate(0, -1, 0).UTC().Format(time.RFC3339)),
+        },
+        "Checkpoint": {
+            S: aws.String("deadbeef"),
+        },
+    }
+    input := &dynamodb.PutItemInput{
+        TableName: aws.String("TableName"),
+        Item:      marshalledCheckpoint,
+    }
+    checkpoint.svc.PutItem(input)
+    shard := &par.ShardStatus{
+        ID:         "0001",
+        Checkpoint: "deadbeef",
+        Mux:        &sync.RWMutex{},
+    }
+
+    err := checkpoint.ClaimShard(shard, "ijkl-mnop")
+    if err != nil {
+        t.Errorf("Shard not claimed %s", err)
+    }
+
+    claimRequest, ok := svc.item[ClaimRequestKey]
+    if !ok {
+        t.Error("Expected claimRequest to be set by ClaimShard")
+    } else if *claimRequest.S != "ijkl-mnop" {
+        t.Errorf("Expected checkpoint to be ijkl-mnop. Got '%s'", *claimRequest.S)
+    }
+
+    status := &par.ShardStatus{
+        ID:  shard.ID,
+        Mux: &sync.RWMutex{},
+    }
+    checkpoint.FetchCheckpoint(status)
+
+    // assignedTo, checkpoint, and parent shard id should be the same
+    assert.Equal(t, shard.AssignedTo, status.AssignedTo)
+    assert.Equal(t, shard.Checkpoint, status.Checkpoint)
+    assert.Equal(t, shard.ParentShardId, status.ParentShardId)
+}
@@ -122,6 +122,18 @@ const (
     // The amount of milliseconds to wait before graceful shutdown forcefully terminates.
     DefaultShutdownGraceMillis = 5000
+
+    // Lease stealing defaults to false for backwards compatibility.
+    DefaultEnableLeaseStealing = false
+
+    // Interval between rebalance tasks defaults to 5 seconds.
+    DefaultLeaseStealingIntervalMillis = 5000
+
+    // Number of milliseconds to wait before another worker can acquire a claimed shard
+    DefaultLeaseStealingClaimTimeoutMillis = 120000
+
+    // Number of milliseconds to wait before syncing with the lease table (DynamoDB)
+    DefaultLeaseSyncingIntervalMillis = 60000
 )

 type (
@@ -257,6 +269,18 @@ type (
     // MonitoringService publishes per worker-scoped metrics.
     MonitoringService metrics.MonitoringService
+
+    // EnableLeaseStealing turns on lease stealing
+    EnableLeaseStealing bool
+
+    // LeaseStealingIntervalMillis is the number of milliseconds between rebalance tasks
+    LeaseStealingIntervalMillis int
+
+    // LeaseStealingClaimTimeoutMillis is the number of milliseconds to wait before another worker can acquire a claimed shard
+    LeaseStealingClaimTimeoutMillis int
+
+    // LeaseSyncingTimeIntervalMillis is the number of milliseconds to wait before syncing with the lease table (DynamoDB)
+    LeaseSyncingTimeIntervalMillis int
 }
 )
@@ -39,9 +39,35 @@ func TestConfig(t *testing.T) {
     assert.Equal(t, "appName", kclConfig.ApplicationName)
     assert.Equal(t, 500, kclConfig.FailoverTimeMillis)
     assert.Equal(t, 10, kclConfig.TaskBackoffTimeMillis)

     assert.True(t, kclConfig.EnableEnhancedFanOutConsumer)
     assert.Equal(t, "fan-out-consumer", kclConfig.EnhancedFanOutConsumerName)
+
+    assert.Equal(t, false, kclConfig.EnableLeaseStealing)
+    assert.Equal(t, 5000, kclConfig.LeaseStealingIntervalMillis)
+
+    contextLogger := kclConfig.Logger.WithFields(logger.Fields{"key1": "value1"})
+    contextLogger.Debugf("Starting with default logger")
+    contextLogger.Infof("Default logger is awesome")
+}
+
+func TestConfigLeaseStealing(t *testing.T) {
+    kclConfig := NewKinesisClientLibConfig("appName", "StreamName", "us-west-2", "workerId").
+        WithFailoverTimeMillis(500).
+        WithMaxRecords(100).
+        WithInitialPositionInStream(TRIM_HORIZON).
+        WithIdleTimeBetweenReadsInMillis(20).
+        WithCallProcessRecordsEvenForEmptyRecordList(true).
+        WithTaskBackoffTimeMillis(10).
+        WithLeaseStealing(true).
+        WithLeaseStealingIntervalMillis(10000)
+
+    assert.Equal(t, "appName", kclConfig.ApplicationName)
+    assert.Equal(t, 500, kclConfig.FailoverTimeMillis)
+    assert.Equal(t, 10, kclConfig.TaskBackoffTimeMillis)
+    assert.Equal(t, true, kclConfig.EnableLeaseStealing)
+    assert.Equal(t, 10000, kclConfig.LeaseStealingIntervalMillis)

     contextLogger := kclConfig.Logger.WithFields(logger.Fields{"key1": "value1"})
     contextLogger.Debugf("Starting with default logger")
     contextLogger.Infof("Default logger is awesome")
@@ -95,7 +95,11 @@ func NewKinesisClientLibConfigWithCredentials(applicationName, streamName, regio
     InitialLeaseTableReadCapacity:  DefaultInitialLeaseTableReadCapacity,
     InitialLeaseTableWriteCapacity: DefaultInitialLeaseTableWriteCapacity,
     SkipShardSyncAtWorkerInitializationIfLeasesExist: DefaultSkipShardSyncAtStartupIfLeasesExist,
-    Logger: logger.GetDefaultLogger(),
+    EnableLeaseStealing:             DefaultEnableLeaseStealing,
+    LeaseStealingIntervalMillis:     DefaultLeaseStealingIntervalMillis,
+    LeaseStealingClaimTimeoutMillis: DefaultLeaseStealingClaimTimeoutMillis,
+    LeaseSyncingTimeIntervalMillis:  DefaultLeaseSyncingIntervalMillis,
+    Logger: logger.GetDefaultLogger(),
     }
 }
@@ -241,3 +245,18 @@ func (c *KinesisClientLibConfiguration) WithEnhancedFanOutConsumerARN(consumerAR
     c.EnableEnhancedFanOutConsumer = true
     return c
 }
+
+func (c *KinesisClientLibConfiguration) WithLeaseStealing(enableLeaseStealing bool) *KinesisClientLibConfiguration {
+    c.EnableLeaseStealing = enableLeaseStealing
+    return c
+}
+
+func (c *KinesisClientLibConfiguration) WithLeaseStealingIntervalMillis(leaseStealingIntervalMillis int) *KinesisClientLibConfiguration {
+    c.LeaseStealingIntervalMillis = leaseStealingIntervalMillis
+    return c
+}
+
+func (c *KinesisClientLibConfiguration) WithLeaseSyncingIntervalMillis(leaseSyncingIntervalMillis int) *KinesisClientLibConfiguration {
+    c.LeaseSyncingTimeIntervalMillis = leaseSyncingIntervalMillis
+    return c
+}
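Putting the new builder methods together, a consumer opts in to lease stealing when constructing its KCL configuration. A minimal sketch, assuming placeholder application, stream, region, and worker values; the builder methods come from this commit and the interval values shown match the documented defaults:

    package example

    import (
        cfg "github.com/vmware/vmware-go-kcl/clientlibrary/config"
    )

    // newLeaseStealingConfig returns a configuration with lease stealing enabled.
    // The string arguments are placeholders, not values from this repository.
    func newLeaseStealingConfig() *cfg.KinesisClientLibConfiguration {
        return cfg.NewKinesisClientLibConfig("my-app", "my-stream", "us-west-2", "worker-1").
            WithLeaseStealing(true).
            WithLeaseStealingIntervalMillis(5000). // how often the rebalance task runs
            WithLeaseSyncingIntervalMillis(60000)  // how often leases are re-read from DynamoDB
    }

The resulting configuration is then passed to worker.NewWorker as before; no other call sites change.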
@ -30,6 +30,8 @@ package worker
|
||||||
import (
|
import (
|
||||||
"sync"
|
"sync"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
|
"github.com/vmware/vmware-go-kcl/clientlibrary/config"
|
||||||
)
|
)
|
||||||
|
|
||||||
type ShardStatus struct {
|
type ShardStatus struct {
|
||||||
|
|
@ -43,6 +45,7 @@ type ShardStatus struct {
|
||||||
StartingSequenceNumber string
|
StartingSequenceNumber string
|
||||||
// child shard doesn't have end sequence number
|
// child shard doesn't have end sequence number
|
||||||
EndingSequenceNumber string
|
EndingSequenceNumber string
|
||||||
|
ClaimRequest string
|
||||||
}
|
}
|
||||||
|
|
||||||
func (ss *ShardStatus) GetLeaseOwner() string {
|
func (ss *ShardStatus) GetLeaseOwner() string {
|
||||||
|
|
@ -68,3 +71,24 @@ func (ss *ShardStatus) SetCheckpoint(c string) {
|
||||||
defer ss.Mux.Unlock()
|
defer ss.Mux.Unlock()
|
||||||
ss.Checkpoint = c
|
ss.Checkpoint = c
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (ss *ShardStatus) GetLeaseTimeout() time.Time {
|
||||||
|
ss.Mux.Lock()
|
||||||
|
defer ss.Mux.Unlock()
|
||||||
|
return ss.LeaseTimeout
|
||||||
|
}
|
||||||
|
|
||||||
|
func (ss *ShardStatus) SetLeaseTimeout(timeout time.Time) {
|
||||||
|
ss.Mux.Lock()
|
||||||
|
defer ss.Mux.Unlock()
|
||||||
|
ss.LeaseTimeout = timeout
|
||||||
|
}
|
||||||
|
|
||||||
|
func (ss *ShardStatus) IsClaimRequestExpired(kclConfig *config.KinesisClientLibConfiguration) bool {
|
||||||
|
if leaseTimeout := ss.GetLeaseTimeout(); leaseTimeout.IsZero() {
|
||||||
|
return false
|
||||||
|
} else {
|
||||||
|
return leaseTimeout.
|
||||||
|
Before(time.Now().UTC().Add(time.Duration(-kclConfig.LeaseStealingClaimTimeoutMillis) * time.Millisecond))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
|
||||||
|
|
@ -103,7 +103,7 @@ func (sc *PollingShardConsumer) getRecords() error {
|
||||||
retriedErrors := 0
|
retriedErrors := 0
|
||||||
|
|
||||||
for {
|
for {
|
||||||
if time.Now().UTC().After(sc.shard.LeaseTimeout.Add(-time.Duration(sc.kclConfig.LeaseRefreshPeriodMillis) * time.Millisecond)) {
|
if time.Now().UTC().After(sc.shard.GetLeaseTimeout().Add(-time.Duration(sc.kclConfig.LeaseRefreshPeriodMillis) * time.Millisecond)) {
|
||||||
log.Debugf("Refreshing lease on shard: %s for worker: %s", sc.shard.ID, sc.consumerID)
|
log.Debugf("Refreshing lease on shard: %s for worker: %s", sc.shard.ID, sc.consumerID)
|
||||||
err = sc.checkpointer.GetLease(sc.shard, sc.consumerID)
|
err = sc.checkpointer.GetLease(sc.shard, sc.consumerID)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
|
|
||||||
|
|
@@ -68,7 +68,8 @@ type Worker struct {
     rng *rand.Rand

     shardStatus map[string]*par.ShardStatus
+    shardStealInProgress bool
 }

 // NewWorker constructs a Worker instance for processing Kinesis stream data.
@@ -271,7 +272,7 @@ func (w *Worker) eventLoop() {
     log.Infof("Found %d shards", foundShards)
 }

-        // Count the number of leases hold by this worker excluding the processed shard
+        // Count the number of leases held by this worker excluding the processed shard
         counter := 0
         for _, shard := range w.shardStatus {
             if shard.GetLeaseOwner() == w.workerID && shard.GetCheckpoint() != chk.ShardEnd {
@@ -302,6 +303,20 @@ func (w *Worker) eventLoop() {
                 continue
             }

+            var stealShard bool
+            if w.kclConfig.EnableLeaseStealing && shard.ClaimRequest != "" {
+                upcomingStealingInterval := time.Now().UTC().Add(time.Duration(w.kclConfig.LeaseStealingIntervalMillis) * time.Millisecond)
+                if shard.GetLeaseTimeout().Before(upcomingStealingInterval) && !shard.IsClaimRequestExpired(w.kclConfig) {
+                    if shard.ClaimRequest == w.workerID {
+                        stealShard = true
+                        log.Debugf("Stealing shard: %s", shard.ID)
+                    } else {
+                        log.Debugf("Shard being stolen: %s", shard.ID)
+                        continue
+                    }
+                }
+            }
+
             err = w.checkpointer.GetLease(shard, w.workerID)
             if err != nil {
                 // cannot get lease on the shard
@@ -311,6 +326,11 @@ func (w *Worker) eventLoop() {
                 continue
             }

+            if stealShard {
+                log.Debugf("Successfully stole shard: %+v", shard.ID)
+                w.shardStealInProgress = false
+            }
+
             // log metrics on got lease
             w.mService.LeaseGained(shard.ID)
             w.waitGroup.Add(1)
@@ -325,6 +345,13 @@ func (w *Worker) eventLoop() {
             }
         }

+        if w.kclConfig.EnableLeaseStealing {
+            err = w.rebalance()
+            if err != nil {
+                log.Warnf("Error in rebalance: %+v", err)
+            }
+        }
+
         select {
         case <-*w.stop:
             log.Infof("Shutting down...")
@@ -335,6 +362,90 @@ func (w *Worker) eventLoop() {
         }
     }
 }

+func (w *Worker) rebalance() error {
+    log := w.kclConfig.Logger
+
+    workers, err := w.checkpointer.ListActiveWorkers(w.shardStatus)
+    if err != nil {
+        log.Debugf("Error listing workers. workerID: %s. Error: %+v ", w.workerID, err)
+        return err
+    }
+
+    // Only attempt to steal one shard at a time, to allow for linear convergence
+    if w.shardStealInProgress {
+        shardInfo := make(map[string]bool)
+        err := w.getShardIDs("", shardInfo)
+        if err != nil {
+            return err
+        }
+        for _, shard := range w.shardStatus {
+            if shard.ClaimRequest != "" && shard.ClaimRequest == w.workerID {
+                log.Debugf("Steal in progress. workerID: %s", w.workerID)
+                return nil
+            }
+            // Our shard steal was stomped on by a Checkpoint.
+            // We could deal with that, but instead just try again
+            w.shardStealInProgress = false
+        }
+    }
+
+    var numShards int
+    for _, shards := range workers {
+        numShards += len(shards)
+    }
+
+    numWorkers := len(workers)
+
+    // 1:1 shards to workers is optimal, so we cannot possibly rebalance
+    if numWorkers >= numShards {
+        log.Debugf("Optimal shard allocation, not stealing any shards. workerID: %s, %v > %v. ", w.workerID, numWorkers, numShards)
+        return nil
+    }
+
+    currentShards, ok := workers[w.workerID]
+    var numCurrentShards int
+    if !ok {
+        numCurrentShards = 0
+        numWorkers++
+    } else {
+        numCurrentShards = len(currentShards)
+    }
+
+    optimalShards := numShards / numWorkers
+
+    // We have more than or equal optimal shards, so no rebalancing can take place
+    if numCurrentShards >= optimalShards || numCurrentShards == w.kclConfig.MaxLeasesForWorker {
+        log.Debugf("We have enough shards, not attempting to steal any. workerID: %s", w.workerID)
+        return nil
+    }
+
+    maxShards := int(optimalShards)
+    var workerSteal string
+    for worker, shards := range workers {
+        if worker != w.workerID && len(shards) > maxShards {
+            workerSteal = worker
+            maxShards = len(shards)
+        }
+    }
+    // Not all shards are allocated so fall back to default shard allocation mechanisms
+    if workerSteal == "" {
+        log.Infof("Not all shards are allocated, not stealing any. workerID: %s", w.workerID)
+        return nil
+    }
+
+    // Steal a random shard from the worker with the most shards
+    w.shardStealInProgress = true
+    randIndex := rand.Intn(len(workers[workerSteal]))
+    shardToSteal := workers[workerSteal][randIndex]
+    log.Debugf("Stealing shard %s from %s", shardToSteal, workerSteal)
+
+    err = w.checkpointer.ClaimShard(w.shardStatus[shardToSteal.ID], w.workerID)
+    if err != nil {
+        w.shardStealInProgress = false
+        return err
+    }
+    return nil
+}
+
 // List all shards and store them into shardStatus table
 // If shard has been removed, need to exclude it from cached shard status.
 func (w *Worker) getShardIDs(nextToken string, shardInfo map[string]bool) error {
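To illustrate the arithmetic in rebalance() with concrete numbers (the same shape as the TestListActiveWorkers fixture earlier in this diff): suppose 5 workers hold 10 active shards in total and this worker currently holds 1.

    numShards        = 10
    numWorkers       = 5
    optimalShards    = numShards / numWorkers = 2
    numCurrentShards = 1 < optimalShards

Since the worker is below its fair share, it picks the worker holding more than 2 shards, chooses one of that worker's shards at random, and calls ClaimShard on it. The claim then blocks the current owner from renewing the lease (see GetLease above) until the claim is honored or expires after LeaseStealingClaimTimeoutMillis.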
@@ -1,4 +1,4 @@
-FROM golang:1.12
+FROM golang:1.13
 ENV PATH /go/bin:/src/bin:/root/go/bin:/usr/local/go/bin:$PATH
 ENV GOPATH /go:/src
 RUN go get -v github.com/alecthomas/gometalinter && \
230
test/lease_stealing_util_test.go
Normal file
230
test/lease_stealing_util_test.go
Normal file
|
|
@ -0,0 +1,230 @@
|
||||||
|
package test
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
"sync"
|
||||||
|
"testing"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"github.com/aws/aws-sdk-go/aws"
|
||||||
|
"github.com/aws/aws-sdk-go/service/dynamodb"
|
||||||
|
"github.com/aws/aws-sdk-go/service/dynamodb/dynamodbiface"
|
||||||
|
"github.com/aws/aws-sdk-go/service/kinesis/kinesisiface"
|
||||||
|
"github.com/stretchr/testify/assert"
|
||||||
|
chk "github.com/vmware/vmware-go-kcl/clientlibrary/checkpoint"
|
||||||
|
cfg "github.com/vmware/vmware-go-kcl/clientlibrary/config"
|
||||||
|
wk "github.com/vmware/vmware-go-kcl/clientlibrary/worker"
|
||||||
|
)
|
||||||
|
|
||||||
|
type LeaseStealingTest struct {
|
||||||
|
t *testing.T
|
||||||
|
config *TestClusterConfig
|
||||||
|
cluster *TestCluster
|
||||||
|
kc kinesisiface.KinesisAPI
|
||||||
|
dc dynamodbiface.DynamoDBAPI
|
||||||
|
|
||||||
|
backOffSeconds int
|
||||||
|
maxRetries int
|
||||||
|
}
|
||||||
|
|
||||||
|
func NewLeaseStealingTest(t *testing.T, config *TestClusterConfig, workerFactory TestWorkerFactory) *LeaseStealingTest {
|
||||||
|
cluster := NewTestCluster(t, config, workerFactory)
|
||||||
|
clientConfig := cluster.workerFactory.CreateKCLConfig("test-client", config)
|
||||||
|
return &LeaseStealingTest{
|
||||||
|
t: t,
|
||||||
|
config: config,
|
||||||
|
cluster: cluster,
|
||||||
|
kc: NewKinesisClient(t, config.regionName, clientConfig.KinesisEndpoint, clientConfig.KinesisCredentials),
|
||||||
|
dc: NewDynamoDBClient(t, config.regionName, clientConfig.DynamoDBEndpoint, clientConfig.KinesisCredentials),
|
||||||
|
backOffSeconds: 5,
|
||||||
|
maxRetries: 60,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (lst *LeaseStealingTest) WithBackoffSeconds(backoff int) *LeaseStealingTest {
|
||||||
|
lst.backOffSeconds = backoff
|
||||||
|
return lst
|
||||||
|
}
|
||||||
|
|
||||||
|
func (lst *LeaseStealingTest) WithMaxRetries(retries int) *LeaseStealingTest {
|
||||||
|
lst.maxRetries = retries
|
||||||
|
return lst
|
||||||
|
}
|
||||||
|
|
||||||
|
func (lst *LeaseStealingTest) publishSomeData() (stop func()) {
|
||||||
|
done := make(chan int)
|
||||||
|
wg := &sync.WaitGroup{}
|
||||||
|
|
||||||
|
wg.Add(1)
|
||||||
|
go func() {
|
||||||
|
ticker := time.NewTicker(500 * time.Millisecond)
|
||||||
|
defer wg.Done()
|
||||||
|
defer ticker.Stop()
|
||||||
|
for {
|
||||||
|
select {
|
||||||
|
case <-done:
|
||||||
|
return
|
||||||
|
case <-ticker.C:
|
||||||
|
lst.t.Log("Coninuously publishing records")
|
||||||
|
publishSomeData(lst.t, lst.kc)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}()
|
||||||
|
|
||||||
|
return func() {
|
||||||
|
close(done)
|
||||||
|
wg.Wait()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (lst *LeaseStealingTest) getShardCountByWorker() map[string]int {
|
||||||
|
input := &dynamodb.ScanInput{
|
||||||
|
TableName: aws.String(lst.config.appName),
|
||||||
|
}
|
||||||
|
|
||||||
|
shardsByWorker := map[string]map[string]bool{}
|
||||||
|
err := lst.dc.ScanPages(input, func(out *dynamodb.ScanOutput, lastPage bool) bool {
|
||||||
|
for _, result := range out.Items {
|
||||||
|
if shardID, ok := result[chk.LeaseKeyKey]; !ok {
|
||||||
|
continue
|
||||||
|
} else if assignedTo, ok := result[chk.LeaseOwnerKey]; !ok {
|
||||||
|
continue
|
||||||
|
} else {
|
||||||
|
if _, ok := shardsByWorker[*assignedTo.S]; !ok {
|
||||||
|
shardsByWorker[*assignedTo.S] = map[string]bool{}
|
||||||
|
}
|
||||||
|
shardsByWorker[*assignedTo.S][*shardID.S] = true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return !lastPage
|
||||||
|
})
|
||||||
|
assert.Nil(lst.t, err)
|
||||||
|
|
||||||
|
	shardCountByWorker := map[string]int{}
	for worker, shards := range shardsByWorker {
		shardCountByWorker[worker] = len(shards)
	}
	return shardCountByWorker
}

type LeaseStealingAssertions struct {
	expectedLeasesForIntialWorker int
	expectedLeasesPerWorker       int
}

func (lst *LeaseStealingTest) Run(assertions LeaseStealingAssertions) {
	// Publish records onto the stream throughout the entire duration of the test
	stop := lst.publishSomeData()
	defer stop()

	// Start worker 1
	worker1, _ := lst.cluster.SpawnWorker()

	// Wait until the above worker has all leases
	var worker1ShardCount int
	for i := 0; i < lst.maxRetries; i++ {
		time.Sleep(time.Duration(lst.backOffSeconds) * time.Second)

		shardCountByWorker := lst.getShardCountByWorker()
		if shardCount, ok := shardCountByWorker[worker1]; ok && shardCount == assertions.expectedLeasesForIntialWorker {
			worker1ShardCount = shardCount
			break
		}
	}

	// Assert correct number of leases
	assert.Equal(lst.t, assertions.expectedLeasesForIntialWorker, worker1ShardCount)

	// Spawn remaining workers
	for i := 0; i < lst.config.numWorkers-1; i++ {
		lst.cluster.SpawnWorker()
	}

	// Wait for rebalance
	var shardCountByWorker map[string]int
	for i := 0; i < lst.maxRetries; i++ {
		time.Sleep(time.Duration(lst.backOffSeconds) * time.Second)

		shardCountByWorker = lst.getShardCountByWorker()

		correctCount := true
		for _, count := range shardCountByWorker {
			if count != assertions.expectedLeasesPerWorker {
				correctCount = false
			}
		}

		if correctCount {
			break
		}
	}

	// Assert rebalanced
	assert.Greater(lst.t, len(shardCountByWorker), 0)
	for _, count := range shardCountByWorker {
		assert.Equal(lst.t, assertions.expectedLeasesPerWorker, count)
	}

	// Shutdown workers
	time.Sleep(10 * time.Second)
	lst.cluster.Shutdown()
}

type TestWorkerFactory interface {
	CreateWorker(workerID string, kclConfig *cfg.KinesisClientLibConfiguration) *wk.Worker
	CreateKCLConfig(workerID string, config *TestClusterConfig) *cfg.KinesisClientLibConfiguration
}

type TestClusterConfig struct {
	numShards  int
	numWorkers int

	appName          string
	streamName       string
	regionName       string
	workerIDTemplate string
}

type TestCluster struct {
	t             *testing.T
	config        *TestClusterConfig
	workerFactory TestWorkerFactory
	workerIDs     []string
	workers       map[string]*wk.Worker
}

func NewTestCluster(t *testing.T, config *TestClusterConfig, workerFactory TestWorkerFactory) *TestCluster {
	return &TestCluster{
		t:             t,
		config:        config,
		workerFactory: workerFactory,
		workerIDs:     make([]string, 0),
		workers:       make(map[string]*wk.Worker),
	}
}

func (tc *TestCluster) addWorker(workerID string, config *cfg.KinesisClientLibConfiguration) *wk.Worker {
	worker := tc.workerFactory.CreateWorker(workerID, config)
	tc.workerIDs = append(tc.workerIDs, workerID)
	tc.workers[workerID] = worker
	return worker
}

func (tc *TestCluster) SpawnWorker() (string, *wk.Worker) {
	id := len(tc.workers)
	workerID := fmt.Sprintf(tc.config.workerIDTemplate, id)

	config := tc.workerFactory.CreateKCLConfig(workerID, tc.config)
	worker := tc.addWorker(workerID, config)

	err := worker.Start()
	assert.Nil(tc.t, err)
	return workerID, worker
}

func (tc *TestCluster) Shutdown() {
	for workerID, worker := range tc.workers {
		tc.t.Logf("Shutting down worker: %v", workerID)
		worker.Shutdown()
	}
}
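
For orientation, a minimal usage sketch of the TestCluster harness follows. It is hypothetical, not part of this change; it assumes the leaseStealingWorkerFactory defined later in this diff and uses illustrative config values.

// Hypothetical sketch (illustrative only): drive the TestCluster harness directly.
func testClusterUsageSketch(t *testing.T) {
	config := &TestClusterConfig{
		numShards:        4,
		numWorkers:       2,
		appName:          "sample-app",       // illustrative value
		streamName:       "sample-stream",    // illustrative value
		regionName:       "us-west-2",        // illustrative value
		workerIDTemplate: "sample-worker-%v", // illustrative value
	}
	cluster := NewTestCluster(t, config, newLeaseStealingWorkerFactory(t))

	// Each SpawnWorker call builds a KCL config via the factory, starts the
	// worker, and records it under a templated worker ID.
	for i := 0; i < config.numWorkers; i++ {
		cluster.SpawnWorker()
	}

	// ... assert on lease ownership here ...

	// Shutdown stops every worker that was spawned.
	cluster.Shutdown()
}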

@ -23,9 +23,10 @@ package test
import (
	"github.com/stretchr/testify/assert"

	"testing"

	"github.com/sirupsen/logrus"
	"go.uber.org/zap"
	"testing"

	"github.com/vmware/vmware-go-kcl/logger"
	zaplogger "github.com/vmware/vmware-go-kcl/logger/zap"

@ -19,10 +19,11 @@
package test

import (
	"testing"

	"github.com/aws/aws-sdk-go/aws"
	"github.com/stretchr/testify/assert"
	kc "github.com/vmware/vmware-go-kcl/clientlibrary/interfaces"
	"testing"
)

// Record processor factory is used to create RecordProcessor

@ -21,9 +21,13 @@ package test
import (
	"crypto/md5"
	"fmt"
	"sync"
	"time"

	"github.com/aws/aws-sdk-go/aws"
	"github.com/aws/aws-sdk-go/aws/credentials"
	"github.com/aws/aws-sdk-go/aws/session"
	"github.com/aws/aws-sdk-go/service/dynamodb"
	"github.com/aws/aws-sdk-go/service/kinesis"
	"github.com/aws/aws-sdk-go/service/kinesis/kinesisiface"
	rec "github.com/awslabs/kinesis-aggregation/go/records"

@ -50,12 +54,79 @@ func NewKinesisClient(t *testing.T, regionName, endpoint string, credentials *cr
	return kinesis.New(s)
}

// NewDynamoDBClient to create a DynamoDB client.
func NewDynamoDBClient(t *testing.T, regionName, endpoint string, credentials *credentials.Credentials) *dynamodb.DynamoDB {
	s, err := session.NewSession(&aws.Config{
		Region:      aws.String(regionName),
		Endpoint:    aws.String(endpoint),
		Credentials: credentials,
	})
	if err != nil {
		// no need to move forward
		t.Fatalf("Failed in getting DynamoDB session for creating Worker: %+v", err)
	}
	return dynamodb.New(s)
}

func continuouslyPublishSomeData(t *testing.T, kc kinesisiface.KinesisAPI) func() {
	shards := []*kinesis.Shard{}
	var nextToken *string
	for {
		out, err := kc.ListShards(&kinesis.ListShardsInput{
			StreamName: aws.String(streamName),
			NextToken:  nextToken,
		})
		if err != nil {
			t.Errorf("Error in ListShards. %+v", err)
		}

		shards = append(shards, out.Shards...)
		if out.NextToken == nil {
			break
		}
		nextToken = out.NextToken
	}

	done := make(chan int)
	wg := &sync.WaitGroup{}

	wg.Add(1)
	go func() {
		defer wg.Done()
		ticker := time.NewTicker(500 * time.Millisecond)
		for {
			select {
			case <-done:
				return
			case <-ticker.C:
				publishToAllShards(t, kc, shards)
				publishSomeData(t, kc)
			}
		}
	}()

	return func() {
		close(done)
		wg.Wait()
	}
}

func publishToAllShards(t *testing.T, kc kinesisiface.KinesisAPI, shards []*kinesis.Shard) {
	// Put records to all shards
	for i := 0; i < 10; i++ {
		for _, shard := range shards {
			publishRecord(t, kc, shard.HashKeyRange.StartingHashKey)
		}
	}
}
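
Kinesis routes a record by the hash of its partition key unless ExplicitHashKey overrides it, which is why publishToAllShards passes each shard's HashKeyRange.StartingHashKey: it forces at least one record onto every shard. A small hypothetical helper, not part of this change, illustrates the two routing modes:

// Hypothetical helper (illustrative only): publish one record pinned to a
// specific shard and one record routed by its random partition key.
func publishPinnedAndRandom(t *testing.T, kc kinesisiface.KinesisAPI, shard *kinesis.Shard) {
	// StartingHashKey lies inside this shard's hash-key range, so the record
	// lands on that shard regardless of the partition key.
	publishRecord(t, kc, shard.HashKeyRange.StartingHashKey)

	// With a nil hash key, routing falls back to hashing the partition key.
	publishRecord(t, kc, nil)
}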

// publishSomeData to put some records into Kinesis stream
func publishSomeData(t *testing.T, kc kinesisiface.KinesisAPI) {
	// Put some data into stream.
	t.Log("Putting data into stream using PutRecord API...")
	for i := 0; i < 50; i++ {
		publishRecord(t, kc)
		publishRecord(t, kc, nil)
	}
	t.Log("Done putting data into stream using PutRecord API.")

@ -75,13 +146,17 @@ func publishSomeData(t *testing.T, kc kinesisiface.KinesisAPI) {
}

// publishRecord to put a record into Kinesis stream using PutRecord API.
func publishRecord(t *testing.T, kc kinesisiface.KinesisAPI) {
func publishRecord(t *testing.T, kc kinesisiface.KinesisAPI, hashKey *string) {
	// Use random string as partition key to ensure even distribution across shards
	input := &kinesis.PutRecordInput{
	_, err := kc.PutRecord(&kinesis.PutRecordInput{
		Data:         []byte(specstr),
		StreamName:   aws.String(streamName),
		PartitionKey: aws.String(utils.RandStringBytesMaskImpr(10)),
	})
	}
	if hashKey != nil {
		input.ExplicitHashKey = hashKey
	}
	// Use random string as partition key to ensure even distribution across shards
	_, err := kc.PutRecord(input)

	if err != nil {
		t.Errorf("Error in PutRecord. %+v", err)

@ -94,10 +169,11 @@ func publishRecords(t *testing.T, kc kinesisiface.KinesisAPI) {
	records := make([]*kinesis.PutRecordsRequestEntry, 5)

	for i := 0; i < 5; i++ {
		records[i] = &kinesis.PutRecordsRequestEntry{
		record := &kinesis.PutRecordsRequestEntry{
			Data:         []byte(specstr),
			PartitionKey: aws.String(utils.RandStringBytesMaskImpr(10)),
		}
		records[i] = record
	}

	_, err := kc.PutRecords(&kinesis.PutRecordsInput{

@ -37,7 +37,7 @@ import (
)

func TestWorkerInjectCheckpointer(t *testing.T) {
	kclConfig := cfg.NewKinesisClientLibConfig("appName", streamName, regionName, workerID).
	kclConfig := cfg.NewKinesisClientLibConfig(appName, streamName, regionName, workerID).
		WithInitialPositionInStream(cfg.LATEST).
		WithMaxRecords(10).
		WithMaxLeasesForWorker(1).

@ -52,6 +52,12 @@ func TestWorkerInjectCheckpointer(t *testing.T) {
	// configure cloudwatch as metrics system
	kclConfig.WithMonitoringService(getMetricsConfig(kclConfig, metricsSystem))

	// Put some data into stream.
	kc := NewKinesisClient(t, regionName, kclConfig.KinesisEndpoint, kclConfig.KinesisCredentials)
	// publishSomeData(t, kc)
	stop := continuouslyPublishSomeData(t, kc)
	defer stop()

	// custom checkpointer or a mock checkpointer.
	checkpointer := chk.NewDynamoCheckpoint(kclConfig)

@ -62,12 +68,8 @@ func TestWorkerInjectCheckpointer(t *testing.T) {
	err := worker.Start()
	assert.Nil(t, err)

	// Put some data into stream.
	kc := NewKinesisClient(t, regionName, kclConfig.KinesisEndpoint, kclConfig.KinesisCredentials)
	publishSomeData(t, kc)

	// wait a few seconds before shutdown processing
	time.Sleep(10 * time.Second)
	time.Sleep(30 * time.Second)
	worker.Shutdown()

	// verify the checkpointer after graceful shutdown

@ -86,7 +88,7 @@ func TestWorkerInjectCheckpointer(t *testing.T) {
}

func TestWorkerInjectKinesis(t *testing.T) {
	kclConfig := cfg.NewKinesisClientLibConfig("appName", streamName, regionName, workerID).
	kclConfig := cfg.NewKinesisClientLibConfig(appName, streamName, regionName, workerID).
		WithInitialPositionInStream(cfg.LATEST).
		WithMaxRecords(10).
		WithMaxLeasesForWorker(1).

@ -109,6 +111,11 @@ func TestWorkerInjectKinesis(t *testing.T) {
	assert.Nil(t, err)
	kc := kinesis.New(s)

	// Put some data into stream.
	// publishSomeData(t, kc)
	stop := continuouslyPublishSomeData(t, kc)
	defer stop()

	// Inject a custom checkpointer into the worker.
	worker := wk.NewWorker(recordProcessorFactory(t), kclConfig).
		WithKinesis(kc)

@ -116,16 +123,13 @@ func TestWorkerInjectKinesis(t *testing.T) {
	err = worker.Start()
	assert.Nil(t, err)

	// Put some data into stream.
	publishSomeData(t, kc)

	// wait a few seconds before shutdown processing
	time.Sleep(10 * time.Second)
	time.Sleep(30 * time.Second)
	worker.Shutdown()
}

func TestWorkerInjectKinesisAndCheckpointer(t *testing.T) {
	kclConfig := cfg.NewKinesisClientLibConfig("appName", streamName, regionName, workerID).
	kclConfig := cfg.NewKinesisClientLibConfig(appName, streamName, regionName, workerID).
		WithInitialPositionInStream(cfg.LATEST).
		WithMaxRecords(10).
		WithMaxLeasesForWorker(1).

@ -148,6 +152,11 @@ func TestWorkerInjectKinesisAndCheckpointer(t *testing.T) {
	assert.Nil(t, err)
	kc := kinesis.New(s)

	// Put some data into stream.
	// publishSomeData(t, kc)
	stop := continuouslyPublishSomeData(t, kc)
	defer stop()

	// custom checkpointer or a mock checkpointer.
	checkpointer := chk.NewDynamoCheckpoint(kclConfig)

@ -159,10 +168,7 @@ func TestWorkerInjectKinesisAndCheckpointer(t *testing.T) {
	err = worker.Start()
	assert.Nil(t, err)

	// Put some data into stream.
	publishSomeData(t, kc)

	// wait a few seconds before shutdown processing
	time.Sleep(10 * time.Second)
	time.Sleep(30 * time.Second)
	worker.Shutdown()
}
127 test/worker_lease_stealing_test.go Normal file

@ -0,0 +1,127 @@
package test

import (
	"testing"

	chk "github.com/vmware/vmware-go-kcl/clientlibrary/checkpoint"
	cfg "github.com/vmware/vmware-go-kcl/clientlibrary/config"
	wk "github.com/vmware/vmware-go-kcl/clientlibrary/worker"
	"github.com/vmware/vmware-go-kcl/logger"
)

func TestLeaseStealing(t *testing.T) {
	config := &TestClusterConfig{
		numShards:        4,
		numWorkers:       2,
		appName:          appName,
		streamName:       streamName,
		regionName:       regionName,
		workerIDTemplate: workerID + "-%v",
	}
	test := NewLeaseStealingTest(t, config, newLeaseStealingWorkerFactory(t))
	test.Run(LeaseStealingAssertions{
		expectedLeasesForIntialWorker: config.numShards,
		expectedLeasesPerWorker:       config.numShards / config.numWorkers,
	})
}

type leaseStealingWorkerFactory struct {
	t *testing.T
}

func newLeaseStealingWorkerFactory(t *testing.T) *leaseStealingWorkerFactory {
	return &leaseStealingWorkerFactory{t}
}

func (wf *leaseStealingWorkerFactory) CreateKCLConfig(workerID string, config *TestClusterConfig) *cfg.KinesisClientLibConfiguration {
	log := logger.NewLogrusLoggerWithConfig(logger.Configuration{
		EnableConsole:     true,
		ConsoleLevel:      logger.Error,
		ConsoleJSONFormat: false,
		EnableFile:        true,
		FileLevel:         logger.Info,
		FileJSONFormat:    true,
		Filename:          "log.log",
	})

	log.WithFields(logger.Fields{"worker": workerID})

	return cfg.NewKinesisClientLibConfig(config.appName, config.streamName, config.regionName, workerID).
		WithInitialPositionInStream(cfg.LATEST).
		WithMaxRecords(10).
		WithShardSyncIntervalMillis(5000).
		WithFailoverTimeMillis(10000).
		WithLeaseStealing(true).
		WithLogger(log)
}

func (wf *leaseStealingWorkerFactory) CreateWorker(workerID string, kclConfig *cfg.KinesisClientLibConfiguration) *wk.Worker {
	worker := wk.NewWorker(recordProcessorFactory(wf.t), kclConfig)
	return worker
}

func TestLeaseStealingInjectCheckpointer(t *testing.T) {
	config := &TestClusterConfig{
		numShards:        4,
		numWorkers:       2,
		appName:          appName,
		streamName:       streamName,
		regionName:       regionName,
		workerIDTemplate: workerID + "-%v",
	}
	test := NewLeaseStealingTest(t, config, newleaseStealingWorkerFactoryCustomChk(t))
	test.Run(LeaseStealingAssertions{
		expectedLeasesForIntialWorker: config.numShards,
		expectedLeasesPerWorker:       config.numShards / config.numWorkers,
	})
}

type leaseStealingWorkerFactoryCustom struct {
	*leaseStealingWorkerFactory
}

func newleaseStealingWorkerFactoryCustomChk(t *testing.T) *leaseStealingWorkerFactoryCustom {
	return &leaseStealingWorkerFactoryCustom{
		newLeaseStealingWorkerFactory(t),
	}
}

func (wfc *leaseStealingWorkerFactoryCustom) CreateWorker(workerID string, kclConfig *cfg.KinesisClientLibConfiguration) *wk.Worker {
	worker := wfc.leaseStealingWorkerFactory.CreateWorker(workerID, kclConfig)
	checkpointer := chk.NewDynamoCheckpoint(kclConfig)
	return worker.WithCheckpointer(checkpointer)
}

func TestLeaseStealingWithMaxLeasesForWorker(t *testing.T) {
	config := &TestClusterConfig{
		numShards:        4,
		numWorkers:       2,
		appName:          appName,
		streamName:       streamName,
		regionName:       regionName,
		workerIDTemplate: workerID + "-%v",
	}
	test := NewLeaseStealingTest(t, config, newleaseStealingWorkerFactoryMaxLeases(t, config.numShards-1))
	test.Run(LeaseStealingAssertions{
		expectedLeasesForIntialWorker: config.numShards - 1,
		expectedLeasesPerWorker:       2,
	})
}

type leaseStealingWorkerFactoryMaxLeases struct {
	maxLeases int
	*leaseStealingWorkerFactory
}

func newleaseStealingWorkerFactoryMaxLeases(t *testing.T, maxLeases int) *leaseStealingWorkerFactoryMaxLeases {
	return &leaseStealingWorkerFactoryMaxLeases{
		maxLeases,
		newLeaseStealingWorkerFactory(t),
	}
}

func (wfm *leaseStealingWorkerFactoryMaxLeases) CreateKCLConfig(workerID string, config *TestClusterConfig) *cfg.KinesisClientLibConfiguration {
	kclConfig := wfm.leaseStealingWorkerFactory.CreateKCLConfig(workerID, config)
	kclConfig.WithMaxLeasesForWorker(wfm.maxLeases)
	return kclConfig
}
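
The factory variants above swap in a custom checkpointer and a lease cap; following the same pattern, a hypothetical variant (not part of this change) could inject an explicitly constructed Kinesis client, mirroring what TestWorkerInjectKinesis does with WithKinesis and reusing the NewKinesisClient helper from this diff:

// Hypothetical factory variant (illustrative only): reuse the base factory
// but hand the worker an explicitly constructed Kinesis client.
type leaseStealingWorkerFactoryCustomKinesis struct {
	*leaseStealingWorkerFactory
}

func (wfk *leaseStealingWorkerFactoryCustomKinesis) CreateWorker(workerID string, kclConfig *cfg.KinesisClientLibConfiguration) *wk.Worker {
	worker := wfk.leaseStealingWorkerFactory.CreateWorker(workerID, kclConfig)
	kc := NewKinesisClient(wfk.t, regionName, kclConfig.KinesisEndpoint, kclConfig.KinesisCredentials)
	return worker.WithKinesis(kc)
}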

@ -60,7 +60,7 @@ func TestWorker(t *testing.T) {
	// In order to have precise control over logging. Use logger with config
	config := logger.Configuration{
		EnableConsole:     true,
		ConsoleLevel:      logger.Debug,
		ConsoleLevel:      logger.Error,
		ConsoleJSONFormat: false,
		EnableFile:        true,
		FileLevel:         logger.Info,

@ -269,8 +269,13 @@ func runTest(kclConfig *cfg.KinesisClientLibConfiguration, triggersig bool, t *t
	// configure cloudwatch as metrics system
	kclConfig.WithMonitoringService(getMetricsConfig(kclConfig, metricsSystem))

	worker := wk.NewWorker(recordProcessorFactory(t), kclConfig)
	// Put some data into stream.
	kc := NewKinesisClient(t, regionName, kclConfig.KinesisEndpoint, kclConfig.KinesisCredentials)
	// publishSomeData(t, kc)
	stop := continuouslyPublishSomeData(t, kc)
	defer stop()

	worker := wk.NewWorker(recordProcessorFactory(t), kclConfig)
	err := worker.Start()
	assert.Nil(t, err)

@ -286,10 +291,6 @@ func runTest(kclConfig *cfg.KinesisClientLibConfiguration, triggersig bool, t *t
	//os.Exit(0)
	}()

	// Put some data into stream.
	kc := NewKinesisClient(t, regionName, kclConfig.KinesisEndpoint, kclConfig.KinesisCredentials)
	publishSomeData(t, kc)

	if triggersig {
		t.Log("Trigger signal SIGINT")
		p, _ := os.FindProcess(os.Getpid())

@ -297,7 +298,7 @@ func runTest(kclConfig *cfg.KinesisClientLibConfiguration, triggersig bool, t *t
	}

	// wait a few seconds before shutdown processing
	time.Sleep(10 * time.Second)
	time.Sleep(30 * time.Second)

	if metricsSystem == "prometheus" {
		res, err := http.Get("http://localhost:8080/metrics")