2018-06-05 03:07:58 +00:00
|
|
|
package ddb
|
2017-11-20 17:37:30 +00:00
|
|
|
|
|
|
|
|
import (
|
2021-09-22 05:00:14 +00:00
|
|
|
"context"
|
2017-11-20 17:37:30 +00:00
|
|
|
"fmt"
|
|
|
|
|
"log"
|
2017-12-31 04:21:10 +00:00
|
|
|
"sync"
|
|
|
|
|
"time"
|
2017-11-20 17:37:30 +00:00
|
|
|
|
2021-09-22 05:00:14 +00:00
|
|
|
"github.com/aws/aws-sdk-go-v2/aws"
|
|
|
|
|
"github.com/aws/aws-sdk-go-v2/config"
|
|
|
|
|
"github.com/aws/aws-sdk-go-v2/feature/dynamodb/attributevalue"
|
|
|
|
|
"github.com/aws/aws-sdk-go-v2/service/dynamodb"
|
|
|
|
|
"github.com/aws/aws-sdk-go-v2/service/dynamodb/types"
|
2017-11-20 17:37:30 +00:00
|
|
|
)
|
|
|
|
|
|
2017-12-31 04:21:10 +00:00
|
|
|
// Option is used to override defaults when creating a new Checkpoint
|
|
|
|
|
type Option func(*Checkpoint)
|
|
|
|
|
|
|
|
|
|
// WithMaxInterval sets the flush interval
|
|
|
|
|
func WithMaxInterval(maxInterval time.Duration) Option {
|
|
|
|
|
return func(c *Checkpoint) {
|
|
|
|
|
c.maxInterval = maxInterval
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2018-06-01 00:41:14 +00:00
|
|
|
// WithDynamoClient sets the dynamoDb client
|
2021-09-22 05:00:14 +00:00
|
|
|
func WithDynamoClient(svc *dynamodb.Client) Option {
|
2018-06-01 00:41:14 +00:00
|
|
|
return func(c *Checkpoint) {
|
|
|
|
|
c.client = svc
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2018-06-05 03:07:58 +00:00
|
|
|
// WithRetryer sets the retryer
|
|
|
|
|
func WithRetryer(r Retryer) Option {
|
|
|
|
|
return func(c *Checkpoint) {
|
|
|
|
|
c.retryer = r
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2017-11-20 17:37:30 +00:00
|
|
|
// New returns a checkpoint that uses DynamoDB for underlying storage
|
2017-12-31 04:21:10 +00:00
|
|
|
func New(appName, tableName string, opts ...Option) (*Checkpoint, error) {
|
|
|
|
|
ck := &Checkpoint{
|
|
|
|
|
tableName: tableName,
|
|
|
|
|
appName: appName,
|
|
|
|
|
maxInterval: time.Duration(1 * time.Minute),
|
|
|
|
|
done: make(chan struct{}),
|
|
|
|
|
mu: &sync.Mutex{},
|
|
|
|
|
checkpoints: map[key]string{},
|
2018-06-05 03:07:58 +00:00
|
|
|
retryer: &DefaultRetryer{},
|
2017-12-31 04:21:10 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
for _, opt := range opts {
|
|
|
|
|
opt(ck)
|
|
|
|
|
}
|
|
|
|
|
|
2020-01-17 18:02:37 +00:00
|
|
|
// default client
|
|
|
|
|
if ck.client == nil {
|
2021-09-22 05:00:14 +00:00
|
|
|
cfg, err := config.LoadDefaultConfig(context.TODO())
|
2020-01-17 18:02:37 +00:00
|
|
|
if err != nil {
|
2021-09-22 05:00:14 +00:00
|
|
|
log.Fatalf("unable to load SDK config, %v", err)
|
2020-01-17 18:02:37 +00:00
|
|
|
}
|
2021-09-22 05:00:14 +00:00
|
|
|
ck.client = dynamodb.NewFromConfig(cfg)
|
2020-01-17 18:02:37 +00:00
|
|
|
}
|
|
|
|
|
|
2017-12-31 04:21:10 +00:00
|
|
|
go ck.loop()
|
|
|
|
|
|
|
|
|
|
return ck, nil
|
2017-11-20 17:37:30 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Checkpoint stores and retreives the last evaluated key from a DDB scan
|
|
|
|
|
type Checkpoint struct {
|
2017-12-31 04:21:10 +00:00
|
|
|
tableName string
|
|
|
|
|
appName string
|
2021-09-22 05:00:14 +00:00
|
|
|
client *dynamodb.Client
|
2017-12-31 04:21:10 +00:00
|
|
|
maxInterval time.Duration
|
|
|
|
|
mu *sync.Mutex // protects the checkpoints
|
|
|
|
|
checkpoints map[key]string
|
|
|
|
|
done chan struct{}
|
2018-06-05 03:07:58 +00:00
|
|
|
retryer Retryer
|
2017-12-31 04:21:10 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// key identifies one buffered checkpoint: a shard within a stream. It is used
// as the map key of Checkpoint.checkpoints.
type key struct {
	StreamName, ShardID string
}
|
|
|
|
|
|
|
|
|
|
// item is the record stored in DynamoDB for one checkpoint. The struct tags
// give the attribute names used when the record is marshalled and
// unmarshalled.
type item struct {
	// Namespace is "<appName>-<streamName>".
	Namespace string `json:"namespace" dynamodbav:"namespace"`

	// ShardID is the shard the checkpoint belongs to.
	ShardID string `json:"shard_id" dynamodbav:"shard_id"`

	// SequenceNumber is the checkpointed sequence number.
	SequenceNumber string `json:"sequence_number" dynamodbav:"sequence_number"`
}
|
|
|
|
|
|
2019-09-01 01:43:26 +00:00
|
|
|
// GetCheckpoint determines if a checkpoint for a particular Shard exists.
|
2017-11-20 17:37:30 +00:00
|
|
|
// Typically used to determine whether we should start processing the shard with
|
|
|
|
|
// TRIM_HORIZON or AFTER_SEQUENCE_NUMBER (if checkpoint exists).
|
2019-07-29 04:18:40 +00:00
|
|
|
func (c *Checkpoint) GetCheckpoint(streamName, shardID string) (string, error) {
|
2017-11-23 04:01:31 +00:00
|
|
|
namespace := fmt.Sprintf("%s-%s", c.appName, streamName)
|
|
|
|
|
|
2017-11-20 17:37:30 +00:00
|
|
|
params := &dynamodb.GetItemInput{
|
2017-11-23 04:01:31 +00:00
|
|
|
TableName: aws.String(c.tableName),
|
2017-11-20 17:37:30 +00:00
|
|
|
ConsistentRead: aws.Bool(true),
|
2021-09-22 05:00:14 +00:00
|
|
|
Key: map[string]types.AttributeValue{
|
2021-12-04 21:40:26 +00:00
|
|
|
"namespace": &types.AttributeValueMemberS{Value: namespace},
|
|
|
|
|
"shard_id": &types.AttributeValueMemberS{Value: shardID},
|
2017-11-20 17:37:30 +00:00
|
|
|
},
|
|
|
|
|
}
|
|
|
|
|
|
2021-09-22 05:00:14 +00:00
|
|
|
resp, err := c.client.GetItem(context.Background(), params)
|
2017-11-20 17:37:30 +00:00
|
|
|
if err != nil {
|
2018-06-05 03:07:58 +00:00
|
|
|
if c.retryer.ShouldRetry(err) {
|
2019-07-29 04:18:40 +00:00
|
|
|
return c.GetCheckpoint(streamName, shardID)
|
2017-11-20 17:37:30 +00:00
|
|
|
}
|
|
|
|
|
return "", err
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
var i item
|
2021-09-22 05:00:14 +00:00
|
|
|
attributevalue.UnmarshalMap(resp.Item, &i)
|
2017-11-20 17:37:30 +00:00
|
|
|
return i.SequenceNumber, nil
|
|
|
|
|
}
|
|
|
|
|
|
2019-07-29 04:18:40 +00:00
|
|
|
// SetCheckpoint stores a checkpoint for a shard (e.g. sequence number of last record processed by application).
|
2017-11-20 17:37:30 +00:00
|
|
|
// Upon failover, record processing is resumed from this point.
|
2019-07-29 04:18:40 +00:00
|
|
|
func (c *Checkpoint) SetCheckpoint(streamName, shardID, sequenceNumber string) error {
|
2017-12-31 04:21:10 +00:00
|
|
|
c.mu.Lock()
|
|
|
|
|
defer c.mu.Unlock()
|
|
|
|
|
|
2017-11-21 16:58:16 +00:00
|
|
|
if sequenceNumber == "" {
|
|
|
|
|
return fmt.Errorf("sequence number should not be empty")
|
|
|
|
|
}
|
|
|
|
|
|
2017-12-31 04:21:10 +00:00
|
|
|
key := key{
|
2021-12-04 21:40:26 +00:00
|
|
|
StreamName: streamName,
|
|
|
|
|
ShardID: shardID,
|
2017-12-31 04:21:10 +00:00
|
|
|
}
|
|
|
|
|
c.checkpoints[key] = sequenceNumber
|
2017-11-23 04:01:31 +00:00
|
|
|
|
2017-12-31 04:21:10 +00:00
|
|
|
return nil
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Shutdown the checkpoint. Save any in-flight data.
|
|
|
|
|
func (c *Checkpoint) Shutdown() error {
|
|
|
|
|
c.done <- struct{}{}
|
|
|
|
|
return c.save()
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
func (c *Checkpoint) loop() {
|
|
|
|
|
tick := time.NewTicker(c.maxInterval)
|
|
|
|
|
defer tick.Stop()
|
|
|
|
|
defer close(c.done)
|
|
|
|
|
|
|
|
|
|
for {
|
|
|
|
|
select {
|
|
|
|
|
case <-tick.C:
|
|
|
|
|
c.save()
|
|
|
|
|
case <-c.done:
|
|
|
|
|
return
|
|
|
|
|
}
|
2017-11-20 17:37:30 +00:00
|
|
|
}
|
2017-12-31 04:21:10 +00:00
|
|
|
}
|
2017-11-20 17:37:30 +00:00
|
|
|
|
2017-12-31 04:21:10 +00:00
|
|
|
func (c *Checkpoint) save() error {
|
|
|
|
|
c.mu.Lock()
|
|
|
|
|
defer c.mu.Unlock()
|
|
|
|
|
|
|
|
|
|
for key, sequenceNumber := range c.checkpoints {
|
2021-09-22 05:00:14 +00:00
|
|
|
item, err := attributevalue.MarshalMap(item{
|
2021-12-04 21:40:26 +00:00
|
|
|
Namespace: fmt.Sprintf("%s-%s", c.appName, key.StreamName),
|
|
|
|
|
ShardID: key.ShardID,
|
2017-12-31 04:21:10 +00:00
|
|
|
SequenceNumber: sequenceNumber,
|
|
|
|
|
})
|
|
|
|
|
if err != nil {
|
|
|
|
|
log.Printf("marshal map error: %v", err)
|
|
|
|
|
return nil
|
|
|
|
|
}
|
|
|
|
|
|
2021-09-22 05:00:14 +00:00
|
|
|
_, err = c.client.PutItem(
|
|
|
|
|
context.TODO(),
|
|
|
|
|
&dynamodb.PutItemInput{
|
|
|
|
|
TableName: aws.String(c.tableName),
|
|
|
|
|
Item: item,
|
|
|
|
|
})
|
2017-12-31 04:21:10 +00:00
|
|
|
if err != nil {
|
2018-06-05 03:07:58 +00:00
|
|
|
if !c.retryer.ShouldRetry(err) {
|
2017-12-31 04:21:10 +00:00
|
|
|
return err
|
|
|
|
|
}
|
|
|
|
|
return c.save()
|
2017-11-20 17:37:30 +00:00
|
|
|
}
|
|
|
|
|
}
|
2017-12-31 04:21:10 +00:00
|
|
|
|
2017-11-20 17:37:30 +00:00
|
|
|
return nil
|
|
|
|
|
}
|