Make the Checkpoint a required input for Consumer

The Checkpoint functionality is an important part of the library and
previously it wasn't obvious that the Consumer was defaulting to Redis
for this functionality.

* Add Checkpoint as required param for new consumer
This commit is contained in:
Harlow Ward 2017-11-21 08:58:16 -08:00
parent 154595b9a3
commit 4d6a85e901
7 changed files with 147 additions and 130 deletions

107
README.md
View file

@ -17,26 +17,37 @@ Get the package source:
The consumer leverages a handler func that accepts a Kinesis record. The `Scan` method will consume all shards concurrently and call the callback func as it receives records from the stream. The consumer leverages a handler func that accepts a Kinesis record. The `Scan` method will consume all shards concurrently and call the callback func as it receives records from the stream.
```go ```go
import consumer "github.com/harlow/kinesis-consumer" import(
// ...
consumer "github.com/harlow/kinesis-consumer"
checkpoint "github.com/harlow/kinesis-consumer/checkpoint/redis"
)
func main() { func main() {
log.SetHandler(text.New(os.Stderr)) log.SetHandler(text.New(os.Stderr))
log.SetLevel(log.DebugLevel) log.SetLevel(log.DebugLevel)
var ( var (
app = flag.String("app", "", "App name") // name of consumer group app = flag.String("app", "", "App name")
stream = flag.String("stream", "", "Stream name") stream = flag.String("stream", "", "Stream name")
) )
flag.Parse() flag.Parse()
c, err := consumer.New(*app, *stream) // new checkpoint
ck, err := checkpoint.New(*app, *stream)
if err != nil {
log.Fatalf("checkpoint error: %v", err)
}
// new consumer
c, err := consumer.New(ck, *app, *stream)
if err != nil { if err != nil {
log.Fatalf("consumer error: %v", err) log.Fatalf("consumer error: %v", err)
} }
err = c.Scan(context.TODO(), func(r *kinesis.Record) bool { // scan stream
err = c.Scan(context.TODO(), func(r *consumer.Record) bool {
fmt.Println(string(r.Data)) fmt.Println(string(r.Data))
return true // continue scanning return true // continue scanning
}) })
if err != nil { if err != nil {
@ -48,17 +59,55 @@ func main() {
} }
``` ```
### Configuration ### Checkpoint
The consumer requires the following config: To record the progress of the consumer in the stream we use a checkpoint to store the last sequence number the consumer has read from a particular shard.
* App Name (used for checkpoints) This will allow consumers to re-launch and pick up at the position in the stream where they left off.
* Stream Name (kinesis stream name)
It also accepts the following optional overrides: The uniq identifier for a consumer is `[appName, streamName, shardID]`
<img width="687" alt="kinesis-checkpoints" src="https://user-images.githubusercontent.com/739782/33036582-b6f3c4b4-cde3-11e7-9334-c4bfbe34d984.png">
There are two types of checkpoints:
### Redis
The Redis checkpoint requries App Name, and Stream Name:
```go
import checkpoint "github.com/harlow/kinesis-consumer/checkpoint/redis"
// redis checkpoint
ck, err := checkpoint.New(appName, streamName)
if err != nil {
log.Fatalf("new checkpoint error: %v", err)
}
```
### DynamoDB
The DynamoDB checkpoint requires Table Name, App Name, and Stream Name:
```go
import checkpoint "github.com/harlow/kinesis-consumer/checkpoint/ddb"
// ddb checkpoint
ck, err := checkpoint.New(tableName, appName, streamName)
if err != nil {
log.Fatalf("new checkpoint error: %v", err)
}
```
To leverage the DDB checkpoint we'll also need to create a table:
<img width="659" alt="screen shot 2017-11-20 at 9 16 14 am" src="https://user-images.githubusercontent.com/739782/33033316-db85f848-cdd8-11e7-941a-0a87d8ace479.png">
### Options
The consumer allows the following optional overrides:
* Kinesis Client * Kinesis Client
* Checkpoint Storage
* Logger * Logger
```go ```go
@ -67,46 +116,12 @@ svc := kinesis.New(session.New(aws.NewConfig()))
// new consumer with custom client // new consumer with custom client
c, err := consumer.New( c, err := consumer.New(
appName, consumer,
streamName, streamName,
consumer.WithClient(svc), consumer.WithClient(svc),
) )
``` ```
### Checkpoint Storage
To record the progress of the consumer in the stream we store the last sequence number the consumer has read from a particular shard. This will allow consumers to re-launch and pick up at the position in the stream where they left off.
<img width="687" alt="kinesis-checkpoints" src="https://user-images.githubusercontent.com/739782/33036582-b6f3c4b4-cde3-11e7-9334-c4bfbe34d984.png">
The default checkpoint uses Redis on localhost; to set a custom Redis URL use ENV vars:
```
REDIS_URL=redis.yoursite.com:6379
```
To leverage DynamoDB as the backend for checkpoint we'll need a new table:
<img width="659" alt="screen shot 2017-11-20 at 9 16 14 am" src="https://user-images.githubusercontent.com/739782/33033316-db85f848-cdd8-11e7-941a-0a87d8ace479.png">
Then override the checkpoint config option:
```go
// ddb checkpoint
ck, err := checkpoint.New(tableName, appName, streamName)
if err != nil {
log.Fatalf("new checkpoint error: %v", err)
}
// consumer with checkpoint
c, err := consumer.New(
appName,
streamName,
consumer.WithCheckpoint(ck),
)
```
### Logging ### Logging
[Apex Log](https://medium.com/@tjholowaychuk/apex-log-e8d9627f4a9a#.5x1uo1767) is used for logging Info. Override the logs format with other [Log Handlers](https://github.com/apex/log/tree/master/_examples). For example using the "json" log handler: [Apex Log](https://medium.com/@tjholowaychuk/apex-log-e8d9627f4a9a#.5x1uo1767) is used for logging Info. Override the logs format with other [Log Handlers](https://github.com/apex/log/tree/master/_examples). For example using the "json" log handler:

View file

@ -78,6 +78,10 @@ func (c *Checkpoint) Get(shardID string) (string, error) {
// Set stores a checkpoint for a shard (e.g. sequence number of last record processed by application). // Set stores a checkpoint for a shard (e.g. sequence number of last record processed by application).
// Upon failover, record processing is resumed from this point. // Upon failover, record processing is resumed from this point.
func (c *Checkpoint) Set(shardID string, sequenceNumber string) error { func (c *Checkpoint) Set(shardID string, sequenceNumber string) error {
if sequenceNumber == "" {
return fmt.Errorf("sequence number should not be empty")
}
item, err := dynamodbattribute.MarshalMap(item{ item, err := dynamodbattribute.MarshalMap(item{
ConsumerGroup: c.consumerGroupName(), ConsumerGroup: c.consumerGroupName(),
ShardID: shardID, ShardID: shardID,

View file

@ -25,38 +25,39 @@ func New(appName, streamName string) (*Checkpoint, error) {
} }
return &Checkpoint{ return &Checkpoint{
AppName: appName, appName: appName,
StreamName: streamName, streamName: streamName,
client: client, client: client,
}, nil }, nil
} }
// Checkpoint stores and retreives the last evaluated key from a DDB scan // Checkpoint stores and retreives the last evaluated key from a DDB scan
type Checkpoint struct { type Checkpoint struct {
AppName string appName string
StreamName string streamName string
client *redis.Client
client *redis.Client
} }
// Get determines if a checkpoint for a particular Shard exists. // Get fetches the checkpoint for a particular Shard.
// Typically used to determine whether we should start processing the shard with
// TRIM_HORIZON or AFTER_SEQUENCE_NUMBER (if checkpoint exists).
func (c *Checkpoint) Get(shardID string) (string, error) { func (c *Checkpoint) Get(shardID string) (string, error) {
return c.client.Get(c.key(shardID)).Result() val, _ := c.client.Get(c.key(shardID)).Result()
return val, nil
} }
// Set stores a checkpoint for a shard (e.g. sequence number of last record processed by application). // Set stores a checkpoint for a shard (e.g. sequence number of last record processed by application).
// Upon failover, record processing is resumed from this point. // Upon failover, record processing is resumed from this point.
func (c *Checkpoint) Set(shardID string, sequenceNumber string) error { func (c *Checkpoint) Set(shardID string, sequenceNumber string) error {
if sequenceNumber == "" {
return fmt.Errorf("sequence number should not be empty")
}
err := c.client.Set(c.key(shardID), sequenceNumber, 0).Err() err := c.client.Set(c.key(shardID), sequenceNumber, 0).Err()
if err != nil { if err != nil {
return fmt.Errorf("redis checkpoint error: %v", err) return err
} }
return nil return nil
} }
// key generates a unique Redis key for storage of Checkpoint. // key generates a unique Redis key for storage of Checkpoint.
func (c *Checkpoint) key(shardID string) string { func (c *Checkpoint) key(shardID string) string {
return fmt.Sprintf("%v:checkpoint:%v:%v", c.AppName, c.StreamName, shardID) return fmt.Sprintf("%v:checkpoint:%v:%v", c.appName, c.streamName, shardID)
} }

View file

@ -12,33 +12,33 @@ func Test_CheckpointLifecycle(t *testing.T) {
client := redis.NewClient(&redis.Options{Addr: defaultAddr}) client := redis.NewClient(&redis.Options{Addr: defaultAddr})
c := &Checkpoint{ c := &Checkpoint{
AppName: "app", appName: "app",
StreamName: "stream", streamName: "stream",
client: client, client: client,
} }
// set checkpoint // set checkpoint
c.SetCheckpoint("shard_id", "testSeqNum") c.Set("shard_id", "testSeqNum")
// checkpoint exists
if val := c.CheckpointExists("shard_id"); val != true {
t.Fatalf("checkpoint exists expected true, got %t", val)
}
// get checkpoint // get checkpoint
if val := c.SequenceNumber(); val != "testSeqNum" { val, err := c.Get("shard_id")
if err != nil {
t.Fatalf("get checkpoint error: %v", err)
}
if val != "testSeqNum" {
t.Fatalf("checkpoint exists expected %s, got %s", "testSeqNum", val) t.Fatalf("checkpoint exists expected %s, got %s", "testSeqNum", val)
} }
client.Del("app:checkpoint:stream:shard_id") client.Del(c.key("shard_id"))
} }
func Test_key(t *testing.T) { func Test_key(t *testing.T) {
client := redis.NewClient(&redis.Options{Addr: defaultAddr}) client := redis.NewClient(&redis.Options{Addr: defaultAddr})
c := &Checkpoint{ c := &Checkpoint{
AppName: "app", appName: "app",
StreamName: "stream", streamName: "stream",
client: client, client: client,
} }

View file

@ -10,20 +10,13 @@ import (
"github.com/aws/aws-sdk-go/aws/session" "github.com/aws/aws-sdk-go/aws/session"
"github.com/aws/aws-sdk-go/service/kinesis" "github.com/aws/aws-sdk-go/service/kinesis"
"github.com/harlow/kinesis-consumer/checkpoint" "github.com/harlow/kinesis-consumer/checkpoint"
"github.com/harlow/kinesis-consumer/checkpoint/redis"
) )
type Record = kinesis.Record
// Option is used to override defaults when creating a new Consumer // Option is used to override defaults when creating a new Consumer
type Option func(*Consumer) error type Option func(*Consumer) error
// WithClient the Kinesis client
func WithClient(client *kinesis.Kinesis) Option {
return func(c *Consumer) error {
c.svc = client
return nil
}
}
// WithCheckpoint overrides the default checkpoint // WithCheckpoint overrides the default checkpoint
func WithCheckpoint(checkpoint checkpoint.Checkpoint) Option { func WithCheckpoint(checkpoint checkpoint.Checkpoint) Option {
return func(c *Consumer) error { return func(c *Consumer) error {
@ -42,18 +35,23 @@ func WithLogger(logger log.Interface) Option {
// New creates a kinesis consumer with default settings. Use Option to override // New creates a kinesis consumer with default settings. Use Option to override
// any of the optional attributes. // any of the optional attributes.
func New(appName, streamName string, opts ...Option) (*Consumer, error) { func New(checkpoint checkpoint.Checkpoint, app, stream string, opts ...Option) (*Consumer, error) {
if appName == "" { if checkpoint == nil {
return nil, fmt.Errorf("must provide app name to consumer") return nil, fmt.Errorf("must provide checkpoint")
} }
if streamName == "" { if app == "" {
return nil, fmt.Errorf("must provide stream name to consumer") return nil, fmt.Errorf("must provide app name")
}
if stream == "" {
return nil, fmt.Errorf("must provide stream name")
} }
c := &Consumer{ c := &Consumer{
appName: appName, checkpoint: checkpoint,
streamName: streamName, appName: app,
streamName: stream,
} }
// set options // set options
@ -67,23 +65,14 @@ func New(appName, streamName string, opts ...Option) (*Consumer, error) {
if c.logger == nil { if c.logger == nil {
c.logger = log.Log.WithFields(log.Fields{ c.logger = log.Log.WithFields(log.Fields{
"package": "kinesis-consumer", "package": "kinesis-consumer",
"app": appName, "app": app,
"stream": streamName, "stream": stream,
}) })
} }
// provide a default kinesis client // provide a default kinesis client
if c.svc == nil { if c.client == nil {
c.svc = kinesis.New(session.New(aws.NewConfig())) c.client = kinesis.New(session.New(aws.NewConfig()))
}
// provide default Redis checkpoint
if c.checkpoint == nil {
ck, err := redis.New(appName, streamName)
if err != nil {
return nil, err
}
c.checkpoint = ck
} }
return c, nil return c, nil
@ -91,9 +80,9 @@ func New(appName, streamName string, opts ...Option) (*Consumer, error) {
// Consumer wraps the interaction with the Kinesis stream // Consumer wraps the interaction with the Kinesis stream
type Consumer struct { type Consumer struct {
appName string appName string
streamName string streamName string
svc *kinesis.Kinesis client *kinesis.Kinesis
logger log.Interface logger log.Interface
checkpoint checkpoint.Checkpoint checkpoint checkpoint.Checkpoint
} }
@ -105,7 +94,7 @@ func (c *Consumer) Scan(ctx context.Context, fn func(*kinesis.Record) bool) erro
defer cancel() defer cancel()
// grab the stream details // grab the stream details
resp, err := c.svc.DescribeStream( resp, err := c.client.DescribeStream(
&kinesis.DescribeStreamInput{ &kinesis.DescribeStreamInput{
StreamName: aws.String(c.streamName), StreamName: aws.String(c.streamName),
}, },
@ -134,12 +123,15 @@ func (c *Consumer) Scan(ctx context.Context, fn func(*kinesis.Record) bool) erro
// for each record and checkpoints after each page is processed. // for each record and checkpoints after each page is processed.
// Note: returning `false` from the callback func will end the scan. // Note: returning `false` from the callback func will end the scan.
func (c *Consumer) ScanShard(ctx context.Context, shardID string, fn func(*kinesis.Record) bool) { func (c *Consumer) ScanShard(ctx context.Context, shardID string, fn func(*kinesis.Record) bool) {
var ( var logger = c.logger.WithFields(log.Fields{"shard": shardID})
logger = c.logger.WithFields(log.Fields{"shard": shardID})
sequenceNumber string
)
shardIterator, err := c.getShardIterator(shardID) lastSeqNum, err := c.checkpoint.Get(shardID)
if err != nil {
logger.WithError(err).Error("get checkpoint")
return
}
shardIterator, err := c.getShardIterator(shardID, lastSeqNum)
if err != nil { if err != nil {
logger.WithError(err).Error("getShardIterator") logger.WithError(err).Error("getShardIterator")
return return
@ -153,14 +145,14 @@ loop:
case <-ctx.Done(): case <-ctx.Done():
break loop break loop
default: default:
resp, err := c.svc.GetRecords( resp, err := c.client.GetRecords(
&kinesis.GetRecordsInput{ &kinesis.GetRecordsInput{
ShardIterator: shardIterator, ShardIterator: shardIterator,
}, },
) )
if err != nil { if err != nil {
shardIterator, err = c.getShardIterator(shardID) shardIterator, err = c.getShardIterator(shardID, lastSeqNum)
if err != nil { if err != nil {
logger.WithError(err).Error("getShardIterator") logger.WithError(err).Error("getShardIterator")
break loop break loop
@ -174,21 +166,21 @@ loop:
case <-ctx.Done(): case <-ctx.Done():
break loop break loop
default: default:
sequenceNumber = *r.SequenceNumber lastSeqNum = *r.SequenceNumber
if ok := fn(r); !ok { if ok := fn(r); !ok {
break loop break loop
} }
} }
} }
logger.WithField("records", len(resp.Records)).Info("checkpoint") logger.WithField("count", len(resp.Records)).Info("checkpoint")
if err := c.checkpoint.Set(shardID, sequenceNumber); err != nil { if err := c.checkpoint.Set(shardID, lastSeqNum); err != nil {
c.logger.WithError(err).Error("set checkpoint error") c.logger.WithError(err).Error("set checkpoint error")
} }
} }
if resp.NextShardIterator == nil || shardIterator == resp.NextShardIterator { if resp.NextShardIterator == nil || shardIterator == resp.NextShardIterator {
shardIterator, err = c.getShardIterator(shardID) shardIterator, err = c.getShardIterator(shardID, lastSeqNum)
if err != nil { if err != nil {
logger.WithError(err).Error("getShardIterator") logger.WithError(err).Error("getShardIterator")
break loop break loop
@ -199,32 +191,29 @@ loop:
} }
} }
if sequenceNumber != "" { if lastSeqNum == "" {
if err := c.checkpoint.Set(shardID, sequenceNumber); err != nil { return
c.logger.WithError(err).Error("set checkpoint error") }
}
if err := c.checkpoint.Set(shardID, lastSeqNum); err != nil {
c.logger.WithError(err).Error("set checkpoint error")
} }
} }
func (c *Consumer) getShardIterator(shardID string) (*string, error) { func (c *Consumer) getShardIterator(shardID, lastSeqNum string) (*string, error) {
params := &kinesis.GetShardIteratorInput{ params := &kinesis.GetShardIteratorInput{
ShardId: aws.String(shardID), ShardId: aws.String(shardID),
StreamName: aws.String(c.streamName), StreamName: aws.String(c.streamName),
} }
seqNum, err := c.checkpoint.Get(shardID) if lastSeqNum != "" {
if err != nil {
return nil, err
}
if seqNum != "" {
params.ShardIteratorType = aws.String("AFTER_SEQUENCE_NUMBER") params.ShardIteratorType = aws.String("AFTER_SEQUENCE_NUMBER")
params.StartingSequenceNumber = aws.String(seqNum) params.StartingSequenceNumber = aws.String(lastSeqNum)
} else { } else {
params.ShardIteratorType = aws.String("TRIM_HORIZON") params.ShardIteratorType = aws.String("TRIM_HORIZON")
} }
resp, err := c.svc.GetShardIterator(params) resp, err := c.client.GetShardIterator(params)
if err != nil { if err != nil {
c.logger.WithError(err).Error("GetShardIterator") c.logger.WithError(err).Error("GetShardIterator")
return nil, err return nil, err

View file

@ -8,8 +8,8 @@ import (
"github.com/apex/log" "github.com/apex/log"
"github.com/apex/log/handlers/text" "github.com/apex/log/handlers/text"
"github.com/aws/aws-sdk-go/service/kinesis"
consumer "github.com/harlow/kinesis-consumer" consumer "github.com/harlow/kinesis-consumer"
checkpoint "github.com/harlow/kinesis-consumer/checkpoint/redis"
) )
func main() { func main() {
@ -22,12 +22,20 @@ func main() {
) )
flag.Parse() flag.Parse()
c, err := consumer.New(*app, *stream) // new checkpoint
ck, err := checkpoint.New(*app, *stream)
if err != nil {
log.Fatalf("checkpoint error: %v", err)
}
// new consumer
c, err := consumer.New(ck, *app, *stream)
if err != nil { if err != nil {
log.Fatalf("consumer error: %v", err) log.Fatalf("consumer error: %v", err)
} }
err = c.Scan(context.TODO(), func(r *kinesis.Record) bool { // scan stream
err = c.Scan(context.TODO(), func(r *consumer.Record) bool {
fmt.Println(string(r.Data)) fmt.Println(string(r.Data))
return true // continue scanning return true // continue scanning
}) })

View file

@ -18,7 +18,7 @@ func main() {
log.SetHandler(text.New(os.Stderr)) log.SetHandler(text.New(os.Stderr))
log.SetLevel(log.DebugLevel) log.SetLevel(log.DebugLevel)
var streamName = flag.String("s", "", "Stream name") var streamName = flag.String("stream", "", "Stream name")
flag.Parse() flag.Parse()
// download file with test data // download file with test data