kinesis-consumer/pipeline.go
dan e52fcb4f8c Checkpoint after filtered messages in the pipeline
When records are filtered we still want to count them in the checkpoint.
This will allow the checkpoint to pick up at the appropriate spot if any
messages are filtered out.
2015-05-23 12:56:38 -07:00

103 lines
2.8 KiB
Go

package connector
import (
"time"
"github.com/sendgridlabs/go-kinesis"
)
// Pipeline is used as a record processor to configure a pipeline.
//
// The user populates each field with a configured implementation of the
// corresponding interface. ProcessShard passes each record's data to the
// Transformer, offers the result to the Filter, and hands kept records to the
// Buffer. When the Buffer reports that it should flush, its contents are passed
// to the Emitter and the Checkpoint is advanced to the Buffer's last sequence
// number.
type Pipeline struct {
// Buffer accumulates records together with their sequence numbers and
// decides (via ShouldFlush) when they are emitted.
Buffer Buffer
// Checkpoint is consulted for the starting sequence number of a shard and
// is updated after every flush.
Checkpoint Checkpoint
// Emitter receives the Buffer and the Transformer when a flush happens
// and the Buffer holds at least one record.
Emitter Emitter
// Filter decides, via KeepRecord, whether a transformed record is buffered.
Filter Filter
// StreamName is sent as the "StreamName" argument when the shard iterator
// is requested.
StreamName string
// Transformer converts the data of an incoming record via ToRecord.
Transformer Transformer
// CheckpointFilteredRecords, when true, makes ProcessShard pass the
// sequence number of a filtered-out record to the Buffer (with a nil
// record) so the checkpoint can still move past it.
CheckpointFilteredRecords bool
}
// ProcessShard kicks off the processing of a single Kinesis shard.
//
// It is a long running call: after obtaining a shard iterator it polls
// GetRecords in a loop until the shard yields no new iterator.
//
// The starting position is AFTER_SEQUENCE_NUMBER using the sequence number
// from p.Checkpoint when a checkpoint exists for shardID, and TRIM_HORIZON
// otherwise.
//
// Every record is transformed and offered to the filter. Kept records are
// buffered; rejected records are buffered as nil (sequence number only) when
// CheckpointFilteredRecords is set. Whenever the buffer reports ShouldFlush,
// non-empty buffers are emitted, the checkpoint is set to the buffer's last
// sequence number and the buffer is flushed.
//
// A GetShardIterator error, a GetRecords error that isRecoverableError
// rejects, and more than 50 consecutive recoverable GetRecords errors are
// all reported through logger.Fatalf.
//
// NOTE(review): records still held in the buffer when the loop ends are not
// emitted by this method — confirm whether a final flush is wanted.
func (p Pipeline) ProcessShard(ksis *kinesis.Kinesis, shardID string) {
	const (
		// maxConsecutiveErrors is the number of back-to-back recoverable
		// GetRecords failures tolerated before giving up.
		maxConsecutiveErrors = 50
		// idlePollInterval is the pause after an empty read.
		idlePollInterval = 5 * time.Second
	)

	args := kinesis.NewArgs()
	args.Add("ShardId", shardID)
	args.Add("StreamName", p.StreamName)

	if p.Checkpoint.CheckpointExists(shardID) {
		args.Add("ShardIteratorType", "AFTER_SEQUENCE_NUMBER")
		args.Add("StartingSequenceNumber", p.Checkpoint.SequenceNumber())
	} else {
		args.Add("ShardIteratorType", "TRIM_HORIZON")
	}

	shardInfo, err := ksis.GetShardIterator(args)
	if err != nil {
		logger.Fatalf("GetShardIterator ERROR: %v\n", err)
	}

	shardIterator := shardInfo.ShardIterator
	consecutiveErrorAttempts := 0

	for {
		if consecutiveErrorAttempts > maxConsecutiveErrors {
			logger.Fatalf("Too many consecutive error attempts")
		}

		args = kinesis.NewArgs()
		args.Add("ShardIterator", shardIterator)

		recordSet, err := ksis.GetRecords(args)
		if err != nil {
			if isRecoverableError(err) {
				logger.Printf("GetRecords RECOVERABLE_ERROR: %v\n", err)
				consecutiveErrorAttempts++
				handleAwsWaitTimeExp(consecutiveErrorAttempts)
				continue
			}
			logger.Fatalf("GetRecords ERROR: %v\n", err)
		}
		consecutiveErrorAttempts = 0

		switch {
		case len(recordSet.Records) > 0:
			for _, v := range recordSet.Records {
				r := p.Transformer.ToRecord(v.GetData())
				if p.Filter.KeepRecord(r) {
					p.Buffer.ProcessRecord(r, v.SequenceNumber)
				} else if p.CheckpointFilteredRecords {
					// Track the sequence number only, so the checkpoint
					// can advance past records that were filtered out.
					p.Buffer.ProcessRecord(nil, v.SequenceNumber)
				}
			}
		case recordSet.NextShardIterator == "" || shardIterator == recordSet.NextShardIterator:
			// Empty read and the iterator is missing or did not advance:
			// there is nothing further to read from this shard.
			logger.Printf("NextShardIterator ERROR: shard %s returned no new iterator\n", shardID)
			return
		default:
			time.Sleep(idlePollInterval)
		}

		if p.Buffer.ShouldFlush() {
			if p.Buffer.NumRecordsInBuffer() > 0 {
				p.Emitter.Emit(p.Buffer, p.Transformer)
			}
			// The checkpoint is advanced even when nothing was emitted, so
			// sequence numbers of filtered records are not re-read.
			p.Checkpoint.SetCheckpoint(shardID, p.Buffer.LastSequenceNumber())
			p.Buffer.Flush()
		}

		// The final batch of a shard can carry records together with an
		// empty NextShardIterator. Stop here instead of issuing another
		// GetRecords call with an empty iterator, which can only fail.
		if recordSet.NextShardIterator == "" {
			logger.Printf("NextShardIterator ERROR: shard %s returned no new iterator\n", shardID)
			return
		}
		shardIterator = recordSet.NextShardIterator
	}
}