2014-07-25 06:03:41 +00:00
|
|
|
package connector
|
|
|
|
|
|
|
|
|
|
import (
|
|
|
|
|
"time"
|
|
|
|
|
|
2015-08-16 05:20:34 +00:00
|
|
|
"github.com/aws/aws-sdk-go/aws"
|
|
|
|
|
"github.com/aws/aws-sdk-go/aws/awserr"
|
|
|
|
|
"github.com/aws/aws-sdk-go/service/kinesis"
|
2014-07-25 06:03:41 +00:00
|
|
|
)
|
|
|
|
|
|
2014-12-10 23:38:19 +00:00
|
|
|
// Pipeline is used as a record processor to configure a pipline.
|
|
|
|
|
//
|
2014-07-25 06:03:41 +00:00
|
|
|
// The user should implement this such that each method returns a configured implementation of each
|
|
|
|
|
// interface. It has a data type (Model) as Records come in as a byte[] and are transformed to a Model.
|
|
|
|
|
// Then they are buffered in Model form and when the buffer is full, Models's are passed to the emitter.
|
|
|
|
|
type Pipeline struct {
|
2015-08-16 05:20:34 +00:00
|
|
|
Buffer Buffer
|
|
|
|
|
Checkpoint Checkpoint
|
|
|
|
|
Emitter Emitter
|
|
|
|
|
Filter Filter
|
|
|
|
|
Kinesis *kinesis.Kinesis
|
|
|
|
|
StreamName string
|
|
|
|
|
Transformer Transformer
|
|
|
|
|
|
|
|
|
|
checkpointSequenceNumber string
|
2014-07-25 06:03:41 +00:00
|
|
|
}
|
|
|
|
|
|
2015-08-16 05:20:34 +00:00
|
|
|
// ProcessShard is a long running process that handles reading records from a Kinesis shard.
|
|
|
|
|
func (p Pipeline) ProcessShard(shardID string) {
|
|
|
|
|
svc := kinesis.New(&aws.Config{Region: "us-east-1"})
|
|
|
|
|
|
|
|
|
|
args := &kinesis.GetShardIteratorInput{
|
|
|
|
|
ShardID: aws.String(shardID),
|
|
|
|
|
StreamName: aws.String(p.StreamName),
|
|
|
|
|
}
|
2014-07-25 06:03:41 +00:00
|
|
|
|
|
|
|
|
if p.Checkpoint.CheckpointExists(shardID) {
|
2015-08-16 05:20:34 +00:00
|
|
|
args.ShardIteratorType = aws.String("AFTER_SEQUENCE_NUMBER")
|
|
|
|
|
args.StartingSequenceNumber = aws.String(p.Checkpoint.SequenceNumber())
|
2014-07-25 06:03:41 +00:00
|
|
|
} else {
|
2015-08-16 05:20:34 +00:00
|
|
|
args.ShardIteratorType = aws.String("TRIM_HORIZON")
|
2014-07-25 06:03:41 +00:00
|
|
|
}
|
|
|
|
|
|
2015-08-16 05:20:34 +00:00
|
|
|
resp, err := svc.GetShardIterator(args)
|
2014-07-25 06:03:41 +00:00
|
|
|
|
|
|
|
|
if err != nil {
|
2015-08-16 05:20:34 +00:00
|
|
|
if awsErr, ok := err.(awserr.Error); ok {
|
|
|
|
|
logger.Log("error", "GetShardIterator", "code", awsErr.Code(), "msg", awsErr.Message(), "origError", awsErr.OrigErr())
|
|
|
|
|
return
|
|
|
|
|
}
|
2014-07-25 06:03:41 +00:00
|
|
|
}
|
|
|
|
|
|
2015-08-16 05:20:34 +00:00
|
|
|
shardIterator := resp.ShardIterator
|
2014-07-25 06:03:41 +00:00
|
|
|
|
|
|
|
|
for {
|
2015-08-16 05:20:34 +00:00
|
|
|
args := &kinesis.GetRecordsInput{ShardIterator: shardIterator}
|
|
|
|
|
resp, err := svc.GetRecords(args)
|
2014-07-25 06:03:41 +00:00
|
|
|
|
|
|
|
|
if err != nil {
|
2015-08-16 05:20:34 +00:00
|
|
|
if awsErr, ok := err.(awserr.Error); ok {
|
|
|
|
|
if awsErr.Code() == "ProvisionedThroughputExceededException" {
|
|
|
|
|
logger.Log("info", "GetRecords", "shardId", shardID, "msg", "rateLimit")
|
|
|
|
|
time.Sleep(5 * time.Second)
|
|
|
|
|
continue
|
|
|
|
|
} else {
|
|
|
|
|
logger.Log("error", "GetRecords", "shardId", shardID, "code", awsErr.Code(), "msg", awsErr.Message())
|
|
|
|
|
break
|
|
|
|
|
}
|
2015-04-03 22:33:34 +00:00
|
|
|
}
|
2014-07-25 06:03:41 +00:00
|
|
|
}
|
|
|
|
|
|
2015-08-16 05:20:34 +00:00
|
|
|
if len(resp.Records) > 0 {
|
|
|
|
|
for _, r := range resp.Records {
|
|
|
|
|
transformedRecord := p.Transformer.ToRecord(r.Data)
|
2014-07-25 06:03:41 +00:00
|
|
|
|
2015-08-16 05:20:34 +00:00
|
|
|
if p.Filter.KeepRecord(transformedRecord) {
|
|
|
|
|
p.Buffer.ProcessRecord(transformedRecord, *r.SequenceNumber)
|
2014-07-25 06:03:41 +00:00
|
|
|
}
|
|
|
|
|
|
2015-08-16 05:20:34 +00:00
|
|
|
p.checkpointSequenceNumber = *r.SequenceNumber
|
|
|
|
|
}
|
2014-07-25 06:03:41 +00:00
|
|
|
|
2015-08-16 05:20:34 +00:00
|
|
|
if p.Buffer.ShouldFlush() {
|
|
|
|
|
p.Emitter.Emit(p.Buffer, p.Transformer)
|
|
|
|
|
p.Checkpoint.SetCheckpoint(shardID, p.checkpointSequenceNumber)
|
|
|
|
|
p.Buffer.Flush()
|
2014-07-25 06:03:41 +00:00
|
|
|
}
|
2015-08-16 05:20:34 +00:00
|
|
|
} else if resp.NextShardIterator == aws.String("") || shardIterator == resp.NextShardIterator {
|
2015-05-26 04:51:53 +00:00
|
|
|
logger.Log("error", "NextShardIterator", "msg", err.Error())
|
2014-07-25 06:03:41 +00:00
|
|
|
break
|
|
|
|
|
} else {
|
2015-08-16 05:20:34 +00:00
|
|
|
time.Sleep(1 * time.Second)
|
2014-07-25 06:03:41 +00:00
|
|
|
}
|
|
|
|
|
|
2015-08-16 05:20:34 +00:00
|
|
|
shardIterator = resp.NextShardIterator
|
2014-07-25 06:03:41 +00:00
|
|
|
}
|
|
|
|
|
}
|