2017-08-04 09:36:42 +00:00
|
|
|
package batchconsumer
|
|
|
|
|
|
|
|
|
|
import (
|
|
|
|
|
"os"
|
|
|
|
|
"time"
|
|
|
|
|
|
|
|
|
|
kv "gopkg.in/Clever/kayvee-go.v6/logger"
|
|
|
|
|
|
2017-08-07 03:05:41 +00:00
|
|
|
"github.com/Clever/amazon-kinesis-client-go/batchconsumer/stats"
|
2017-08-04 09:36:42 +00:00
|
|
|
"github.com/Clever/amazon-kinesis-client-go/kcl"
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
// tagMsgPair is the unit passed over the batchMsg channel: one message together
// with the tag it is batched under and the sequence pair supplied with it.
type tagMsgPair struct {
	// tag selects which batcher the message is added to.
	tag string
	// msg is the raw message payload.
	msg []byte
	// pair is handed to the batcher alongside msg.
	pair kcl.SequencePair
}
|
|
|
|
|
|
2017-08-10 20:11:24 +00:00
|
|
|
// batcherManagerConfig carries the tunables that newBatcherManager copies into a
// batcherManager.
type batcherManagerConfig struct {
	// BatchCount and BatchSize become the flushCount and flushSize of every
	// batcher the manager creates.
	BatchCount, BatchSize int

	// BatchInterval is how long a batcher may go without an update before the
	// periodic stale-batch flush sends it.
	BatchInterval time.Duration
}
|
|
|
|
|
|
2017-08-04 09:36:42 +00:00
|
|
|
type batcherManager struct {
|
|
|
|
|
log kv.KayveeLogger
|
|
|
|
|
sender Sender
|
|
|
|
|
chkpntManager *checkpointManager
|
|
|
|
|
|
|
|
|
|
batchCount int
|
|
|
|
|
batchSize int
|
|
|
|
|
batchInterval time.Duration
|
|
|
|
|
|
|
|
|
|
batchMsg chan tagMsgPair
|
|
|
|
|
lastIgnored chan kcl.SequencePair
|
|
|
|
|
lastProcessed chan kcl.SequencePair
|
2017-08-08 19:09:31 +00:00
|
|
|
shutdown chan chan<- struct{}
|
2017-08-04 09:36:42 +00:00
|
|
|
}
|
|
|
|
|
|
2017-08-10 19:21:46 +00:00
|
|
|
// newBatcherManager builds a batcherManager from the given sender, checkpoint
// manager, configuration and logger, starts its message handler, and returns it.
func newBatcherManager(
	sender Sender, chkpntManager *checkpointManager, cfg batcherManagerConfig, log kv.KayveeLogger,
) *batcherManager {
	bm := new(batcherManager)

	bm.log = log
	bm.sender = sender
	bm.chkpntManager = chkpntManager

	bm.batchCount = cfg.BatchCount
	bm.batchSize = cfg.BatchSize
	bm.batchInterval = cfg.BatchInterval

	// Every channel is unbuffered, so each request is handed directly to the
	// handler goroutine.
	bm.batchMsg = make(chan tagMsgPair)
	bm.lastIgnored = make(chan kcl.SequencePair)
	bm.lastProcessed = make(chan kcl.SequencePair)
	// shutdown chan takes "done" channel to signal when batchermanager is done shutting down
	bm.shutdown = make(chan chan<- struct{})

	bm.startMessageHandler(bm.batchMsg, bm.lastIgnored, bm.lastProcessed, bm.shutdown)

	return bm
}
|
|
|
|
|
|
|
|
|
|
// BatchMessage submits msg, with its tag and sequence pair, to the message
// handler by sending on the batchMsg channel.
func (b *batcherManager) BatchMessage(tag string, msg []byte, pair kcl.SequencePair) {
	item := tagMsgPair{tag: tag, msg: msg, pair: pair}
	b.batchMsg <- item
}
|
|
|
|
|
|
|
|
|
|
// LatestIgnored reports pair to the message handler, which records it as the
// most recent ignored pair and checkpoints it when no batcher has pending
// messages.
func (b *batcherManager) LatestIgnored(pair kcl.SequencePair) {
	ignored := b.lastIgnored
	ignored <- pair
}
|
|
|
|
|
|
|
|
|
|
// LatestProcessed reports pair to the message handler, which records it as the
// most recent processed pair; that pair is what the handler checkpoints during
// shutdown.
func (b *batcherManager) LatestProcessed(pair kcl.SequencePair) {
	processed := b.lastProcessed
	processed <- pair
}
|
|
|
|
|
|
2017-08-08 19:09:31 +00:00
|
|
|
// Shutdown asks the message handler to stop and returns a channel on which the
// handler sends a single value once it has flushed its batchers and the
// checkpoint manager has finished shutting down.
func (b *batcherManager) Shutdown() <-chan struct{} {
	finished := make(chan struct{})

	// Hand the handler the channel it should signal on.
	b.shutdown <- finished

	return finished
}
|
|
|
|
|
|
|
|
|
|
// createBatcher returns a new batcher whose flush thresholds are the manager's
// configured batch count and batch size.
func (b *batcherManager) createBatcher() *batcher {
	fresh := new(batcher)
	fresh.flushCount = b.batchCount
	fresh.flushSize = b.batchSize
	return fresh
}
|
|
|
|
|
|
|
|
|
|
// sendBatch sends the batcher's pending messages under tag and then clears the
// batcher. An empty batch is a no-op. When the sender returns a
// PartialSendBatchError, the error and every failed message are logged and
// counted, and the batch is still cleared. Any other non-nil error is logged as
// critical and the process exits with status 1.
func (b *batcherManager) sendBatch(batcher *batcher, tag string) {
	if len(batcher.Batch) == 0 {
		return
	}

	if err := b.sender.SendBatch(batcher.Batch, tag); err != nil {
		partial, isPartial := err.(PartialSendBatchError)
		if !isPartial {
			// Catastrophic and unrecognised errors are handled the same way:
			// log and terminate.
			b.log.CriticalD("send-batch", kv.M{"msg": err.Error()})
			os.Exit(1)
		}

		b.log.ErrorD("send-batch", kv.M{"msg": partial.Error()})
		for _, failed := range partial.FailedMessages {
			b.log.ErrorD("failed-log", kv.M{"log": failed})
		}
		stats.Counter("batch-log-failures", len(partial.FailedMessages))
	}

	batcher.Clear()
	stats.Counter("batches-sent", 1)
}
|
|
|
|
|
|
|
|
|
|
// sendCheckpoint picks a sequence pair to checkpoint and passes it to the
// checkpoint manager. It starts from lastIgnoredPair and replaces it with the
// SmallestSeq of any batcher that still holds messages when the current pair is
// nil or that SmallestSeq is less than it. The batcher registered under tag is
// excluded. Nothing is checkpointed when the resulting pair is nil.
func (b *batcherManager) sendCheckpoint(
	tag string, lastIgnoredPair kcl.SequencePair, batchers map[string]*batcher,
) {
	candidate := lastIgnoredPair

	for name, pending := range batchers {
		// Skip the excluded tag and batchers with nothing waiting.
		if name == tag || len(pending.Batch) == 0 {
			continue
		}

		// The nil check matters because it's possible that no messages have
		// been ignored yet.
		if candidate.IsNil() || pending.SmallestSeq.IsLessThan(candidate) {
			candidate = pending.SmallestSeq
		}
	}

	if candidate.IsNil() {
		return
	}
	b.chkpntManager.Checkpoint(candidate)
}
|
|
|
|
|
|
|
|
|
|
// startMessageHandler starts a goroutine that routes messages to per-tag
// batchers, flushes batches that have gone stale, tracks the latest ignored and
// processed sequence pairs, and handles shutdown. All batcher state lives in
// that one goroutine to avoid racy conditions.
//
// batchMsg delivers messages to batch. lastIgnored and lastProcessed deliver
// the corresponding sequence pairs. shutdown delivers a "done" channel; after
// flushing every batcher, checkpointing the last processed pair and waiting for
// the checkpoint manager to shut down, the goroutine sends one value on it and
// exits.
func (b *batcherManager) startMessageHandler(
	batchMsg <-chan tagMsgPair, lastIgnored, lastProcessed <-chan kcl.SequencePair,
	shutdown <-chan chan<- struct{},
) {
	go func() {
		// Once a second, flush batches that haven't been updated recently. The
		// ticker is owned by this goroutine and stopped when it returns, so no
		// helper goroutine is left blocked after shutdown.
		flushStaleBatches := time.NewTicker(time.Second)
		defer flushStaleBatches.Stop()

		var lastProcessedPair kcl.SequencePair
		var lastIgnoredPair kcl.SequencePair
		batchers := map[string]*batcher{}

		for {
			select {
			case <-flushStaleBatches.C:
				for tag, batcher := range batchers {
					if b.batchInterval <= time.Since(batcher.LastUpdated) {
						b.sendBatch(batcher, tag)
						b.sendCheckpoint(tag, lastIgnoredPair, batchers)
						// sendBatch returns early without clearing when the
						// batch is empty, so clear here as well.
						batcher.Clear()
					}
				}

			case tmp := <-batchMsg:
				batcher, ok := batchers[tmp.tag]
				if !ok {
					batcher = b.createBatcher()
					batchers[tmp.tag] = batcher
					stats.Gauge("tag-count", len(batchers))
				}

				err := batcher.AddMessage(tmp.msg, tmp.pair)
				if err == ErrBatchFull {
					b.sendBatch(batcher, tmp.tag)
					b.sendCheckpoint(tmp.tag, lastIgnoredPair, batchers)

					// Retry now that the batch has been flushed; a failure
					// here is logged below instead of being dropped.
					err = batcher.AddMessage(tmp.msg, tmp.pair)
				}
				if err != nil {
					b.log.ErrorD("add-message", kv.M{
						"err": err.Error(), "msg": string(tmp.msg), "tag": tmp.tag,
					})
				}
				stats.Counter("msg-batched", 1)

			case pair := <-lastIgnored:
				lastIgnoredPair = pair

				// Only checkpoint the ignored pair when no batcher is still
				// holding messages.
				isPendingMessages := false
				for _, batcher := range batchers {
					if len(batcher.Batch) > 0 {
						isPendingMessages = true
						break
					}
				}

				if !isPendingMessages {
					b.chkpntManager.Checkpoint(lastIgnoredPair)
				}

			case pair := <-lastProcessed:
				lastProcessedPair = pair

			case done := <-shutdown:
				for tag, batcher := range batchers {
					b.sendBatch(batcher, tag)
				}
				b.chkpntManager.Checkpoint(lastProcessedPair)
				chkDone := b.chkpntManager.Shutdown()
				<-chkDone

				done <- struct{}{}
				return
			}
		}
	}()
}
|