Add required fields to Config

Taking some inspiration from:
https://github.com/tj/go-kinesis/blob/master/kinesis.go#L50-L75
Harlow Ward 2016-05-07 18:05:52 -07:00
parent f4c8d8090d
commit ceca88b96a
8 changed files with 107 additions and 67 deletions
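For orientation, a minimal before/after sketch of the call-site change (the app and stream names are hypothetical placeholders, not values from this commit):

```go
package main

import (
	"github.com/harlow/kinesis-connectors"
)

func main() {
	// Before this commit, identity was passed positionally and the batch
	// size lived on Config:
	//
	//	c := connector.NewConsumer("archiver", "eventStream", connector.Config{
	//		MaxBatchCount: 400,
	//	})
	//
	// After it, AppName and StreamName are required Config fields,
	// validated by Config.setDefaults inside NewConsumer.
	c := connector.NewConsumer(connector.Config{
		AppName:    "archiver",    // hypothetical
		StreamName: "eventStream", // hypothetical
	})
	_ = c
}
```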


```diff
@@ -22,7 +22,7 @@ func main() {
 	// override library defaults
 	cfg := connector.Config{
-		MaxBatchCount: 400,
+		MaxRecordCount: 400,
 	}
 
 	// create new consumer
```


```diff
@@ -9,7 +9,7 @@ type Buffer struct {
 	firstSequenceNumber string
 	lastSequenceNumber  string
-	MaxBatchCount       int
+	MaxRecordCount      int
 }
 
 // AddRecord adds a record to the buffer.
@@ -24,7 +24,7 @@ func (b *Buffer) AddRecord(r *kinesis.Record) {
 
 // ShouldFlush determines if the buffer has reached its target size.
 func (b *Buffer) ShouldFlush() bool {
-	return b.RecordCount() >= b.MaxBatchCount
+	return b.RecordCount() >= b.MaxRecordCount
 }
 
 // Flush empties the buffer and resets the sequence counter.
```
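To see the renamed field in context, a small sketch mirroring Test_ShouldFlush from the test diff below (sequence numbers are illustrative):

```go
package main

import (
	"fmt"

	"github.com/aws/aws-sdk-go/service/kinesis"
	"github.com/harlow/kinesis-connectors"
)

func main() {
	// Flush once two records have been buffered.
	buf := connector.Buffer{MaxRecordCount: 2}

	s1, s2 := "1", "2"
	buf.AddRecord(&kinesis.Record{SequenceNumber: &s1})
	fmt.Println(buf.ShouldFlush()) // false: RecordCount() < MaxRecordCount

	buf.AddRecord(&kinesis.Record{SequenceNumber: &s2})
	fmt.Println(buf.ShouldFlush()) // true: buffer reached its target size

	buf.Flush() // empties the buffer and resets the sequence counter
}
```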


```diff
@@ -8,7 +8,7 @@ import (
 )
 
 func BenchmarkBufferLifecycle(b *testing.B) {
-	buf := Buffer{MaxBatchCount: 1000}
+	buf := Buffer{MaxRecordCount: 1000}
 	seq := "1"
 	rec := &kinesis.Record{SequenceNumber: &seq}
@@ -48,7 +48,7 @@ func Test_LastSeq(t *testing.T) {
 }
 
 func Test_ShouldFlush(t *testing.T) {
-	b := Buffer{MaxBatchCount: 2}
+	b := Buffer{MaxRecordCount: 2}
 	s1, s2 := "1", "2"
 	r1 := &kinesis.Record{SequenceNumber: &s1}
 	r2 := &kinesis.Record{SequenceNumber: &s2}
```


```diff
@@ -1,10 +1,63 @@
 package connector
 
 import (
+	"os"
+	"time"
+
 	"github.com/apex/log"
 )
 
+const (
+	defaultBufferSize = 500
+)
+
 type Config struct {
-	MaxBatchCount int
-	LogHandler    log.Handler
+	// AppName is the application name.
+	AppName string
+
+	// StreamName is the Kinesis stream.
+	StreamName string
+
+	// FlushInterval is a regular interval for flushing the buffer. Defaults to 1s.
+	FlushInterval time.Duration
+
+	// BufferSize determines the batch request size. Must not exceed 500. Defaults to 500.
+	BufferSize int
+
+	// Logger is the logger used. Defaults to log.Log.
+	Logger log.Interface
+}
+
+// setDefaults validates required fields and applies configuration defaults.
+func (c *Config) setDefaults() {
+	if c.Logger == nil {
+		c.Logger = log.Log
+	}
+
+	c.Logger = c.Logger.WithFields(log.Fields{
+		"package": "kinesis-connectors",
+	})
+
+	if c.AppName == "" {
+		c.Logger.WithField("type", "config").Error("AppName required")
+		os.Exit(1)
+	}
+
+	if c.StreamName == "" {
+		c.Logger.WithField("type", "config").Error("StreamName required")
+		os.Exit(1)
+	}
+
+	c.Logger = c.Logger.WithFields(log.Fields{
+		"app":    c.AppName,
+		"stream": c.StreamName,
+	})
+
+	if c.BufferSize == 0 {
+		c.BufferSize = defaultBufferSize
+	}
+
+	if c.FlushInterval == 0 {
+		c.FlushInterval = time.Second
+	}
 }
```
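The upshot of setDefaults: a Config carrying only the two required fields comes out fully populated. A sketch of the observable result (names are hypothetical; NewConsumer still needs valid AWS credentials before Start is useful):

```go
package main

import (
	"fmt"

	"github.com/harlow/kinesis-connectors"
)

func main() {
	// setDefaults is unexported; it runs inside NewConsumer. Per the code
	// above, an empty AppName or StreamName logs an error and exits.
	c := connector.NewConsumer(connector.Config{
		AppName:    "archiver",    // hypothetical
		StreamName: "eventStream", // hypothetical
	})

	// Consumer embeds Config, so the defaulted values are visible here:
	// BufferSize == 500, FlushInterval == 1s, and Logger == log.Log scoped
	// with package/app/stream fields.
	fmt.Println(c.BufferSize, c.FlushInterval)
}
```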


```diff
@@ -1,28 +1,17 @@
 package connector
 
 import (
-	"log"
+	"os"
 
-	apexlog "github.com/apex/log"
-	"github.com/apex/log/handlers/discard"
+	"github.com/apex/log"
 	"github.com/aws/aws-sdk-go/aws"
 	"github.com/aws/aws-sdk-go/aws/session"
 	"github.com/aws/aws-sdk-go/service/kinesis"
 )
 
-const (
-	defaultMaxBatchCount = 1000
-)
-
 // NewConsumer creates a new consumer with initialized kinesis connection
-func NewConsumer(appName, streamName string, cfg Config) *Consumer {
-	if cfg.LogHandler == nil {
-		cfg.LogHandler = discard.New()
-	}
-
-	if cfg.MaxBatchCount == 0 {
-		cfg.MaxBatchCount = defaultMaxBatchCount
-	}
+func NewConsumer(config Config) *Consumer {
+	config.setDefaults()
 
 	svc := kinesis.New(
 		session.New(
@@ -31,33 +20,28 @@ func NewConsumer(appName, streamName string, cfg Config) *Consumer {
 	)
 
 	return &Consumer{
-		appName:    appName,
-		streamName: streamName,
-		svc:        svc,
-		cfg:        cfg,
+		svc:    svc,
+		Config: config,
 	}
 }
 
 type Consumer struct {
-	appName    string
-	streamName string
-	svc        *kinesis.Kinesis
-	cfg        Config
+	svc *kinesis.Kinesis
+	Config
 }
 
 // Start takes a handler and then loops over each of the shards
 // processing each one with the handler.
 func (c *Consumer) Start(handler Handler) {
-	apexlog.SetHandler(c.cfg.LogHandler)
-
 	resp, err := c.svc.DescribeStream(
 		&kinesis.DescribeStreamInput{
-			StreamName: aws.String(c.streamName),
+			StreamName: aws.String(c.StreamName),
 		},
 	)
 
 	if err != nil {
-		log.Fatalf("Error DescribeStream %v", err)
+		c.Logger.WithError(err).Error("DescribeStream")
+		os.Exit(1)
 	}
 
 	for _, shard := range resp.StreamDescription.Shards {
@@ -66,24 +50,18 @@ func (c *Consumer) Start(handler Handler) {
 }
 
 func (c *Consumer) handlerLoop(shardID string, handler Handler) {
-	ctx := apexlog.WithFields(apexlog.Fields{
-		"app":    c.appName,
-		"stream": c.streamName,
-		"shard":  shardID,
-	})
-
 	buf := &Buffer{
-		MaxBatchCount: c.cfg.MaxBatchCount,
+		MaxRecordCount: c.BufferSize,
 	}
 
 	checkpoint := &Checkpoint{
-		AppName:    c.appName,
-		StreamName: c.streamName,
+		AppName:    c.AppName,
+		StreamName: c.StreamName,
 	}
 
 	params := &kinesis.GetShardIteratorInput{
 		ShardId:    aws.String(shardID),
-		StreamName: aws.String(c.streamName),
+		StreamName: aws.String(c.StreamName),
 	}
 
 	if checkpoint.CheckpointExists(shardID) {
@@ -95,10 +73,16 @@ func (c *Consumer) handlerLoop(shardID string, handler Handler) {
 	resp, err := c.svc.GetShardIterator(params)
 
 	if err != nil {
-		log.Fatalf("Error GetShardIterator %v", err)
+		c.Logger.WithError(err).Error("GetShardIterator")
+		os.Exit(1)
 	}
 
 	shardIterator := resp.ShardIterator
+
+	ctx := c.Logger.WithFields(log.Fields{
+		"shard": shardID,
+	})
+
 	ctx.Info("processing")
 
 	for {
@@ -118,13 +102,14 @@ func (c *Consumer) handlerLoop(shardID string, handler Handler) {
 			if buf.ShouldFlush() {
 				handler.HandleRecords(*buf)
-				ctx.WithField("count", buf.RecordCount()).Info("emitted")
+				ctx.WithField("count", buf.RecordCount()).Info("flushed")
 				checkpoint.SetCheckpoint(shardID, buf.LastSeq())
 				buf.Flush()
 			}
 		}
 	} else if resp.NextShardIterator == aws.String("") || shardIterator == resp.NextShardIterator {
-		log.Fatalf("Error NextShardIterator")
+		c.Logger.Error("NextShardIterator")
+		os.Exit(1)
 	}
 
 	shardIterator = resp.NextShardIterator
```
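Putting the consumer pieces together, a sketch of a full consumer on the reworked API (handler body and names are hypothetical; whether Start blocks after spawning the shard loops is not visible in these hunks, so the trailing select keeps main alive either way):

```go
package main

import (
	"github.com/apex/log"
	"github.com/harlow/kinesis-connectors"
)

func main() {
	c := connector.NewConsumer(connector.Config{
		AppName:    "counter",     // hypothetical
		StreamName: "clickStream", // hypothetical
		BufferSize: 400,           // optional; defaults to 500, must not exceed 500
	})

	// HandleRecords receives each flushed Buffer; the checkpoint is set
	// after the handler returns, so any work done here should be durable.
	c.Start(connector.HandlerFunc(func(b connector.Buffer) {
		log.WithField("count", b.RecordCount()).Info("batch handled")
	}))

	select {} // block so the shard goroutines keep processing
}
```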


```diff
@@ -32,7 +32,7 @@ func main() {
 	svc := firehose.New(session.New())
 
 	cfg := connector.Config{
-		MaxBatchCount: 400,
+		MaxRecordCount: 400,
 	}
 
 	c := connector.NewConsumer(*app, *stream, cfg)
```


```diff
@@ -9,7 +9,7 @@ import (
 	"github.com/apex/log/handlers/text"
 	"github.com/aws/aws-sdk-go/aws/session"
 	"github.com/aws/aws-sdk-go/service/kinesis"
-	prdcr "github.com/tj/go-kinesis"
+	producer "github.com/tj/go-kinesis"
 )
 
 // Note: download file with test data
@@ -19,15 +19,16 @@ var stream = flag.String("s", "", "Stream name")
 func main() {
 	flag.Parse()
 	log.SetHandler(text.New(os.Stderr))
+	log.SetLevel(log.DebugLevel)
 
 	// set up producer
 	svc := kinesis.New(session.New())
-	producer := prdcr.New(prdcr.Config{
+	p := producer.New(producer.Config{
 		StreamName:  *stream,
 		BacklogSize: 500,
 		Client:      svc,
 	})
-	producer.Start()
+	p.Start()
 
 	// open data file
 	f, err := os.Open("/tmp/users.txt")
@@ -39,12 +40,12 @@ func main() {
 	// loop over file data
 	b := bufio.NewScanner(f)
 	for b.Scan() {
-		err := producer.Put(b.Bytes(), "site")
+		err := p.Put(b.Bytes(), "site")
 		if err != nil {
 			log.WithError(err).Fatal("error producing")
 		}
 	}
 
-	producer.Stop()
+	p.Stop()
 }
```


```diff
@@ -6,31 +6,32 @@ import (
 	"fmt"
 	"os"
 
+	"github.com/apex/log"
 	"github.com/apex/log/handlers/text"
 	"github.com/harlow/kinesis-connectors"
 	"github.com/harlow/kinesis-connectors/emitter/s3"
 )
 
-var (
-	app    = flag.String("a", "", "App name")
-	bucket = flag.String("b", "", "Bucket name")
-	stream = flag.String("s", "", "Stream name")
-)
-
 func main() {
+	log.SetHandler(text.New(os.Stderr))
+	log.SetLevel(log.DebugLevel)
+
+	var (
+		app    = flag.String("a", "", "App name")
+		bucket = flag.String("b", "", "Bucket name")
+		stream = flag.String("s", "", "Stream name")
+	)
 	flag.Parse()
 
-	emitter := &s3.Emitter{
+	e := &s3.Emitter{
 		Bucket: *bucket,
 		Region: "us-west-1",
 	}
 
-	cfg := connector.Config{
-		MaxBatchCount: 500,
-		LogHandler:    text.New(os.Stderr),
-	}
-
-	c := connector.NewConsumer(*app, *stream, cfg)
+	c := connector.NewConsumer(connector.Config{
+		AppName:    *app,
+		StreamName: *stream,
+	})
 
 	c.Start(connector.HandlerFunc(func(b connector.Buffer) {
 		body := new(bytes.Buffer)
@@ -39,7 +40,7 @@ func main() {
 			body.Write(r.Data)
 		}
 
-		err := emitter.Emit(
+		err := e.Emit(
 			s3.Key("", b.FirstSeq(), b.LastSeq()),
 			bytes.NewReader(body.Bytes()),
 		)
```