Add required fields to Config
Taking some inspiration from: https://github.com/tj/go-kinesis/blob/master/kinesis.go#L50-L75
This commit is contained in:
parent
f4c8d8090d
commit
ceca88b96a
8 changed files with 107 additions and 67 deletions
|
|
@ -22,7 +22,7 @@ func main() {
|
||||||
|
|
||||||
// override library defaults
|
// override library defaults
|
||||||
cfg := connector.Config{
|
cfg := connector.Config{
|
||||||
MaxBatchCount: 400,
|
MaxRecordCount: 400,
|
||||||
}
|
}
|
||||||
|
|
||||||
// create new consumer
|
// create new consumer
|
||||||
|
|
|
||||||
|
|
@ -9,7 +9,7 @@ type Buffer struct {
|
||||||
firstSequenceNumber string
|
firstSequenceNumber string
|
||||||
lastSequenceNumber string
|
lastSequenceNumber string
|
||||||
|
|
||||||
MaxBatchCount int
|
MaxRecordCount int
|
||||||
}
|
}
|
||||||
|
|
||||||
// AddRecord adds a record to the buffer.
|
// AddRecord adds a record to the buffer.
|
||||||
|
|
@ -24,7 +24,7 @@ func (b *Buffer) AddRecord(r *kinesis.Record) {
|
||||||
|
|
||||||
// ShouldFlush determines if the buffer has reached its target size.
|
// ShouldFlush determines if the buffer has reached its target size.
|
||||||
func (b *Buffer) ShouldFlush() bool {
|
func (b *Buffer) ShouldFlush() bool {
|
||||||
return b.RecordCount() >= b.MaxBatchCount
|
return b.RecordCount() >= b.MaxRecordCount
|
||||||
}
|
}
|
||||||
|
|
||||||
// Flush empties the buffer and resets the sequence counter.
|
// Flush empties the buffer and resets the sequence counter.
|
||||||
|
|
|
||||||
|
|
@ -8,7 +8,7 @@ import (
|
||||||
)
|
)
|
||||||
|
|
||||||
func BenchmarkBufferLifecycle(b *testing.B) {
|
func BenchmarkBufferLifecycle(b *testing.B) {
|
||||||
buf := Buffer{MaxBatchCount: 1000}
|
buf := Buffer{MaxRecordCount: 1000}
|
||||||
seq := "1"
|
seq := "1"
|
||||||
rec := &kinesis.Record{SequenceNumber: &seq}
|
rec := &kinesis.Record{SequenceNumber: &seq}
|
||||||
|
|
||||||
|
|
@ -48,7 +48,7 @@ func Test_LastSeq(t *testing.T) {
|
||||||
}
|
}
|
||||||
|
|
||||||
func Test_ShouldFlush(t *testing.T) {
|
func Test_ShouldFlush(t *testing.T) {
|
||||||
b := Buffer{MaxBatchCount: 2}
|
b := Buffer{MaxRecordCount: 2}
|
||||||
s1, s2 := "1", "2"
|
s1, s2 := "1", "2"
|
||||||
r1 := &kinesis.Record{SequenceNumber: &s1}
|
r1 := &kinesis.Record{SequenceNumber: &s1}
|
||||||
r2 := &kinesis.Record{SequenceNumber: &s2}
|
r2 := &kinesis.Record{SequenceNumber: &s2}
|
||||||
|
|
|
||||||
57
config.go
57
config.go
|
|
@ -1,10 +1,63 @@
|
||||||
package connector
|
package connector
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"os"
|
||||||
|
"time"
|
||||||
|
|
||||||
"github.com/apex/log"
|
"github.com/apex/log"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
const (
|
||||||
|
defaultBufferSize = 500
|
||||||
|
)
|
||||||
|
|
||||||
type Config struct {
|
type Config struct {
|
||||||
MaxBatchCount int
|
// AppName is the application name.
|
||||||
LogHandler log.Handler
|
AppName string
|
||||||
|
|
||||||
|
// StreamName is the Kinesis stream.
|
||||||
|
StreamName string
|
||||||
|
|
||||||
|
// FlushInterval is a regular interval for flushing the buffer. Defaults to 1s.
|
||||||
|
FlushInterval time.Duration
|
||||||
|
|
||||||
|
// BufferSize determines the batch request size. Must not exceed 500. Defaults to 500.
|
||||||
|
BufferSize int
|
||||||
|
|
||||||
|
// Logger is the logger used. Defaults to log.Log.
|
||||||
|
Logger log.Interface
|
||||||
|
}
|
||||||
|
|
||||||
|
// setDefaults fills in default values and validates required configuration fields.
|
||||||
|
func (c *Config) setDefaults() {
|
||||||
|
if c.Logger == nil {
|
||||||
|
c.Logger = log.Log
|
||||||
|
}
|
||||||
|
|
||||||
|
c.Logger = c.Logger.WithFields(log.Fields{
|
||||||
|
"package": "kinesis-connectors",
|
||||||
|
})
|
||||||
|
|
||||||
|
if c.AppName == "" {
|
||||||
|
c.Logger.WithField("type", "config").Error("AppName required")
|
||||||
|
os.Exit(1)
|
||||||
|
}
|
||||||
|
|
||||||
|
if c.StreamName == "" {
|
||||||
|
c.Logger.WithField("type", "config").Error("StreamName required")
|
||||||
|
os.Exit(1)
|
||||||
|
}
|
||||||
|
|
||||||
|
c.Logger = c.Logger.WithFields(log.Fields{
|
||||||
|
"app": c.AppName,
|
||||||
|
"stream": c.StreamName,
|
||||||
|
})
|
||||||
|
|
||||||
|
if c.BufferSize == 0 {
|
||||||
|
c.BufferSize = defaultBufferSize
|
||||||
|
}
|
||||||
|
|
||||||
|
if c.FlushInterval == 0 {
|
||||||
|
c.FlushInterval = time.Second
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
61
consumer.go
61
consumer.go
|
|
@ -1,28 +1,17 @@
|
||||||
package connector
|
package connector
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"log"
|
"os"
|
||||||
|
|
||||||
apexlog "github.com/apex/log"
|
"github.com/apex/log"
|
||||||
"github.com/apex/log/handlers/discard"
|
|
||||||
"github.com/aws/aws-sdk-go/aws"
|
"github.com/aws/aws-sdk-go/aws"
|
||||||
"github.com/aws/aws-sdk-go/aws/session"
|
"github.com/aws/aws-sdk-go/aws/session"
|
||||||
"github.com/aws/aws-sdk-go/service/kinesis"
|
"github.com/aws/aws-sdk-go/service/kinesis"
|
||||||
)
|
)
|
||||||
|
|
||||||
const (
|
|
||||||
defaultMaxBatchCount = 1000
|
|
||||||
)
|
|
||||||
|
|
||||||
// NewConsumer creates a new consumer with initialized kinesis connection
|
// NewConsumer creates a new consumer with initialized kinesis connection
|
||||||
func NewConsumer(appName, streamName string, cfg Config) *Consumer {
|
func NewConsumer(config Config) *Consumer {
|
||||||
if cfg.LogHandler == nil {
|
config.setDefaults()
|
||||||
cfg.LogHandler = discard.New()
|
|
||||||
}
|
|
||||||
|
|
||||||
if cfg.MaxBatchCount == 0 {
|
|
||||||
cfg.MaxBatchCount = defaultMaxBatchCount
|
|
||||||
}
|
|
||||||
|
|
||||||
svc := kinesis.New(
|
svc := kinesis.New(
|
||||||
session.New(
|
session.New(
|
||||||
|
|
@ -31,33 +20,28 @@ func NewConsumer(appName, streamName string, cfg Config) *Consumer {
|
||||||
)
|
)
|
||||||
|
|
||||||
return &Consumer{
|
return &Consumer{
|
||||||
appName: appName,
|
|
||||||
streamName: streamName,
|
|
||||||
svc: svc,
|
svc: svc,
|
||||||
cfg: cfg,
|
Config: config,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
type Consumer struct {
|
type Consumer struct {
|
||||||
appName string
|
|
||||||
streamName string
|
|
||||||
svc *kinesis.Kinesis
|
svc *kinesis.Kinesis
|
||||||
cfg Config
|
Config
|
||||||
}
|
}
|
||||||
|
|
||||||
// Start takes a handler and then loops over each of the shards
|
// Start takes a handler and then loops over each of the shards
|
||||||
// processing each one with the handler.
|
// processing each one with the handler.
|
||||||
func (c *Consumer) Start(handler Handler) {
|
func (c *Consumer) Start(handler Handler) {
|
||||||
apexlog.SetHandler(c.cfg.LogHandler)
|
|
||||||
|
|
||||||
resp, err := c.svc.DescribeStream(
|
resp, err := c.svc.DescribeStream(
|
||||||
&kinesis.DescribeStreamInput{
|
&kinesis.DescribeStreamInput{
|
||||||
StreamName: aws.String(c.streamName),
|
StreamName: aws.String(c.StreamName),
|
||||||
},
|
},
|
||||||
)
|
)
|
||||||
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Fatalf("Error DescribeStream %v", err)
|
c.Logger.WithError(err).Error("DescribeStream")
|
||||||
|
os.Exit(1)
|
||||||
}
|
}
|
||||||
|
|
||||||
for _, shard := range resp.StreamDescription.Shards {
|
for _, shard := range resp.StreamDescription.Shards {
|
||||||
|
|
@ -66,24 +50,18 @@ func (c *Consumer) Start(handler Handler) {
|
||||||
}
|
}
|
||||||
|
|
||||||
func (c *Consumer) handlerLoop(shardID string, handler Handler) {
|
func (c *Consumer) handlerLoop(shardID string, handler Handler) {
|
||||||
ctx := apexlog.WithFields(apexlog.Fields{
|
|
||||||
"app": c.appName,
|
|
||||||
"stream": c.streamName,
|
|
||||||
"shard": shardID,
|
|
||||||
})
|
|
||||||
|
|
||||||
buf := &Buffer{
|
buf := &Buffer{
|
||||||
MaxBatchCount: c.cfg.MaxBatchCount,
|
MaxRecordCount: c.BufferSize,
|
||||||
}
|
}
|
||||||
|
|
||||||
checkpoint := &Checkpoint{
|
checkpoint := &Checkpoint{
|
||||||
AppName: c.appName,
|
AppName: c.AppName,
|
||||||
StreamName: c.streamName,
|
StreamName: c.StreamName,
|
||||||
}
|
}
|
||||||
|
|
||||||
params := &kinesis.GetShardIteratorInput{
|
params := &kinesis.GetShardIteratorInput{
|
||||||
ShardId: aws.String(shardID),
|
ShardId: aws.String(shardID),
|
||||||
StreamName: aws.String(c.streamName),
|
StreamName: aws.String(c.StreamName),
|
||||||
}
|
}
|
||||||
|
|
||||||
if checkpoint.CheckpointExists(shardID) {
|
if checkpoint.CheckpointExists(shardID) {
|
||||||
|
|
@ -95,10 +73,16 @@ func (c *Consumer) handlerLoop(shardID string, handler Handler) {
|
||||||
|
|
||||||
resp, err := c.svc.GetShardIterator(params)
|
resp, err := c.svc.GetShardIterator(params)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Fatalf("Error GetShardIterator %v", err)
|
c.Logger.WithError(err).Error("GetShardIterator")
|
||||||
|
os.Exit(1)
|
||||||
}
|
}
|
||||||
|
|
||||||
shardIterator := resp.ShardIterator
|
shardIterator := resp.ShardIterator
|
||||||
|
|
||||||
|
ctx := c.Logger.WithFields(log.Fields{
|
||||||
|
"shard": shardID,
|
||||||
|
})
|
||||||
|
|
||||||
ctx.Info("processing")
|
ctx.Info("processing")
|
||||||
|
|
||||||
for {
|
for {
|
||||||
|
|
@ -118,13 +102,14 @@ func (c *Consumer) handlerLoop(shardID string, handler Handler) {
|
||||||
|
|
||||||
if buf.ShouldFlush() {
|
if buf.ShouldFlush() {
|
||||||
handler.HandleRecords(*buf)
|
handler.HandleRecords(*buf)
|
||||||
ctx.WithField("count", buf.RecordCount()).Info("emitted")
|
ctx.WithField("count", buf.RecordCount()).Info("flushed")
|
||||||
checkpoint.SetCheckpoint(shardID, buf.LastSeq())
|
checkpoint.SetCheckpoint(shardID, buf.LastSeq())
|
||||||
buf.Flush()
|
buf.Flush()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} else if resp.NextShardIterator == aws.String("") || shardIterator == resp.NextShardIterator {
|
} else if resp.NextShardIterator == aws.String("") || shardIterator == resp.NextShardIterator {
|
||||||
log.Fatalf("Error NextShardIterator")
|
c.Logger.Error("NextShardIterator")
|
||||||
|
os.Exit(1)
|
||||||
}
|
}
|
||||||
|
|
||||||
shardIterator = resp.NextShardIterator
|
shardIterator = resp.NextShardIterator
|
||||||
|
|
|
||||||
|
|
@ -32,7 +32,7 @@ func main() {
|
||||||
svc := firehose.New(session.New())
|
svc := firehose.New(session.New())
|
||||||
|
|
||||||
cfg := connector.Config{
|
cfg := connector.Config{
|
||||||
MaxBatchCount: 400,
|
MaxRecordCount: 400,
|
||||||
}
|
}
|
||||||
|
|
||||||
c := connector.NewConsumer(*app, *stream, cfg)
|
c := connector.NewConsumer(*app, *stream, cfg)
|
||||||
|
|
|
||||||
|
|
@ -9,7 +9,7 @@ import (
|
||||||
"github.com/apex/log/handlers/text"
|
"github.com/apex/log/handlers/text"
|
||||||
"github.com/aws/aws-sdk-go/aws/session"
|
"github.com/aws/aws-sdk-go/aws/session"
|
||||||
"github.com/aws/aws-sdk-go/service/kinesis"
|
"github.com/aws/aws-sdk-go/service/kinesis"
|
||||||
prdcr "github.com/tj/go-kinesis"
|
producer "github.com/tj/go-kinesis"
|
||||||
)
|
)
|
||||||
|
|
||||||
// Note: download file with test data
|
// Note: download file with test data
|
||||||
|
|
@ -19,15 +19,16 @@ var stream = flag.String("s", "", "Stream name")
|
||||||
func main() {
|
func main() {
|
||||||
flag.Parse()
|
flag.Parse()
|
||||||
log.SetHandler(text.New(os.Stderr))
|
log.SetHandler(text.New(os.Stderr))
|
||||||
|
log.SetLevel(log.DebugLevel)
|
||||||
|
|
||||||
// set up producer
|
// set up producer
|
||||||
svc := kinesis.New(session.New())
|
svc := kinesis.New(session.New())
|
||||||
producer := prdcr.New(prdcr.Config{
|
p := producer.New(producer.Config{
|
||||||
StreamName: *stream,
|
StreamName: *stream,
|
||||||
BacklogSize: 500,
|
BacklogSize: 500,
|
||||||
Client: svc,
|
Client: svc,
|
||||||
})
|
})
|
||||||
producer.Start()
|
p.Start()
|
||||||
|
|
||||||
// open data file
|
// open data file
|
||||||
f, err := os.Open("/tmp/users.txt")
|
f, err := os.Open("/tmp/users.txt")
|
||||||
|
|
@ -39,12 +40,12 @@ func main() {
|
||||||
// loop over file data
|
// loop over file data
|
||||||
b := bufio.NewScanner(f)
|
b := bufio.NewScanner(f)
|
||||||
for b.Scan() {
|
for b.Scan() {
|
||||||
err := producer.Put(b.Bytes(), "site")
|
err := p.Put(b.Bytes(), "site")
|
||||||
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.WithError(err).Fatal("error producing")
|
log.WithError(err).Fatal("error producing")
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
producer.Stop()
|
p.Stop()
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -6,31 +6,32 @@ import (
|
||||||
"fmt"
|
"fmt"
|
||||||
"os"
|
"os"
|
||||||
|
|
||||||
|
"github.com/apex/log"
|
||||||
"github.com/apex/log/handlers/text"
|
"github.com/apex/log/handlers/text"
|
||||||
"github.com/harlow/kinesis-connectors"
|
"github.com/harlow/kinesis-connectors"
|
||||||
"github.com/harlow/kinesis-connectors/emitter/s3"
|
"github.com/harlow/kinesis-connectors/emitter/s3"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
func main() {
|
||||||
|
log.SetHandler(text.New(os.Stderr))
|
||||||
|
log.SetLevel(log.DebugLevel)
|
||||||
|
|
||||||
var (
|
var (
|
||||||
app = flag.String("a", "", "App name")
|
app = flag.String("a", "", "App name")
|
||||||
bucket = flag.String("b", "", "Bucket name")
|
bucket = flag.String("b", "", "Bucket name")
|
||||||
stream = flag.String("s", "", "Stream name")
|
stream = flag.String("s", "", "Stream name")
|
||||||
)
|
)
|
||||||
|
|
||||||
func main() {
|
|
||||||
flag.Parse()
|
flag.Parse()
|
||||||
|
|
||||||
emitter := &s3.Emitter{
|
e := &s3.Emitter{
|
||||||
Bucket: *bucket,
|
Bucket: *bucket,
|
||||||
Region: "us-west-1",
|
Region: "us-west-1",
|
||||||
}
|
}
|
||||||
|
|
||||||
cfg := connector.Config{
|
c := connector.NewConsumer(connector.Config{
|
||||||
MaxBatchCount: 500,
|
AppName: *app,
|
||||||
LogHandler: text.New(os.Stderr),
|
StreamName: *stream,
|
||||||
}
|
})
|
||||||
|
|
||||||
c := connector.NewConsumer(*app, *stream, cfg)
|
|
||||||
|
|
||||||
c.Start(connector.HandlerFunc(func(b connector.Buffer) {
|
c.Start(connector.HandlerFunc(func(b connector.Buffer) {
|
||||||
body := new(bytes.Buffer)
|
body := new(bytes.Buffer)
|
||||||
|
|
@ -39,7 +40,7 @@ func main() {
|
||||||
body.Write(r.Data)
|
body.Write(r.Data)
|
||||||
}
|
}
|
||||||
|
|
||||||
err := emitter.Emit(
|
err := e.Emit(
|
||||||
s3.Key("", b.FirstSeq(), b.LastSeq()),
|
s3.Key("", b.FirstSeq(), b.LastSeq()),
|
||||||
bytes.NewReader(body.Bytes()),
|
bytes.NewReader(body.Bytes()),
|
||||||
)
|
)
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue