Base pipeline components

* Create base Interfaces for Pipeline
* Add first base implementations for Pipeline
* Add initial test for core functionality
Harlow Ward 2014-07-24 23:03:41 -07:00
parent 0fe80b708b
commit 06b40e6ed8
19 changed files with 929 additions and 9 deletions

9
.gitignore vendored

@ -19,5 +19,12 @@ _cgo_export.*
_testmain.go
*.exe
*.test
*.exe
# vim temp files
.*.swp
.*.swo
# System files
.DS_Store

212
README.md

@ -1,14 +1,210 @@
# Kinesis Connector Application
# Golang Kinesis Connectors
A Kinesis Connector Application written in Go for extracting streaming event data
into S3, Redshift, DynamoDB, and more.
__Note:__ _This codebase is a under active development._
__Note:__ _This codebase is under active development and is not considered production ready._
### Kinesis connector applications written in Go
## Installation
This is a port of the [AWS Kinesis connector libraries][2] from Java to Go for extracting streaming event data
into S3, Redshift, DynamoDB, and more. See the [API Docs][1] for package documentation.
Clone the repository.
## Overview
$ git clone git@github.com:harlow/go-etl.git
Each Amazon Kinesis connector application is a pipeline that determines how records from an Amazon Kinesis stream will be handled. Records are retrieved from the stream, transformed according to a user-defined data model, buffered for batch processing, and then emitted to the appropriate AWS service.
![golang_kinesis_connector](https://cloud.githubusercontent.com/assets/739782/4262283/2ee2550e-3b97-11e4-8cd1-21a5d7ee0964.png)
A connector pipeline uses the following interfaces (shown condensed in Go after this list):

* __Pipeline:__ The pipeline implementation itself.
* __Transformer:__ Defines the transformation of records from the Amazon Kinesis stream in order to suit the user-defined data model. Includes methods for custom serializers/deserializers.
* __Filter:__ Defines a method for excluding irrelevant records from processing.
* __Buffer:__ Defines a system for batching the set of records to be processed. The application can specify three thresholds: number of records, total byte count, and time. When one of these thresholds is crossed, the buffer is flushed and the data is emitted to the destination.
* __Emitter:__ Defines a method that makes client calls to other AWS services and persists the records stored in the buffer. The records can also be sent to another Amazon Kinesis stream.
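
For reference, the interfaces are small; condensed from the source files in this commit:

```go
// Condensed from the interface definitions below.
type Model interface {
	ToString() string
}

type Transformer interface {
	ToModel(data []byte) Model
}

type Filter interface {
	KeepRecord(m Model) bool
}

type Buffer interface {
	Add(data Model, sequenceNumber string)
	Records() []Model
	NumRecordsInBuffer() int
	FirstSequenceNumber() string
	LastSequenceNumber() string
	ShouldFlush() bool
	Flush()
}

type Emitter interface {
	Emit(buffer Buffer)
}

type Checkpoint interface {
	CheckpointExists(shardID string) bool
	SequenceNumber() string
	SetCheckpoint(shardID string, sequenceNumber string)
}
```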
## Usage
Install the library:
$ go get github.com/harlow/kinesis-connectors
The library has been broken into several components (buffers, checkpoints, filters, transformers, and emitters). These components can be mixed and matched to produce the desired functionality.
### Example Redshift Pipeline
The Redshift Pipeline will pull records from Kinesis and buffer them until the desired threshold is reached. The Emitter will then upload the buffered records to an S3 bucket, set a checkpoint in Redis, and copy the data into Redshift.
```go
package main

import (
	"fmt"

	"github.com/harlow/kinesis-connectors"
	"github.com/harlow/sample-connectors/transformers"
	"github.com/joho/godotenv"
	"github.com/sendgridlabs/go-kinesis"
)

type Config struct {
	AppName                 string
	NumRecordsToBuffer      int
	KinesisStream           string
	KinesisStreamShardCount int
	TableName               string
	S3Bucket                string
	Format                  string
	Delimiter               string
}

func NewPipeline(cfg Config) *connector.Pipeline {
	b := connector.RecordBuffer{
		NumRecordsToBuffer: cfg.NumRecordsToBuffer,
	}
	c := connector.RedisCheckpoint{
		AppName:    cfg.AppName,
		StreamName: cfg.KinesisStream,
	}
	e := connector.RedshiftEmitter{
		TableName: cfg.TableName,
		S3Bucket:  cfg.S3Bucket,
		Format:    cfg.Format,
		Delimiter: cfg.Delimiter,
	}
	f := connector.AllPassFilter{}
	t := transformers.UserTransformer{}

	return &connector.Pipeline{
		Buffer:      &b,
		Checkpoint:  &c,
		Emitter:     &e,
		Filter:      &f,
		StreamName:  cfg.KinesisStream,
		Transformer: &t,
	}
}

func main() {
	var cfg Config
	godotenv.Load()
	ksis := kinesis.New("", "", kinesis.Region{})

	connector.LoadConfig(&cfg, "redshift_basic_pipeline.properties")
	connector.CreateAndWaitForStreamToBecomeAvailable(ksis, cfg.KinesisStream, cfg.KinesisStreamShardCount)

	args := kinesis.NewArgs()
	args.Add("StreamName", cfg.KinesisStream)
	streamInfo, err := ksis.DescribeStream(args)

	if err != nil {
		fmt.Printf("Unable to connect to %v stream. Aborting.", cfg.KinesisStream)
		return
	}

	for _, shard := range streamInfo.StreamDescription.Shards {
		var p = NewPipeline(cfg)
		go p.ProcessShard(ksis, shard.ShardId)
	}

	select {}
}
```
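
The properties file referenced above is a plain `key=value` file parsed by `connector.LoadConfig` (lines starting with `#` or `;` are ignored, and the first letter of each key is upcased to find the matching struct field). A hypothetical `redshift_basic_pipeline.properties` might look like this; key names map to the `Config` struct fields and the values are placeholders:

```
# Hypothetical example configuration (placeholder values)
appName=userPipeline
kinesisStream=userStream
kinesisStreamShardCount=2
numRecordsToBuffer=500
tableName=users
s3Bucket=my-bucket
format=delimited
delimiter=|
```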
```go
package models

import (
	"strconv"
	"strings"
)

// User implements the connector Model interface.
type User struct {
	ID            int    `json:"userid"`
	Username      string `json:"username"`
	Firstname     string `json:"firstname"`
	Lastname      string `json:"lastname"`
	City          string `json:"city"`
	State         string `json:"state"`
	Email         string `json:"email"`
	Phone         string `json:"phone"`
	Likesports    bool   `json:"likesports"`
	Liketheatre   bool   `json:"liketheatre"`
	Likeconcerts  bool   `json:"likeconcerts"`
	Likejazz      bool   `json:"likejazz"`
	Likeclassical bool   `json:"likeclassical"`
	Likeopera     bool   `json:"likeopera"`
	Likerock      bool   `json:"likerock"`
	Likevegas     bool   `json:"likevegas"`
	Likebroadway  bool   `json:"likebroadway"`
	Likemusicals  bool   `json:"likemusicals"`
}

// ToString serializes the User as a pipe-delimited string.
func (u User) ToString() string {
	s := []string{
		strconv.Itoa(u.ID),
		u.Username,
		u.Firstname,
		u.Lastname,
		u.City,
		u.State,
		u.Email,
		u.Phone,
		strconv.FormatBool(u.Likesports),
		strconv.FormatBool(u.Liketheatre),
		strconv.FormatBool(u.Likeconcerts),
		strconv.FormatBool(u.Likejazz),
		strconv.FormatBool(u.Likeclassical),
		strconv.FormatBool(u.Likeopera),
		strconv.FormatBool(u.Likerock),
		strconv.FormatBool(u.Likevegas),
		strconv.FormatBool(u.Likebroadway),
		strconv.FormatBool(u.Likemusicals),
		"\n",
	}
	return strings.Join(s, "|")
}
```
```go
package transformers

import (
	"encoding/json"
	"github.com/harlow/kinesis-connectors"
	"github.com/harlow/sample-connectors/models" // assumed path for the models package shown above
)

// UserTransformer implements the Transformer interface.
type UserTransformer struct{}

func (t *UserTransformer) ToModel(data []byte) connector.Model {
	user := &models.User{}
	json.Unmarshal(data, &user)
	return user
}
```
```sql
CREATE TABLE users (
id INTEGER,
username VARCHAR(255),
first_name VARCHAR(255),
last_name VARCHAR(255),
city VARCHAR(255),
state VARCHAR(255),
email VARCHAR(255),
phone VARCHAR(255),
like_sports BOOLEAN,
like_theatre BOOLEAN,
like_concerts BOOLEAN,
like_jazz BOOLEAN,
like_classical BOOLEAN,
like_opera BOOLEAN,
like_rock BOOLEAN,
like_vegas BOOLEAN,
like_broadway BOOLEAN,
like_musicals BOOLEAN,
PRIMARY KEY(id)
)
DISTSTYLE KEY
DISTKEY(id)
SORTKEY(id)
```
[1]: http://godoc.org/github.com/harlow/kinesis-connectors
[2]: http://aws.amazon.com/kinesis/
[3]: https://github.com/awslabs/amazon-kinesis-connectors

9
all_pass_filter.go Normal file

@ -0,0 +1,9 @@
package connector
// A basic implementation of the Filter interface that returns true for all records.
type AllPassFilter struct{}
// Returns true for all records.
func (b *AllPassFilter) KeepRecord(m Model) bool {
return true
}

16
buffer.go Normal file

@ -0,0 +1,16 @@
package connector
// Buffer defines a buffer used to store records streamed through Kinesis. It is part of the
// Pipeline and is used by the Pipeline.ProcessShard function. Records are stored in the buffer by
// calling the Add method. The buffer has two limits defined: a total number of records and a time
// limit in seconds. The ShouldFlush method indicates when the buffer is full based on these limits.
type Buffer interface {
Add(data Model, sequenceNumber string)
FirstSequenceNumber() string
Flush()
LastSequenceNumber() string
NumRecordsInBuffer() int
Records() []Model
ShouldFlush() bool
}

10
checkpoint.go Normal file

@ -0,0 +1,10 @@
package connector
// Checkpoint is used by Pipeline.ProcessShard to record its progress. An object implementing this
// interface is passed to ProcessShard so that it can checkpoint the last processed sequence number
// and resume from that point.
type Checkpoint interface {
CheckpointExists(shardID string) bool
SequenceNumber() string
SetCheckpoint(shardID string, sequenceNumber string)
}

11
emitter.go Normal file

@ -0,0 +1,11 @@
package connector
// Emitter takes a full buffer and processes the stored records. The Emitter is the member of the
// Pipeline that "emits" the objects that have been deserialized by the Transformer. The Emit method
// is invoked when the buffer is full, for example to persist the records or to send them to another
// Kinesis stream. Implementations may choose to fail the entire set of records in the buffer or to
// fail records individually.
type Emitter interface {
Emit(buffer Buffer)
}

7
filter.go Normal file

@ -0,0 +1,7 @@
package connector
// The Filter is associated with a Buffer. The Buffer may use the result of calling the
// KeepRecord method to decide whether to store a record or discard it.
type Filter interface {
KeepRecord(m Model) bool
}
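
A user-defined Filter only needs to implement `KeepRecord`. As a hypothetical illustration (not part of this commit), a filter that drops records whose serialized form is empty might look like this:

```go
// Hypothetical example: keep only records whose serialized form is non-empty.
type NonEmptyFilter struct{}

func (f *NonEmptyFilter) KeepRecord(m Model) bool {
	return m.ToString() != ""
}
```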

6
model.go Normal file

@ -0,0 +1,6 @@
package connector
// Model is used to map the attributes of the data being sent through the Kinesis stream.
type Model interface {
ToString() string
}

86
pipeline.go Normal file

@ -0,0 +1,86 @@
package connector
import (
"fmt"
"time"
"github.com/sendgridlabs/go-kinesis"
)
// Pipeline is used by the main application to configure and run a user-defined pipeline. Each field
// holds a configured implementation of the corresponding interface. Records come off the stream as a
// []byte and are transformed into a Model; Models are buffered, and when the buffer is full they are
// passed to the Emitter.
type Pipeline struct {
Buffer Buffer
Checkpoint Checkpoint
Emitter Emitter
Filter Filter
StreamName string
Transformer Transformer
}
func (p Pipeline) ProcessShard(ksis *kinesis.Kinesis, shardID string) {
args := kinesis.NewArgs()
args.Add("ShardId", shardID)
args.Add("StreamName", p.StreamName)
if p.Checkpoint.CheckpointExists(shardID) {
args.Add("ShardIteratorType", "AFTER_SEQUENCE_NUMBER")
args.Add("StartingSequenceNumber", p.Checkpoint.SequenceNumber())
} else {
args.Add("ShardIteratorType", "TRIM_HORIZON")
}
shardInfo, err := ksis.GetShardIterator(args)
if err != nil {
fmt.Printf("Error fetching shard itterator: %v", err)
return
}
shardIterator := shardInfo.ShardIterator
for {
args = kinesis.NewArgs()
args.Add("ShardIterator", shardIterator)
recordSet, err := ksis.GetRecords(args)
if err != nil {
fmt.Printf("GetRecords ERROR: %v\n", err)
continue
}
if len(recordSet.Records) > 0 {
for _, v := range recordSet.Records {
data, err := v.GetData()
if err != nil {
fmt.Printf("GetData ERROR: %v\n", err)
continue
}
var m = p.Transformer.ToModel(data)
if p.Filter.KeepRecord(m) {
p.Buffer.Add(m, v.SequenceNumber)
}
}
} else if recordSet.NextShardIterator == "" || shardIterator == recordSet.NextShardIterator || err != nil {
fmt.Printf("NextShardIterator ERROR: %v\n", err)
break
} else {
fmt.Printf("Sleeping: %v\n", shardID)
time.Sleep(5 * time.Second)
}
if p.Buffer.ShouldFlush() {
fmt.Printf("Emitting to Shard: %v\n", shardID)
p.Emitter.Emit(p.Buffer)
p.Checkpoint.SetCheckpoint(shardID, p.Buffer.LastSequenceNumber())
p.Buffer.Flush()
}
shardIterator = recordSet.NextShardIterator
}
}

67
record_buffer.go Normal file

@ -0,0 +1,67 @@
package connector
// RecordBuffer is a basic implementation of the Buffer interface. It wraps a slice of records that
// is flushed once the configured number of records has been buffered. Records whose sequence number
// is already present in the buffer are ignored.
type RecordBuffer struct {
NumRecordsToBuffer int
firstSequenceNumber string
lastSequenceNumber string
recordsInBuffer []Model
sequencesInBuffer []string
}
// Adds a message to the buffer.
func (b *RecordBuffer) Add(record Model, sequenceNumber string) {
if len(b.sequencesInBuffer) == 0 {
b.firstSequenceNumber = sequenceNumber
}
b.lastSequenceNumber = sequenceNumber
if !b.sequenceExists(sequenceNumber) {
b.recordsInBuffer = append(b.recordsInBuffer, record)
b.sequencesInBuffer = append(b.sequencesInBuffer, sequenceNumber)
}
}
// Returns the records in the buffer.
func (b *RecordBuffer) Records() []Model {
return b.recordsInBuffer
}
// Returns the number of messages in the buffer.
func (b RecordBuffer) NumRecordsInBuffer() int {
return len(b.sequencesInBuffer)
}
// Flushes the content in the buffer and resets the sequence counter.
func (b *RecordBuffer) Flush() {
b.recordsInBuffer = b.recordsInBuffer[:0]
b.sequencesInBuffer = b.sequencesInBuffer[:0]
}
// Checks if the sequence already exists in the buffer.
func (b *RecordBuffer) sequenceExists(sequenceNumber string) bool {
for _, v := range b.sequencesInBuffer {
if v == sequenceNumber {
return true
}
}
return false
}
// Determines if the buffer has reached its target size.
func (b *RecordBuffer) ShouldFlush() bool {
return len(b.sequencesInBuffer) >= b.NumRecordsToBuffer
}
// Returns the sequence number of the first message in the buffer.
func (b *RecordBuffer) FirstSequenceNumber() string {
return b.firstSequenceNumber
}
// Returns the sequence number of the last message in the buffer.
func (b *RecordBuffer) LastSequenceNumber() string {
return b.lastSequenceNumber
}

118
record_buffer_test.go Normal file

@ -0,0 +1,118 @@
package connector
import "testing"
type TestModel struct{}
func (u TestModel) ToString() string {
return "ok"
}
func TestAdd(t *testing.T) {
var r1, s1 = TestModel{}, "Seq1"
var r2, s2 = TestModel{}, "Seq2"
b := RecordBuffer{}
b.Add(r1, s1)
if b.NumRecordsInBuffer() != 1 {
t.Errorf("NumRecordsInBuffer() want %v", 1)
}
b.Add(r2, s2)
if b.NumRecordsInBuffer() != 2 {
t.Errorf("NumRecordsInBuffer() want %v", 2)
}
b.Add(r2, s2)
if b.NumRecordsInBuffer() != 2 {
t.Errorf("NumRecordsInBuffer() want %v", 2)
}
}
func TestSequenceExists(t *testing.T) {
var r1, s1 = TestModel{}, "Seq1"
var r2, s2 = TestModel{}, "Seq2"
b := RecordBuffer{}
b.Add(r1, s1)
if b.sequenceExists(s1) != true {
t.Errorf("sequenceExists() want %v", true)
}
b.Add(r2, s2)
if b.sequenceExists(s2) != true {
t.Errorf("sequenceExists() want %v", true)
}
}
func TestFlush(t *testing.T) {
var r1, s1 = TestModel{}, "SeqNum"
b := RecordBuffer{}
b.Add(r1, s1)
b.Flush()
if b.NumRecordsInBuffer() != 0 {
t.Errorf("Count() want %v", 0)
}
}
func TestLastSequenceNumber(t *testing.T) {
var r1, s1 = TestModel{}, "Seq1"
var r2, s2 = TestModel{}, "Seq2"
b := RecordBuffer{}
b.Add(r1, s1)
if b.LastSequenceNumber() != s1 {
t.Errorf("LastSequenceNumber() want %v", s1)
}
b.Add(r2, s2)
if b.LastSequenceNumber() != s2 {
t.Errorf("LastSequenceNumber() want %v", s2)
}
}
func TestFirstSequenceNumber(t *testing.T) {
var r1, s1 = TestModel{}, "Seq1"
var r2, s2 = TestModel{}, "Seq2"
b := RecordBuffer{}
b.Add(r1, s1)
if b.FirstSequenceNumber() != s1 {
t.Errorf("FirstSequenceNumber() want %v", s1)
}
b.Add(r2, s2)
if b.FirstSequenceNumber() != s1 {
t.Errorf("FirstSequenceNumber() want %v", s1)
}
}
func TestShouldFlush(t *testing.T) {
const n = 2
var r1, s1 = TestModel{}, "Seq1"
var r2, s2 = TestModel{}, "Seq2"
b := RecordBuffer{NumRecordsToBuffer: n}
b.Add(r1, s1)
if b.ShouldFlush() != false {
t.Errorf("ShouldFlush() want %v", false)
}
b.Add(r2, s2)
if b.ShouldFlush() != true {
t.Errorf("ShouldFlush() want %v", true)
}
}

40
redis_checkpoint.go Normal file

@ -0,0 +1,40 @@
package connector
import (
"fmt"
"github.com/hoisie/redis"
)
// RedisCheckpoint is a Redis implementation of the Checkpoint interface. It is used to enable
// Pipeline.ProcessShard to checkpoint its progress.
type RedisCheckpoint struct {
AppName string
client redis.Client
sequenceNumber string
StreamName string
}
func (c *RedisCheckpoint) CheckpointExists(shardID string) bool {
val, _ := c.client.Get(c.key(shardID))
if val != nil && string(val) != "" {
c.sequenceNumber = string(val)
return true
} else {
return false
}
}
func (c *RedisCheckpoint) SequenceNumber() string {
return c.sequenceNumber
}
func (c *RedisCheckpoint) SetCheckpoint(shardID string, sequenceNumber string) {
c.client.Set(c.key(shardID), []byte(sequenceNumber))
c.sequenceNumber = sequenceNumber
}
func (c *RedisCheckpoint) key(shardID string) string {
return fmt.Sprintf("%v:checkpoint:%v:%v", c.AppName, c.StreamName, shardID)
}

48
redis_checkpoint_test.go Normal file

@ -0,0 +1,48 @@
package connector
import (
"testing"
"github.com/hoisie/redis"
)
func TestKey(t *testing.T) {
k := "app:checkpoint:stream:shard"
c := RedisCheckpoint{AppName: "app", StreamName: "stream"}
r := c.key("shard")
if r != k {
t.Errorf("key() = %v, want %v", k, r)
}
}
func TestCheckpointExists(t *testing.T) {
var rc redis.Client
k := "app:checkpoint:stream:shard"
rc.Set(k, []byte("fakeSeqNum"))
c := RedisCheckpoint{AppName: "app", StreamName: "stream"}
r := c.CheckpointExists("shard")
if r != true {
t.Errorf("CheckpointExists() = %v, want %v", false, r)
}
rc.Del(k)
}
func TestSetCheckpoint(t *testing.T) {
k := "app:checkpoint:stream:shard"
var rc redis.Client
c := RedisCheckpoint{AppName: "app", StreamName: "stream"}
c.SetCheckpoint("shard", "fakeSeqNum")
r, _ := rc.Get(k)
if string(r) != "fakeSeqNum" {
t.Errorf("SetCheckpoint() = %v, want %v", "fakeSeqNum", r)
}
rc.Del(k)
}

62
redshift_emitter.go Normal file

@ -0,0 +1,62 @@
package connector
import (
"bytes"
"database/sql"
"fmt"
"log"
"os"
_ "github.com/lib/pq"
)
// RedshiftEmitter is an implementation of Emitter that batches buffered records into Redshift. It
// first emits the records to S3 and then performs a Redshift COPY command. S3 storage of the
// buffered data is achieved using the S3Emitter. A link to a jsonpaths file may be provided when
// configuring the struct.
type RedshiftEmitter struct {
Delimiter string
Format string
Jsonpath string
S3Bucket string
TableName string
}
// Invoked when the buffer is full. This method leverages the S3Emitter and then issues a COPY
// command to the Redshift data store.
func (e RedshiftEmitter) Emit(b Buffer) {
s3Emitter := S3Emitter{S3Bucket: e.S3Bucket}
s3Emitter.Emit(b)
s3File := s3Emitter.S3FileName(b.FirstSequenceNumber(), b.LastSequenceNumber())
db, err := sql.Open("postgres", os.Getenv("REDSHIFT_URL"))
if err != nil {
log.Fatal(err)
}
_, err = db.Exec(e.copyStatement(s3File))
if err != nil {
log.Fatal(err)
}
fmt.Printf("Redshift load completed.\n")
db.Close()
}
// Creates the SQL copy statement issued to Redshift cluster.
func (e RedshiftEmitter) copyStatement(s3File string) string {
var b bytes.Buffer
b.WriteString(fmt.Sprintf("COPY %v ", e.TableName))
b.WriteString(fmt.Sprintf("FROM 's3://%v%v' ", e.S3Bucket, s3File))
b.WriteString(fmt.Sprintf("CREDENTIALS 'aws_access_key_id=%v;", os.Getenv("AWS_ACCESS_KEY")))
b.WriteString(fmt.Sprintf("aws_secret_access_key=%v' ", os.Getenv("AWS_SECRET_KEY")))
if e.Format == "json" {
b.WriteString(fmt.Sprintf("json 'auto'"))
} else {
b.WriteString(fmt.Sprintf("DELIMITER '%v'", e.Delimiter))
}
b.WriteString(";")
return b.String()
}

20
redshift_emitter_test.go Normal file

@ -0,0 +1,20 @@
package connector
import (
"testing"
)
func TestCopyStatement(t *testing.T) {
e := RedshiftEmitter{
Delimiter: ",",
S3Bucket: "test_bucket",
TableName: "test_table",
}
f := e.copyStatement("/test.txt")
copyStatement := "COPY test_table FROM 's3://test_bucket/test.txt' CREDENTIALS 'aws_access_key_id=;aws_secret_access_key=' DELIMITER ',';"
if f != copyStatement {
t.Errorf("copyStatement() = %s want %s", f, copyStatement)
}
}

48
s3_emitter.go Normal file

@ -0,0 +1,48 @@
package connector
import (
"bytes"
"fmt"
"time"
"github.com/crowdmob/goamz/aws"
"github.com/crowdmob/goamz/s3"
)
// S3Emitter is an implementation of Emitter used to store files from a Kinesis stream in S3. When
// the buffer is full, this struct's Emit method adds the contents of the buffer to S3 as one file.
// The filename is generated from the first and last sequence numbers of the records contained in
// that file, separated by a dash. This struct requires the configuration of an S3 bucket and
// endpoint.
type S3Emitter struct {
S3Bucket string
}
// Generates a file name based on the first and last sequence numbers from the buffer. The current
// UTC date (YYYY-MM-DD) forms the base of the path so that batches are logically grouped by day.
func (e S3Emitter) S3FileName(firstSeq string, lastSeq string) string {
date := time.Now().UTC().Format("2006-01-02")
return fmt.Sprintf("/%v/%v-%v.txt", date, firstSeq, lastSeq)
}
// Invoked when the buffer is full. This method emits the set of filtered records.
func (e S3Emitter) Emit(buf Buffer) {
auth, _ := aws.EnvAuth()
s3Con := s3.New(auth, aws.USEast)
bucket := s3Con.Bucket(e.S3Bucket)
s3File := e.S3FileName(buf.FirstSequenceNumber(), buf.LastSequenceNumber())
var buffer bytes.Buffer
for _, r := range buf.Records() {
buffer.WriteString(r.ToString())
}
err := bucket.Put(s3File, buffer.Bytes(), "text/plain", s3.Private, s3.Options{})
if err != nil {
fmt.Printf("Error occured while uploding to S3: %v\n", err)
} else {
fmt.Printf("Emitted %v records to S3 in s3://%v%v\n", buf.NumRecordsInBuffer(), e.S3Bucket, s3File)
}
}

18
s3_emitter_test.go Normal file

@ -0,0 +1,18 @@
package connector
import (
"fmt"
"testing"
"time"
)
func TestS3FileName(t *testing.T) {
d := time.Now().UTC().Format("2006-01-02")
n := fmt.Sprintf("/%v/a-b.txt", d)
e := S3Emitter{}
f := e.S3FileName("a", "b")
if f != n {
t.Errorf("S3FileName() = want %v", f, n)
}
}

7
transformer.go Normal file

@ -0,0 +1,7 @@
package connector
// Transformer is used to transform data from a Record (byte array) to the data model for
// processing in the application.
type Transformer interface {
ToModel(data []byte) Model
}

144
utils.go Normal file

@ -0,0 +1,144 @@
package connector
import (
"bufio"
"fmt"
"log"
"os"
"reflect"
"regexp"
"strconv"
"strings"
"time"
"unicode"
"github.com/sendgridlabs/go-kinesis"
)
func readLines(path string) ([]string, error) {
file, err := os.Open(path)
if err != nil {
return nil, err
}
defer file.Close()
var lines []string
scanner := bufio.NewScanner(file)
for scanner.Scan() {
lines = append(lines, scanner.Text())
}
return lines, scanner.Err()
}
var (
assignRegex = regexp.MustCompile(`^([^=]+)=(.*)$`)
)
func upcaseInitial(str string) string {
for i, v := range str {
return string(unicode.ToUpper(v)) + str[i+1:]
}
return ""
}
// Opens the file path and loads config values into the struct.
func LoadConfig(config interface{}, filename string) error {
lines, err := readLines(filename)
if err != nil {
log.Fatalf("Config Load ERROR: %s\n", err)
}
mutable := reflect.ValueOf(config).Elem()
for _, line := range lines {
line = strings.TrimSpace(line)
if len(line) == 0 || line[0] == ';' || line[0] == '#' {
continue
}
if groups := assignRegex.FindStringSubmatch(line); groups != nil {
key, val := groups[1], groups[2]
key, val = strings.TrimSpace(key), strings.TrimSpace(val)
key = upcaseInitial(key)
field := mutable.FieldByName(key)
if !field.IsValid() {
log.Fatalf("Config ERROR: Field %s not found\n", key)
}
switch field.Type().Name() {
case "int":
val, _ := strconv.ParseInt(val, 0, 64)
mutable.FieldByName(key).SetInt(val)
case "bool":
val, _ := strconv.ParseBool(val)
mutable.FieldByName(key).SetBool(val)
default:
mutable.FieldByName(key).SetString(val)
}
}
}
return nil
}
// Creates a new Kinesis stream (or uses the existing stream if it already exists) and waits for it to become available.
func CreateAndWaitForStreamToBecomeAvailable(k *kinesis.Kinesis, streamName string, shardCount int) {
if !StreamExists(k, streamName) {
err := k.CreateStream(streamName, shardCount)
if err != nil {
fmt.Printf("CreateStream ERROR: %v\n", err)
return
}
}
resp := &kinesis.DescribeStreamResp{}
for {
args := kinesis.NewArgs()
args.Add("StreamName", streamName)
resp, _ = k.DescribeStream(args)
streamStatus := resp.StreamDescription.StreamStatus
fmt.Printf("Stream [%v] is %v\n", streamName, streamStatus)
if streamStatus != "ACTIVE" {
time.Sleep(4 * time.Second)
timeout <- true
} else {
break
}
}
}
// Check if a Kinesis stream exists.
func StreamExists(k *kinesis.Kinesis, streamName string) bool {
args := kinesis.NewArgs()
resp, _ := k.ListStreams(args)
for _, s := range resp.StreamNames {
if s == streamName {
return true
}
}
return false
}
// Delete a Kinesis stream.
func DeleteStream(k *kinesis.Kinesis, streamName string) {
err := k.DeleteStream(streamName)
if err != nil {
fmt.Printf("DeleteStream ERROR: %v\n", err)
return
}
fmt.Printf("Stream [%v] is DELETING\n", streamName)
}