Update example usage in README
* Showcase the new StringToStringTransformer as a simple way of getting data directly from Kinesis to Redshift.
This commit is contained in:
parent
4d00c430cf
commit
a9a625aa79
1 changed files with 71 additions and 145 deletions
216
README.md
216
README.md
|
|
@ -33,178 +33,104 @@ The library has been broken into several components (buffers, checkpoints, filte
|
||||||
|
|
||||||
The Redshift Pipeline will pull records from Kinesis and buffer them untill the desired threshold is reached. The Emitter will then upload the buffered records to an S3 bucket, set a checkpoint in Redis, and copy data to to Redshift.
|
The Redshift Pipeline will pull records from Kinesis and buffer them untill the desired threshold is reached. The Emitter will then upload the buffered records to an S3 bucket, set a checkpoint in Redis, and copy data to to Redshift.
|
||||||
|
|
||||||
|
Pipeline properties:
|
||||||
|
|
||||||
|
```
|
||||||
|
# Connector Settings
|
||||||
|
appName = kinesisToRedshiftBasic
|
||||||
|
numRecordsToBuffer = 25
|
||||||
|
|
||||||
|
# S3 Settings
|
||||||
|
s3Bucket = bucketName
|
||||||
|
|
||||||
|
# Kinesis Settings
|
||||||
|
kinesisStream = streamName
|
||||||
|
kinesisStreamShardCount = 2
|
||||||
|
|
||||||
|
# Redshift Settings
|
||||||
|
tableName = redshift_table_name
|
||||||
|
format = json
|
||||||
|
```
|
||||||
|
|
||||||
|
_Note:_ This example pipeline batch copies the data from Kinesis directly to the S3 bucket and uses the JSON COPY statement to load into Redshift.
|
||||||
|
|
||||||
```go
|
```go
|
||||||
package main
|
package main
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"fmt"
|
"fmt"
|
||||||
|
|
||||||
"github.com/harlow/kinesis-connectors"
|
"github.com/harlow/kinesis-connectors"
|
||||||
"github.com/harlow/sample-connectors/transformers"
|
"github.com/joho/godotenv"
|
||||||
"github.com/joho/godotenv"
|
"github.com/sendgridlabs/go-kinesis"
|
||||||
"github.com/sendgridlabs/go-kinesis"
|
|
||||||
)
|
)
|
||||||
|
|
||||||
type Config struct {
|
type Config struct {
|
||||||
AppName string
|
AppName string
|
||||||
NumRecordsToBuffer int
|
Format string
|
||||||
KinesisStream string
|
KinesisStream string
|
||||||
KinesisStreamShardCount int
|
KinesisStreamShardCount int
|
||||||
TableName string
|
NumRecordsToBuffer int
|
||||||
S3Bucket string
|
S3Bucket string
|
||||||
Format string
|
TableName string
|
||||||
Delimiter string
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func NewPipeline(cfg Config) *connector.Pipeline {
|
func NewPipeline(cfg Config) *connector.Pipeline {
|
||||||
b := connector.RecordBuffer{
|
f := connector.AllPassFilter{}
|
||||||
NumRecordsToBuffer: cfg.NumRecordsToBuffer,
|
|
||||||
}
|
|
||||||
|
|
||||||
c := connector.RedisCheckpoint{
|
b := connector.RecordBuffer{
|
||||||
AppName: cfg.AppName,
|
NumRecordsToBuffer: cfg.NumRecordsToBuffer,
|
||||||
StreamName: cfg.KinesisStream,
|
}
|
||||||
}
|
|
||||||
|
|
||||||
e := connector.RedshiftEmitter{
|
t := connector.StringToStringTransformer{}
|
||||||
TableName: cfg.TableName,
|
|
||||||
S3Bucket: cfg.S3Bucket,
|
|
||||||
Format: cfg.Format,
|
|
||||||
Delimiter: cfg.Delimiter,
|
|
||||||
}
|
|
||||||
|
|
||||||
f := connector.AllPassFilter{}
|
c := connector.RedisCheckpoint{
|
||||||
|
AppName: cfg.AppName,
|
||||||
|
StreamName: cfg.KinesisStream,
|
||||||
|
}
|
||||||
|
|
||||||
t := transformers.UserTransformer{}
|
e := connector.RedshiftEmitter{
|
||||||
|
TableName: cfg.TableName,
|
||||||
|
S3Bucket: cfg.S3Bucket,
|
||||||
|
Format: cfg.Format,
|
||||||
|
}
|
||||||
|
|
||||||
return &connector.Pipeline{
|
return &connector.Pipeline{
|
||||||
Buffer: &b,
|
Buffer: &b,
|
||||||
Checkpoint: &c,
|
Checkpoint: &c,
|
||||||
Emitter: &e,
|
Emitter: &e,
|
||||||
Filter: &f,
|
Filter: &f,
|
||||||
StreamName: cfg.KinesisStream,
|
StreamName: cfg.KinesisStream,
|
||||||
Transformer: &t,
|
Transformer: &t,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func main() {
|
func main() {
|
||||||
var cfg Config
|
var cfg Config
|
||||||
godotenv.Load()
|
godotenv.Load()
|
||||||
ksis := kinesis.New("", "", kinesis.Region{})
|
ksis := kinesis.New("", "", kinesis.Region{})
|
||||||
|
|
||||||
connector.LoadConfig(&cfg, "redshift_basic_pipeline.properties")
|
connector.LoadConfig(&cfg, "redshift_basic_pipeline.properties")
|
||||||
connector.CreateAndWaitForStreamToBecomeAvailable(ksis, cfg.KinesisStream, cfg.KinesisStreamShardCount)
|
connector.CreateAndWaitForStreamToBecomeAvailable(ksis, cfg.KinesisStream, cfg.KinesisStreamShardCount)
|
||||||
|
|
||||||
args := kinesis.NewArgs()
|
args := kinesis.NewArgs()
|
||||||
args.Add("StreamName", cfg.KinesisStream)
|
args.Add("StreamName", cfg.KinesisStream)
|
||||||
streamInfo, err := ksis.DescribeStream(args)
|
streamInfo, err := ksis.DescribeStream(args)
|
||||||
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
fmt.Printf("Unable to connect to %v stream. Aborting.", cfg.KinesisStream)
|
fmt.Printf("Unable to connect to %v stream. Aborting.", cfg.KinesisStream)
|
||||||
return
|
return
|
||||||
}
|
|
||||||
|
|
||||||
for _, shard := range streamInfo.StreamDescription.Shards {
|
|
||||||
var p = NewPipeline(cfg)
|
|
||||||
go p.ProcessShard(ksis, shard.ShardId)
|
|
||||||
}
|
|
||||||
|
|
||||||
select {}
|
|
||||||
}
|
|
||||||
```
|
|
||||||
|
|
||||||
```go
|
|
||||||
package models
|
|
||||||
|
|
||||||
// Implements Model interface
|
|
||||||
type User struct {
|
|
||||||
ID int `json:"userid"`
|
|
||||||
Username string `json:"username"`
|
|
||||||
Firstname string `json:"firstname"`
|
|
||||||
Lastname string `json:"lastname"`
|
|
||||||
City string `json:"city"`
|
|
||||||
State string `json:"state"`
|
|
||||||
Email string `json:"email"`
|
|
||||||
Phone string `json:"phone"`
|
|
||||||
Likesports bool `json:"likesports"`
|
|
||||||
Liketheatre bool `json:"liketheatre"`
|
|
||||||
Likeconcerts bool `json:"likeconcerts"`
|
|
||||||
Likejazz bool `json:"likejazz"`
|
|
||||||
Likeclassical bool `json:"likeclassical"`
|
|
||||||
Likeopera bool `json:"likeopera"`
|
|
||||||
Likerock bool `json:"likerock"`
|
|
||||||
Likevegas bool `json:"likevegas"`
|
|
||||||
Likebroadway bool `json:"likebroadway"`
|
|
||||||
Likemusicals bool `json:"likemusicals"`
|
|
||||||
}
|
|
||||||
|
|
||||||
func (u User) ToString() string {
|
|
||||||
s := []string{
|
|
||||||
strconv.Itoa(u.ID),
|
|
||||||
u.Username,
|
|
||||||
u.Firstname,
|
|
||||||
u.Lastname,
|
|
||||||
u.City,
|
|
||||||
u.State,
|
|
||||||
u.Email,
|
|
||||||
u.Phone,
|
|
||||||
strconv.FormatBool(u.Likesports),
|
|
||||||
strconv.FormatBool(u.Liketheatre),
|
|
||||||
strconv.FormatBool(u.Likeconcerts),
|
|
||||||
strconv.FormatBool(u.Likejazz),
|
|
||||||
strconv.FormatBool(u.Likeclassical),
|
|
||||||
strconv.FormatBool(u.Likeopera),
|
|
||||||
strconv.FormatBool(u.Likerock),
|
|
||||||
strconv.FormatBool(u.Likevegas),
|
|
||||||
strconv.FormatBool(u.Likebroadway),
|
|
||||||
strconv.FormatBool(u.Likemusicals),
|
|
||||||
"\n",
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return strings.Join(s, "|")
|
for _, shard := range streamInfo.StreamDescription.Shards {
|
||||||
|
var p = NewPipeline(cfg)
|
||||||
|
go p.ProcessShard(ksis, shard.ShardId)
|
||||||
|
}
|
||||||
|
|
||||||
|
select {}
|
||||||
}
|
}
|
||||||
```
|
```
|
||||||
|
|
||||||
```go
|
|
||||||
package transformers
|
|
||||||
|
|
||||||
// Implements Transformer interface
|
|
||||||
type UserTransformer struct {}
|
|
||||||
|
|
||||||
func (t *UserTransformer) ToModel(data []byte) connector.Model {
|
|
||||||
user := &models.User{}
|
|
||||||
json.Unmarshal(data, &user)
|
|
||||||
return user
|
|
||||||
}
|
|
||||||
```
|
|
||||||
|
|
||||||
```sql
|
|
||||||
CREATE TABLE users (
|
|
||||||
id INTEGER,
|
|
||||||
username VARCHAR(255),
|
|
||||||
first_name VARCHAR(255),
|
|
||||||
last_name VARCHAR(255),
|
|
||||||
city VARCHAR(255),
|
|
||||||
state VARCHAR(255),
|
|
||||||
email VARCHAR(255),
|
|
||||||
phone VARCHAR(255),
|
|
||||||
like_sports BOOLEAN,
|
|
||||||
like_theatre BOOLEAN,
|
|
||||||
like_concerts BOOLEAN,
|
|
||||||
like_jazz BOOLEAN,
|
|
||||||
like_classical BOOLEAN,
|
|
||||||
like_opera BOOLEAN,
|
|
||||||
like_rock BOOLEAN,
|
|
||||||
like_vegas BOOLEAN,
|
|
||||||
like_broadway BOOLEAN,
|
|
||||||
like_musicals BOOLEAN,
|
|
||||||
PRIMARY KEY(id)
|
|
||||||
)
|
|
||||||
DISTSTYLE KEY
|
|
||||||
DISTKEY(id)
|
|
||||||
SORTKEY(id)
|
|
||||||
```
|
|
||||||
|
|
||||||
[1]: http://godoc.org/github.com/harlow/kinesis-connectors
|
[1]: http://godoc.org/github.com/harlow/kinesis-connectors
|
||||||
[2]: http://aws.amazon.com/kinesis/
|
[2]: http://aws.amazon.com/kinesis/
|
||||||
[3]: https://github.com/awslabs/amazon-kinesis-connectors
|
[3]: https://github.com/awslabs/amazon-kinesis-connectors
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue