Add postgres checkpoint implementation (#55)
This commit is contained in:
parent
739e9e39a5
commit
b7be26418a
8 changed files with 293 additions and 1 deletions
4
.gitignore
vendored
4
.gitignore
vendored
|
|
@ -42,3 +42,7 @@ vendor/**
|
||||||
# Benchmark files
|
# Benchmark files
|
||||||
prof.cpu
|
prof.cpu
|
||||||
prof.mem
|
prof.mem
|
||||||
|
|
||||||
|
# VSCode files
|
||||||
|
/.vscode
|
||||||
|
/**/debug
|
||||||
|
|
|
||||||
11
Gopkg.lock
generated
11
Gopkg.lock
generated
|
|
@ -60,6 +60,15 @@
|
||||||
packages = ["."]
|
packages = ["."]
|
||||||
revision = "0b12d6b5"
|
revision = "0b12d6b5"
|
||||||
|
|
||||||
|
[[projects]]
|
||||||
|
branch = "master"
|
||||||
|
name = "github.com/lib/pq"
|
||||||
|
packages = [
|
||||||
|
".",
|
||||||
|
"oid"
|
||||||
|
]
|
||||||
|
revision = "90697d60dd844d5ef6ff15135d0203f65d2f53b8"
|
||||||
|
|
||||||
[[projects]]
|
[[projects]]
|
||||||
name = "github.com/pkg/errors"
|
name = "github.com/pkg/errors"
|
||||||
packages = ["."]
|
packages = ["."]
|
||||||
|
|
@ -82,6 +91,6 @@
|
||||||
[solve-meta]
|
[solve-meta]
|
||||||
analyzer-name = "dep"
|
analyzer-name = "dep"
|
||||||
analyzer-version = 1
|
analyzer-version = 1
|
||||||
inputs-digest = "1b40486b645b81bc2215d0153631e9e002e534ba86713ba55500ce62c07cbad8"
|
inputs-digest = "82ad275b394f4727b9d9ad816f534e512b1b007806a4b785d8a742e5836eee48"
|
||||||
solver-name = "gps-cdcl"
|
solver-name = "gps-cdcl"
|
||||||
solver-version = 1
|
solver-version = 1
|
||||||
|
|
|
||||||
28
README.md
28
README.md
|
|
@ -123,6 +123,34 @@ Sort key: shard_id
|
||||||
|
|
||||||
<img width="727" alt="screen shot 2017-11-22 at 7 59 36 pm" src="https://user-images.githubusercontent.com/739782/33158557-b90e4228-cfbf-11e7-9a99-73b56a446f5f.png">
|
<img width="727" alt="screen shot 2017-11-22 at 7 59 36 pm" src="https://user-images.githubusercontent.com/739782/33158557-b90e4228-cfbf-11e7-9a99-73b56a446f5f.png">
|
||||||
|
|
||||||
|
### Postgres Checkpoint
|
||||||
|
|
||||||
|
The Postgres checkpoint requires Table Name, App Name, Stream Name and ConnectionString:
|
||||||
|
|
||||||
|
```go
|
||||||
|
import checkpoint "github.com/harlow/kinesis-consumer/checkpoint/postgres"
|
||||||
|
|
||||||
|
// postgres checkpoint
|
||||||
|
ck, err := checkpoint.New(app, table, connStr)
|
||||||
|
if err != nil {
|
||||||
|
log.Fatalf("new checkpoint error: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
```
|
||||||
|
|
||||||
|
To leverage the Postgres checkpoint we'll also need to create a table:
|
||||||
|
|
||||||
|
```sql
|
||||||
|
CREATE TABLE kinesis_consumer (
|
||||||
|
namespace text NOT NULL,
|
||||||
|
shard_id text NOT NULL,
|
||||||
|
sequence_number numeric NOT NULL,
|
||||||
|
CONSTRAINT kinesis_consumer_pk PRIMARY KEY (namespace, shard_id)
|
||||||
|
);
|
||||||
|
```
|
||||||
|
|
||||||
|
The table name has to be the same that you specify when creating the checkpoint. The primary key composed by namespace and shard_id is mandatory in order to the checkpoint run without issues and also to ensure data integrity.
|
||||||
|
|
||||||
## Options
|
## Options
|
||||||
|
|
||||||
The consumer allows the following optional overrides.
|
The consumer allows the following optional overrides.
|
||||||
|
|
|
||||||
154
checkpoint/postgres/postgres.go
Normal file
154
checkpoint/postgres/postgres.go
Normal file
|
|
@ -0,0 +1,154 @@
|
||||||
|
package postgres
|
||||||
|
|
||||||
|
import (
|
||||||
|
"database/sql"
|
||||||
|
"errors"
|
||||||
|
"fmt"
|
||||||
|
"sync"
|
||||||
|
"time"
|
||||||
|
// this is the postgres package so it makes sense to be here
|
||||||
|
_ "github.com/lib/pq"
|
||||||
|
)
|
||||||
|
|
||||||
|
var getCheckpointQuery = `SELECT sequence_number
|
||||||
|
FROM %s
|
||||||
|
WHERE namespace=$1 AND shard_id=$2`
|
||||||
|
|
||||||
|
var upsertCheckpoint = `INSERT INTO %s (namespace, shard_id, sequence_number)
|
||||||
|
VALUES($1, $2, $3)
|
||||||
|
ON CONFLICT (namespace, shard_id)
|
||||||
|
DO
|
||||||
|
UPDATE
|
||||||
|
SET sequence_number= $3`
|
||||||
|
|
||||||
|
type key struct {
|
||||||
|
streamName string
|
||||||
|
shardID string
|
||||||
|
}
|
||||||
|
|
||||||
|
// Option is used to override defaults when creating a new Checkpoint
|
||||||
|
type Option func(*Checkpoint)
|
||||||
|
|
||||||
|
// Checkpoint stores and retreives the last evaluated key from a DDB scan
|
||||||
|
type Checkpoint struct {
|
||||||
|
appName string
|
||||||
|
conn *sql.DB
|
||||||
|
mu *sync.Mutex // protects the checkpoints
|
||||||
|
done chan struct{}
|
||||||
|
checkpoints map[key]string
|
||||||
|
maxInterval time.Duration
|
||||||
|
}
|
||||||
|
|
||||||
|
// New returns a checkpoint that uses PostgresDB for underlying storage
|
||||||
|
// Using connectionStr turn it more flexible to use specific db configs
|
||||||
|
func New(appName, tableName, connectionStr string, opts ...Option) (*Checkpoint, error) {
|
||||||
|
|
||||||
|
if tableName == "" {
|
||||||
|
return nil, errors.New("Table name not defined")
|
||||||
|
}
|
||||||
|
|
||||||
|
conn, err := sql.Open("postgres", connectionStr)
|
||||||
|
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
getCheckpointQuery = fmt.Sprintf(getCheckpointQuery, tableName)
|
||||||
|
upsertCheckpoint = fmt.Sprintf(upsertCheckpoint, tableName)
|
||||||
|
|
||||||
|
ck := &Checkpoint{
|
||||||
|
conn: conn,
|
||||||
|
appName: appName,
|
||||||
|
done: make(chan struct{}),
|
||||||
|
maxInterval: time.Duration(1 * time.Minute),
|
||||||
|
mu: new(sync.Mutex),
|
||||||
|
checkpoints: map[key]string{},
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, opt := range opts {
|
||||||
|
opt(ck)
|
||||||
|
}
|
||||||
|
|
||||||
|
go ck.loop()
|
||||||
|
|
||||||
|
return ck, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// Get determines if a checkpoint for a particular Shard exists.
|
||||||
|
// Typically used to determine whether we should start processing the shard with
|
||||||
|
// TRIM_HORIZON or AFTER_SEQUENCE_NUMBER (if checkpoint exists).
|
||||||
|
func (c *Checkpoint) Get(streamName, shardID string) (string, error) {
|
||||||
|
namespace := fmt.Sprintf("%s-%s", c.appName, streamName)
|
||||||
|
|
||||||
|
var sequenceNumber string
|
||||||
|
|
||||||
|
err := c.conn.QueryRow(getCheckpointQuery, namespace, shardID).Scan(&sequenceNumber)
|
||||||
|
|
||||||
|
if err != nil {
|
||||||
|
if err == sql.ErrNoRows {
|
||||||
|
return "", nil
|
||||||
|
}
|
||||||
|
|
||||||
|
return "", err
|
||||||
|
}
|
||||||
|
|
||||||
|
return sequenceNumber, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// Set stores a checkpoint for a shard (e.g. sequence number of last record processed by application).
|
||||||
|
// Upon failover, record processing is resumed from this point.
|
||||||
|
func (c *Checkpoint) Set(streamName, shardID, sequenceNumber string) error {
|
||||||
|
c.mu.Lock()
|
||||||
|
defer c.mu.Unlock()
|
||||||
|
|
||||||
|
if sequenceNumber == "" {
|
||||||
|
return fmt.Errorf("sequence number should not be empty")
|
||||||
|
}
|
||||||
|
|
||||||
|
key := key{
|
||||||
|
streamName: streamName,
|
||||||
|
shardID: shardID,
|
||||||
|
}
|
||||||
|
|
||||||
|
c.checkpoints[key] = sequenceNumber
|
||||||
|
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// Shutdown the checkpoint. Save any in-flight data.
|
||||||
|
func (c *Checkpoint) Shutdown() error {
|
||||||
|
defer c.conn.Close()
|
||||||
|
|
||||||
|
c.done <- struct{}{}
|
||||||
|
|
||||||
|
return c.save()
|
||||||
|
}
|
||||||
|
|
||||||
|
func (c *Checkpoint) loop() {
|
||||||
|
tick := time.NewTicker(c.maxInterval)
|
||||||
|
defer tick.Stop()
|
||||||
|
defer close(c.done)
|
||||||
|
|
||||||
|
for {
|
||||||
|
select {
|
||||||
|
case <-tick.C:
|
||||||
|
c.save()
|
||||||
|
case <-c.done:
|
||||||
|
return
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (c *Checkpoint) save() error {
|
||||||
|
c.mu.Lock()
|
||||||
|
defer c.mu.Unlock()
|
||||||
|
|
||||||
|
for key, sequenceNumber := range c.checkpoints {
|
||||||
|
|
||||||
|
if _, err := c.conn.Exec(upsertCheckpoint, fmt.Sprintf("%s-%s", c.appName, key.streamName), key.shardID, sequenceNumber); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return nil
|
||||||
|
}
|
||||||
17
examples/consumer/cp-postgres/README.md
Normal file
17
examples/consumer/cp-postgres/README.md
Normal file
|
|
@ -0,0 +1,17 @@
|
||||||
|
# Consumer with postgres checkpoint
|
||||||
|
|
||||||
|
Read records from the Kinesis stream using postgres as checkpoint
|
||||||
|
|
||||||
|
## Environment Variables
|
||||||
|
|
||||||
|
Export the required environment vars for connecting to the Kinesis stream:
|
||||||
|
|
||||||
|
```shell
|
||||||
|
export AWS_ACCESS_KEY=
|
||||||
|
export AWS_REGION=
|
||||||
|
export AWS_SECRET_KEY=
|
||||||
|
```
|
||||||
|
|
||||||
|
## Run the consumer
|
||||||
|
|
||||||
|
go run main.go --app appName --stream streamName --table tableName --connection connectionString
|
||||||
80
examples/consumer/cp-postgres/main.go
Normal file
80
examples/consumer/cp-postgres/main.go
Normal file
|
|
@ -0,0 +1,80 @@
|
||||||
|
package main
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"errors"
|
||||||
|
"expvar"
|
||||||
|
"flag"
|
||||||
|
"fmt"
|
||||||
|
"log"
|
||||||
|
"os"
|
||||||
|
"os/signal"
|
||||||
|
|
||||||
|
consumer "github.com/harlow/kinesis-consumer"
|
||||||
|
checkpoint "github.com/harlow/kinesis-consumer/checkpoint/postgres"
|
||||||
|
)
|
||||||
|
|
||||||
|
func main() {
|
||||||
|
var (
|
||||||
|
app = flag.String("app", "", "App name")
|
||||||
|
stream = flag.String("stream", "", "Stream name")
|
||||||
|
table = flag.String("table", "", "Table name")
|
||||||
|
connStr = flag.String("connection", "", "Connection Str")
|
||||||
|
)
|
||||||
|
flag.Parse()
|
||||||
|
|
||||||
|
// postgres checkpoint
|
||||||
|
ck, err := checkpoint.New(*app, *table, *connStr)
|
||||||
|
if err != nil {
|
||||||
|
log.Fatalf("checkpoint error: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
var (
|
||||||
|
counter = expvar.NewMap("counters")
|
||||||
|
)
|
||||||
|
|
||||||
|
newKclient := consumer.NewKinesisClient()
|
||||||
|
|
||||||
|
// consumer
|
||||||
|
c, err := consumer.New(
|
||||||
|
*stream,
|
||||||
|
consumer.WithCheckpoint(ck),
|
||||||
|
consumer.WithCounter(counter),
|
||||||
|
consumer.WithClient(newKclient),
|
||||||
|
)
|
||||||
|
if err != nil {
|
||||||
|
log.Fatalf("consumer error: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
// use cancel \func to signal shutdown
|
||||||
|
ctx, cancel := context.WithCancel(context.Background())
|
||||||
|
|
||||||
|
// trap SIGINT, wait to trigger shutdown
|
||||||
|
signals := make(chan os.Signal, 1)
|
||||||
|
signal.Notify(signals, os.Interrupt)
|
||||||
|
|
||||||
|
go func() {
|
||||||
|
<-signals
|
||||||
|
cancel()
|
||||||
|
}()
|
||||||
|
|
||||||
|
// scan stream
|
||||||
|
err = c.Scan(ctx, func(r *consumer.Record) consumer.ScanError {
|
||||||
|
fmt.Println(string(r.Data))
|
||||||
|
err := errors.New("some error happened")
|
||||||
|
// continue scanning
|
||||||
|
return consumer.ScanError{
|
||||||
|
Error: err,
|
||||||
|
StopScan: false,
|
||||||
|
SkipCheckpoint: false,
|
||||||
|
}
|
||||||
|
})
|
||||||
|
|
||||||
|
if err != nil {
|
||||||
|
log.Fatalf("scan error: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
if err := ck.Shutdown(); err != nil {
|
||||||
|
log.Fatalf("checkpoint shutdown error: %v", err)
|
||||||
|
}
|
||||||
|
}
|
||||||
Loading…
Reference in a new issue