kinesis-consumer/redshift_emitter.go

package connector

import (
	"bytes"
	"database/sql"
	"fmt"
	"log"
	"os"

	_ "github.com/lib/pq"
)

// RedshiftEmitter is an implementation of Emitter that loads buffered batches
// of records into Redshift one batch at a time. It first emits records to S3
// and then performs the Redshift JSON COPY command. S3 storage of buffered
// data is achieved using the S3Emitter. A link to jsonpaths must be provided
// when configuring the struct.
type RedshiftEmitter struct {
	Delimiter string
	Format    string
	Jsonpath  string
	S3Bucket  string
	TableName string
}
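
// A minimal configuration sketch (illustrative only; the bucket, table, and
// jsonpaths values below are assumptions, not defaults shipped with the
// package):
//
//	emitter := RedshiftEmitter{
//		Format:    "json",
//		Jsonpath:  "s3://my-bucket/events.jsonpaths",
//		S3Bucket:  "my-bucket",
//		TableName: "events",
//	}
//	emitter.Emit(buf) // buf satisfies the package's Buffer interface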

// Emit is invoked when the buffer is full. It leverages the S3Emitter to
// stage the buffered records and then issues a COPY command to the Redshift
// data store.
func (e RedshiftEmitter) Emit(b Buffer) {
	s3Emitter := S3Emitter{S3Bucket: e.S3Bucket}
	s3Emitter.Emit(b)
	s3File := s3Emitter.S3FileName(b.FirstSequenceNumber(), b.LastSequenceNumber())

	db, err := sql.Open("postgres", os.Getenv("REDSHIFT_URL"))
	if err != nil {
		log.Fatal(err)
	}
	defer db.Close()

	_, err = db.Exec(e.copyStatement(s3File))
	if err != nil {
		log.Fatal(err)
	}

	fmt.Printf("Redshift load completed.\n")
}
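
// The emitter reads its connection string and AWS credentials from the
// environment (see the os.Getenv calls above and in copyStatement below).
// A sketch of the variables it expects; the values are placeholders:
//
//	export REDSHIFT_URL="postgres://user:pass@cluster.example:5439/db"
//	export AWS_ACCESS_KEY="AKIA..."
//	export AWS_SECRET_KEY="..."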

// copyStatement creates the SQL COPY statement issued to the Redshift cluster.
func (e RedshiftEmitter) copyStatement(s3File string) string {
	var b bytes.Buffer
	b.WriteString(fmt.Sprintf("COPY %v ", e.TableName))
	b.WriteString(fmt.Sprintf("FROM 's3://%v%v' ", e.S3Bucket, s3File))
	b.WriteString(fmt.Sprintf("CREDENTIALS 'aws_access_key_id=%v;", os.Getenv("AWS_ACCESS_KEY")))
	b.WriteString(fmt.Sprintf("aws_secret_access_key=%v' ", os.Getenv("AWS_SECRET_KEY")))
	if e.Format == "json" {
		// Use the configured jsonpaths file when provided, otherwise fall
		// back to Redshift's automatic field mapping.
		if e.Jsonpath != "" {
			b.WriteString(fmt.Sprintf("json '%v'", e.Jsonpath))
		} else {
			b.WriteString("json 'auto'")
		}
	} else {
		b.WriteString(fmt.Sprintf("DELIMITER '%v'", e.Delimiter))
	}
	b.WriteString(";")
	return b.String()
}
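
// For illustration (not generated by this file), a JSON-format emitter for a
// table named "events" would yield a statement shaped like:
//
//	COPY events FROM 's3://my-bucket/<s3File>' CREDENTIALS
//	'aws_access_key_id=...;aws_secret_access_key=...' json 'auto';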