Requiring the Logger to be passed in to each of the structs caused panics when the Logger was omitted. * Add a function for overriding the default logger * Remove panics caused by initialization
148 lines
3.9 KiB
Go
148 lines
3.9 KiB
Go
package connector
|
|
|
|
import (
|
|
"bytes"
|
|
"database/sql"
|
|
"encoding/json"
|
|
"fmt"
|
|
"os"
|
|
"strings"
|
|
"time"
|
|
|
|
"github.com/crowdmob/goamz/aws"
|
|
"github.com/crowdmob/goamz/s3"
|
|
_ "github.com/lib/pq"
|
|
)
|
|
|
|
// RedshiftManifestEmitter is an implementation of Emitter that reads S3 file
// paths from a stream, creates a manifest file and batch copies them into
// Redshift.
type RedshiftManifestEmitter struct {
	// AccessKey is not read by any method visible in this file; copyStmt
	// takes the access key from the AWS_ACCESS_KEY environment variable.
	AccessKey string
	// CopyMandatory is copied into the Mandatory field of every manifest entry.
	CopyMandatory bool
	// DataTable is the table named as the target of the COPY statement.
	DataTable string
	// Delimiter is placed in the DELIMITER clause of the COPY statement when
	// Format is neither "json" nor "jsonpaths".
	Delimiter string
	// FileTable is the table the emitted file paths are INSERTed into.
	FileTable string
	// Format selects the COPY data format clause: "json" emits json 'auto',
	// "jsonpaths" emits json '<Jsonpaths>', anything else emits DELIMITER.
	Format string
	// Jsonpaths is interpolated into the json '...' clause when Format is
	// "jsonpaths".
	Jsonpaths string
	// S3Bucket is the bucket the manifest is uploaded to and the bucket used
	// to build the s3:// URLs in the manifest and in the COPY statement.
	S3Bucket string
	// SecretKey is not read by any method visible in this file; copyStmt
	// takes the secret key from the AWS_SECRET_KEY environment variable.
	SecretKey string
}
|
|
|
|
// Invoked when the buffer is full.
|
|
// Emits a Manifest file to S3 and then performs the Redshift copy command.
|
|
func (e RedshiftManifestEmitter) Emit(b Buffer, t Transformer) {
|
|
db, err := sql.Open("postgres", os.Getenv("REDSHIFT_URL"))
|
|
|
|
if err != nil {
|
|
logger.Fatalf("sql.Open ERROR: %v\n", err)
|
|
}
|
|
|
|
// Aggregate file paths as strings
|
|
files := []string{}
|
|
for _, r := range b.Records() {
|
|
f := t.FromRecord(r)
|
|
files = append(files, string(f))
|
|
}
|
|
|
|
// Manifest file name
|
|
date := time.Now().UTC().Format("2006/01/02")
|
|
manifestFileName := e.getManifestName(date, files)
|
|
|
|
// Issue manifest COPY to Redshift
|
|
e.writeManifestToS3(files, manifestFileName)
|
|
c := e.copyStmt(manifestFileName)
|
|
_, err = db.Exec(c)
|
|
|
|
if err != nil {
|
|
logger.Fatalf("db.Exec ERROR: %v\n", err)
|
|
}
|
|
|
|
// Insert file paths into File Names table
|
|
i := e.fileInsertStmt(files)
|
|
_, err = db.Exec(i)
|
|
|
|
if err != nil {
|
|
logger.Fatalf("db.Exec ERROR: %v\n", err)
|
|
}
|
|
|
|
logger.Printf("[%v] copied to Redshift", manifestFileName)
|
|
db.Close()
|
|
}
|
|
|
|
// Creates the INSERT statement for the file names database table.
|
|
func (e RedshiftManifestEmitter) fileInsertStmt(fileNames []string) string {
|
|
i := new(bytes.Buffer)
|
|
i.WriteString("('")
|
|
i.WriteString(strings.Join(fileNames, "'),('"))
|
|
i.WriteString("')")
|
|
|
|
b := new(bytes.Buffer)
|
|
b.WriteString("INSERT INTO ")
|
|
b.WriteString(e.FileTable)
|
|
b.WriteString(" VALUES ")
|
|
b.WriteString(i.String())
|
|
b.WriteString(";")
|
|
|
|
return b.String()
|
|
}
|
|
|
|
// Creates the COPY statment for Redshift insertion.
|
|
func (e RedshiftManifestEmitter) copyStmt(filePath string) string {
|
|
b := new(bytes.Buffer)
|
|
c := fmt.Sprintf(
|
|
"CREDENTIALS 'aws_access_key_id=%s;aws_secret_access_key=%s' ",
|
|
os.Getenv("AWS_ACCESS_KEY"),
|
|
os.Getenv("AWS_SECRET_KEY"),
|
|
)
|
|
b.WriteString("COPY " + e.DataTable + " ")
|
|
b.WriteString("FROM 's3://" + e.S3Bucket + "/" + filePath + "' ")
|
|
b.WriteString(c)
|
|
switch e.Format {
|
|
case "json":
|
|
b.WriteString(fmt.Sprintf("json 'auto' "))
|
|
case "jsonpaths":
|
|
b.WriteString(fmt.Sprintf("json '%s' ", e.Jsonpaths))
|
|
default:
|
|
b.WriteString(fmt.Sprintf("DELIMITER '%s' ", e.Delimiter))
|
|
}
|
|
b.WriteString("MANIFEST")
|
|
b.WriteString(";")
|
|
return b.String()
|
|
}
|
|
|
|
// Put the Manifest file contents to Redshift
|
|
func (e RedshiftManifestEmitter) writeManifestToS3(files []string, manifestFileName string) {
|
|
auth, _ := aws.EnvAuth()
|
|
s3Con := s3.New(auth, aws.USEast)
|
|
bucket := s3Con.Bucket(e.S3Bucket)
|
|
content := e.generateManifestFile(files)
|
|
err := bucket.Put(manifestFileName, content, "text/plain", s3.Private, s3.Options{})
|
|
if err != nil {
|
|
fmt.Printf("Error occured while uploding to S3: %v\n", err)
|
|
}
|
|
}
|
|
|
|
// Manifest file name based on First and Last sequence numbers
|
|
func (e RedshiftManifestEmitter) getManifestName(date string, files []string) string {
|
|
firstSeq := e.getSeq(files[0])
|
|
lastSeq := e.getSeq(files[len(files)-1])
|
|
return fmt.Sprintf("%v/_manifest/%v_%v", date, firstSeq, lastSeq)
|
|
}
|
|
|
|
// Trims the date and suffix information from string
|
|
func (e RedshiftManifestEmitter) getSeq(file string) string {
|
|
matches := strings.Split(file, "/")
|
|
return matches[len(matches)-1]
|
|
}
|
|
|
|
// Manifest file contents in JSON structure
|
|
func (e RedshiftManifestEmitter) generateManifestFile(files []string) []byte {
|
|
m := &Manifest{}
|
|
for _, r := range files {
|
|
var url = fmt.Sprintf("s3://%s/%s", e.S3Bucket, r)
|
|
var entry = Entry{Url: url, Mandatory: e.CopyMandatory}
|
|
m.Entries = append(m.Entries, entry)
|
|
}
|
|
b, _ := json.Marshal(m)
|
|
return b
|
|
}
|