Compare commits

..

1 commit

Author SHA1 Message Date
Rafael Garcia
2b763d7b15 remove some strings 2017-09-25 16:26:32 +00:00
26 changed files with 686 additions and 1702 deletions

View file

@ -1,30 +0,0 @@
version: 2
jobs:
build:
working_directory: /go/src/github.com/Clever/amazon-kinesis-client-go
docker:
- image: circleci/golang:1.13-stretch
- image: circleci/mongo:3.2.20-jessie-ram
environment:
GOPRIVATE: github.com/Clever/*
CIRCLE_ARTIFACTS: /tmp/circleci-artifacts
CIRCLE_TEST_REPORTS: /tmp/circleci-test-results
steps:
- run:
command: cd $HOME && git clone --depth 1 -v https://github.com/Clever/ci-scripts.git && cd ci-scripts && git show --oneline -s
name: Clone ci-scripts
- checkout
- setup_remote_docker
- run:
command: mkdir -p $CIRCLE_ARTIFACTS $CIRCLE_TEST_REPORTS
name: Set up CircleCI artifacts directories
- run:
command: git config --global "url.ssh://git@github.com/Clever".insteadOf "https://github.com/Clever"
- run:
name: Add github.com to known hosts
command: mkdir -p ~/.ssh && touch ~/.ssh/known_hosts && echo 'github.com ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABgQCj7ndNxQowgcQnjshcLrqPEiiphnt+VTTvDP6mHBL9j1aNUkY4Ue1gvwnGLVlOhGeYrnZaMgRK6+PKCUXaDbC7qtbW8gIkhL7aGCsOr/C56SJMy/BCZfxd1nWzAOxSDPgVsmerOBYfNqltV9/hWCqBywINIR+5dIg6JTJ72pcEpEjcYgXkE2YEFXV1JHnsKgbLWNlhScqb2UmyRkQyytRLtL+38TGxkxCflmO+5Z8CSSNY7GidjMIZ7Q4zMjA2n1nGrlTDkzwDCsw+wqFPGQA179cnfGWOWRVruj16z6XyvxvjJwbz0wQZ75XK5tKSb7FNyeIEs4TT4jk+S4dhPeAUC5y+bDYirYgM4GC7uEnztnZyaVWQ7B381AK4Qdrwt51ZqExKbQpTUNn+EjqoTwvqNj4kqx5QUCI0ThS/YkOxJCXmPUWZbhjpCg56i+2aB6CmK2JGhn57K5mj0MNdBXA4/WnwH6XoPWJzK5Nyu2zB3nAZp+S5hpQs+p1vN1/wsjk=' >> ~/.ssh/known_hosts
- run: make install_deps
- run: make build
- run: make bench
- run: make test
- run: if [ "${CIRCLE_BRANCH}" == "master" ]; then $HOME/ci-scripts/circleci/github-release $GH_RELEASE_TOKEN; fi;

1
.github/CODEOWNERS vendored
View file

@ -1 +0,0 @@
* @Clever/eng-infra

View file

@ -1,20 +0,0 @@
name: Notify CI status
on:
check_suite:
types: [completed]
status:
jobs:
call-workflow:
if: >-
(github.event.branches[0].name == github.event.repository.default_branch &&
(github.event.state == 'error' || github.event.state == 'failure')) ||
(github.event.check_suite.head_branch == github.event.repository.default_branch &&
github.event.check_suite.conclusion != 'success')
uses: Clever/ci-scripts/.github/workflows/reusable-notify-ci-status.yml@master
secrets:
CIRCLE_CI_INTEGRATIONS_URL: ${{ secrets.CIRCLE_CI_INTEGRATIONS_URL }}
CIRCLE_CI_INTEGRATIONS_USERNAME: ${{ secrets.CIRCLE_CI_INTEGRATIONS_USERNAME }}
CIRCLE_CI_INTEGRATIONS_PASSWORD: ${{ secrets.CIRCLE_CI_INTEGRATIONS_PASSWORD }}
SLACK_BOT_TOKEN: ${{ secrets.DAPPLE_BOT_TOKEN }}

View file

@ -5,7 +5,7 @@ SHELL := /bin/bash
PKG := github.com/Clever/amazon-kinesis-client-go PKG := github.com/Clever/amazon-kinesis-client-go
PKGS := $(shell go list ./... | grep -v /vendor ) PKGS := $(shell go list ./... | grep -v /vendor )
.PHONY: download_jars run build .PHONY: download_jars run build
$(eval $(call golang-version-check,1.13)) $(eval $(call golang-version-check,1.8))
CONSUMER ?= consumer CONSUMER ?= consumer
TMP_DIR := ./tmp-jars TMP_DIR := ./tmp-jars
@ -37,7 +37,7 @@ download_jars:
mv $(TMP_DIR)/target/dependency/* $(JAR_DIR)/ mv $(TMP_DIR)/target/dependency/* $(JAR_DIR)/
# Download the STS jar file for supporting IAM Roles # Download the STS jar file for supporting IAM Roles
ls $(JAR_DIR)/aws-java-sdk-core-*.jar | sed -e "s/.*-sdk-core-//g" | sed -e "s/\.jar//g" > /tmp/version.txt ls $(JAR_DIR)/aws-java-sdk-core-*.jar | sed -e "s/.*-sdk-core-//g" | sed -e "s/\.jar//g" > /tmp/version.txt
curl -o $(JAR_DIR)/aws-java-sdk-sts-`cat /tmp/version.txt`.jar https://repo1.maven.org/maven2/com/amazonaws/aws-java-sdk-sts/`cat /tmp/version.txt`/aws-java-sdk-sts-`cat /tmp/version.txt`.jar curl -o $(JAR_DIR)/aws-java-sdk-sts-`cat /tmp/version.txt`.jar http://central.maven.org/maven2/com/amazonaws/aws-java-sdk-sts/`cat /tmp/version.txt`/aws-java-sdk-sts-`cat /tmp/version.txt`.jar
rm -r $(TMP_DIR) rm -r $(TMP_DIR)
all: test build all: test build
@ -58,8 +58,8 @@ test: $(PKGS)
$(PKGS): golang-test-all-deps $(PKGS): golang-test-all-deps
$(call golang-test-all,$@) $(call golang-test-all,$@)
$(GOPATH)/bin/glide:
@go get github.com/Masterminds/glide
install_deps: $(GOPATH)/bin/glide
@$(GOPATH)/bin/glide install
install_deps:
go mod vendor

View file

@ -1 +0,0 @@
1.0.0

View file

@ -10,8 +10,6 @@ import (
"github.com/Clever/amazon-kinesis-client-go/kcl" "github.com/Clever/amazon-kinesis-client-go/kcl"
) )
var lg = kv.New("amazon-kinesis-client-go")
type tagMsgPair struct { type tagMsgPair struct {
tag string tag string
msg []byte msg []byte
@ -25,9 +23,9 @@ type batcherManagerConfig struct {
} }
type batcherManager struct { type batcherManager struct {
failedLogsFile kv.KayveeLogger log kv.KayveeLogger
sender Sender sender Sender
chkpntManager *checkpointManager chkpntManager *checkpointManager
batchCount int batchCount int
batchSize int batchSize int
@ -40,12 +38,12 @@ type batcherManager struct {
} }
func newBatcherManager( func newBatcherManager(
sender Sender, chkpntManager *checkpointManager, cfg batcherManagerConfig, failedLogsFile kv.KayveeLogger, sender Sender, chkpntManager *checkpointManager, cfg batcherManagerConfig, log kv.KayveeLogger,
) *batcherManager { ) *batcherManager {
bm := &batcherManager{ bm := &batcherManager{
failedLogsFile: failedLogsFile, log: log,
sender: sender, sender: sender,
chkpntManager: chkpntManager, chkpntManager: chkpntManager,
batchCount: cfg.BatchCount, batchCount: cfg.BatchCount,
batchSize: cfg.BatchSize, batchSize: cfg.BatchSize,
@ -98,16 +96,16 @@ func (b *batcherManager) sendBatch(batcher *batcher, tag string) {
switch e := err.(type) { switch e := err.(type) {
case nil: // Do nothing case nil: // Do nothing
case PartialSendBatchError: case PartialSendBatchError:
lg.ErrorD("send-batch", kv.M{"msg": e.Error()}) b.log.ErrorD("send-batch", kv.M{"msg": e.Error()})
for _, line := range e.FailedMessages { for _, line := range e.FailedMessages {
b.failedLogsFile.ErrorD("failed-log", kv.M{"log": line, "msg": e.Error()}) b.log.ErrorD("failed-log", kv.M{"log": line})
} }
stats.Counter("batch-log-failures", len(e.FailedMessages)) stats.Counter("batch-log-failures", len(e.FailedMessages))
case CatastrophicSendBatchError: case CatastrophicSendBatchError:
lg.CriticalD("send-batch", kv.M{"msg": e.Error()}) b.log.CriticalD("send-batch", kv.M{"msg": e.Error()})
os.Exit(1) os.Exit(1)
default: default:
lg.CriticalD("send-batch", kv.M{"msg": e.Error()}) b.log.CriticalD("send-batch", kv.M{"msg": e.Error()})
os.Exit(1) os.Exit(1)
} }
@ -186,14 +184,8 @@ func (b *batcherManager) startMessageHandler(
batcher.AddMessage(tmp.msg, tmp.pair) batcher.AddMessage(tmp.msg, tmp.pair)
} else if err != nil { } else if err != nil {
lg.ErrorD("add-message", kv.M{ b.log.ErrorD("add-message", kv.M{
"err": err.Error(), "err": err.Error(), "msg": string(tmp.msg), "tag": tmp.tag,
"tag": tmp.tag,
})
b.failedLogsFile.ErrorD("add-message", kv.M{
"err": err.Error(),
"msg": string(tmp.msg),
"tag": tmp.tag,
}) })
} }
stats.Counter("msg-batched", 1) stats.Counter("msg-batched", 1)

View file

@ -3,11 +3,15 @@ package batchconsumer
import ( import (
"time" "time"
kv "gopkg.in/Clever/kayvee-go.v6/logger"
"github.com/Clever/amazon-kinesis-client-go/batchconsumer/stats" "github.com/Clever/amazon-kinesis-client-go/batchconsumer/stats"
"github.com/Clever/amazon-kinesis-client-go/kcl" "github.com/Clever/amazon-kinesis-client-go/kcl"
) )
type checkpointManager struct { type checkpointManager struct {
log kv.KayveeLogger
checkpointFreq time.Duration checkpointFreq time.Duration
checkpoint chan kcl.SequencePair checkpoint chan kcl.SequencePair
@ -15,8 +19,12 @@ type checkpointManager struct {
shutdown chan chan<- struct{} shutdown chan chan<- struct{}
} }
func newCheckpointManager(checkpointer kcl.Checkpointer, checkpointFreq time.Duration) *checkpointManager { func newCheckpointManager(
checkpointer kcl.Checkpointer, checkpointFreq time.Duration, log kv.KayveeLogger,
) *checkpointManager {
cm := &checkpointManager{ cm := &checkpointManager{
log: log,
checkpointFreq: checkpointFreq, checkpointFreq: checkpointFreq,
checkpoint: make(chan kcl.SequencePair), checkpoint: make(chan kcl.SequencePair),

View file

@ -13,8 +13,8 @@ import (
// Config used for BatchConsumer constructor. Any empty fields are populated with defaults. // Config used for BatchConsumer constructor. Any empty fields are populated with defaults.
type Config struct { type Config struct {
// FailedLogsFile is where logs that failed to process are written. // LogFile where consumer errors and failed log lines are saved
FailedLogsFile string LogFile string
// BatchInterval the upper bound on how often SendBatch is called with accumulated messages // BatchInterval the upper bound on how often SendBatch is called with accumulated messages
BatchInterval time.Duration BatchInterval time.Duration
@ -34,13 +34,13 @@ type Config struct {
// BatchConsumer is responsible for marshalling // BatchConsumer is responsible for marshalling
type BatchConsumer struct { type BatchConsumer struct {
kclProcess *kcl.KCLProcess kclProcess *kcl.KCLProcess
failedLogsFile *os.File logfile *os.File
} }
func withDefaults(config Config) Config { func withDefaults(config Config) Config {
if config.FailedLogsFile == "" { if config.LogFile == "" {
config.FailedLogsFile = "/tmp/kcl-" + time.Now().Format(time.RFC3339) config.LogFile = "/tmp/kcl-" + time.Now().Format(time.RFC3339)
} }
if config.BatchInterval == 0 { if config.BatchInterval == 0 {
@ -74,19 +74,20 @@ func NewBatchConsumerFromFiles(
) *BatchConsumer { ) *BatchConsumer {
config = withDefaults(config) config = withDefaults(config)
file, err := os.OpenFile(config.FailedLogsFile, os.O_RDWR|os.O_CREATE|os.O_APPEND, 0666) file, err := os.OpenFile(config.LogFile, os.O_RDWR|os.O_CREATE|os.O_APPEND, 0666)
if err != nil { if err != nil {
log.Fatalf("Unable to create log file: %s", err.Error()) log.Fatalf("Unable to create log file: %s", err.Error())
} }
failedLogsFile := logger.New("amazon-kinesis-client-go/batchconsumer")
failedLogsFile.SetOutput(file)
wrt := NewBatchedWriter(config, sender, failedLogsFile) kvlog := logger.New("amazon-kinesis-client-go")
kvlog.SetOutput(file)
wrt := NewBatchedWriter(config, sender, kvlog)
kclProcess := kcl.New(input, output, errFile, wrt) kclProcess := kcl.New(input, output, errFile, wrt)
return &BatchConsumer{ return &BatchConsumer{
kclProcess: kclProcess, kclProcess: kclProcess,
failedLogsFile: file, logfile: file,
} }
} }
@ -99,5 +100,5 @@ func NewBatchConsumer(config Config, sender Sender) *BatchConsumer {
// Start when called, the consumer begins ingesting messages. This function blocks. // Start when called, the consumer begins ingesting messages. This function blocks.
func (b *BatchConsumer) Start() { func (b *BatchConsumer) Start() {
b.kclProcess.Run() b.kclProcess.Run()
b.failedLogsFile.Close() b.logfile.Close()
} }

View file

@ -11,8 +11,6 @@ var ErrMessageIgnored = errors.New("Message intentionally skipped by sender")
// Sender an interface needed for batch consumer implementations // Sender an interface needed for batch consumer implementations
type Sender interface { type Sender interface {
// Initialize called once before ProcessMessage and SendBatch
Initialize(shardID string)
// ProcessMessage receives a raw message and is expected to return an appropriately formatted // ProcessMessage receives a raw message and is expected to return an appropriately formatted
// message as well as a list of tags for that log line. A tag corresponds to a batch that // message as well as a list of tags for that log line. A tag corresponds to a batch that
// it'll be put into. Typically tags are series names. // it'll be put into. Typically tags are series names.

View file

@ -30,17 +30,16 @@ type datum struct {
var queue = make(chan datum, 1000) var queue = make(chan datum, 1000)
func init() { func init() {
countData := map[string]int{} data := map[string]int{}
gaugeData := map[string]int{}
tick := time.Tick(time.Minute) tick := time.Tick(time.Minute)
go func() { go func() {
for { for {
select { select {
case d := <-queue: case d := <-queue:
if d.category == "counter" { if d.category == "counter" {
countData[d.name] = countData[d.name] + d.value data[d.name] = data[d.name] + d.value
} else if d.category == "gauge" { } else if d.category == "gauge" {
gaugeData[d.name] = d.value data[d.name] = d.value
} else { } else {
log.ErrorD("unknown-stat-category", logger.M{"category": d.category}) log.ErrorD("unknown-stat-category", logger.M{"category": d.category})
} }
@ -49,14 +48,10 @@ func init() {
for _, k := range DefaultCounters { for _, k := range DefaultCounters {
tmp[k] = 0 tmp[k] = 0
} }
for k, v := range countData { for k, v := range data {
tmp[k] = v
}
for k, v := range gaugeData {
tmp[k] = v tmp[k] = v
} }
log.InfoD("stats", tmp) log.InfoD("stats", tmp)
countData = map[string]int{}
} }
} }
}() }()

View file

@ -15,9 +15,9 @@ import (
) )
type batchedWriter struct { type batchedWriter struct {
config Config config Config
sender Sender sender Sender
failedLogsFile kv.KayveeLogger log kv.KayveeLogger
shardID string shardID string
@ -30,11 +30,11 @@ type batchedWriter struct {
lastProcessedSeq kcl.SequencePair lastProcessedSeq kcl.SequencePair
} }
func NewBatchedWriter(config Config, sender Sender, failedLogsFile kv.KayveeLogger) *batchedWriter { func NewBatchedWriter(config Config, sender Sender, log kv.KayveeLogger) *batchedWriter {
return &batchedWriter{ return &batchedWriter{
config: config, config: config,
sender: sender, sender: sender,
failedLogsFile: failedLogsFile, log: log,
rateLimiter: rate.NewLimiter(rate.Limit(config.ReadRateLimit), config.ReadBurstLimit), rateLimiter: rate.NewLimiter(rate.Limit(config.ReadRateLimit), config.ReadBurstLimit),
} }
@ -49,14 +49,25 @@ func (b *batchedWriter) Initialize(shardID string, checkpointer kcl.Checkpointer
BatchInterval: b.config.BatchInterval, BatchInterval: b.config.BatchInterval,
} }
b.sender.Initialize(shardID) b.chkpntManager = newCheckpointManager(checkpointer, b.config.CheckpointFreq, b.log)
b.batcherManager = newBatcherManager(b.sender, b.chkpntManager, bmConfig, b.log)
b.chkpntManager = newCheckpointManager(checkpointer, b.config.CheckpointFreq)
b.batcherManager = newBatcherManager(b.sender, b.chkpntManager, bmConfig, b.failedLogsFile)
return nil return nil
} }
func (b *batchedWriter) splitMessageIfNecessary(record []byte) ([][]byte, error) {
// We handle two types of records:
// - records emitted from CWLogs Subscription
// - records emiited from KPL
if !splitter.IsGzipped(record) {
// Process a single message, from KPL
return [][]byte{record}, nil
}
// Process a batch of messages from a CWLogs Subscription
return splitter.GetMessagesFromGzippedInput(record)
}
func (b *batchedWriter) ProcessRecords(records []kcl.Record) error { func (b *batchedWriter) ProcessRecords(records []kcl.Record) error {
var pair kcl.SequencePair var pair kcl.SequencePair
prevPair := b.lastProcessedSeq prevPair := b.lastProcessedSeq
@ -80,7 +91,7 @@ func (b *batchedWriter) ProcessRecords(records []kcl.Record) error {
return err return err
} }
messages, err := splitter.SplitMessageIfNecessary(data) messages, err := b.splitMessageIfNecessary(data)
if err != nil { if err != nil {
return err return err
} }
@ -92,20 +103,20 @@ func (b *batchedWriter) ProcessRecords(records []kcl.Record) error {
continue // Skip message continue // Skip message
} else if err != nil { } else if err != nil {
stats.Counter("unknown-error", 1) stats.Counter("unknown-error", 1)
lg.ErrorD("process-message", kv.M{"msg": err.Error(), "rawmsg": string(rawmsg)}) b.log.ErrorD("process-message", kv.M{"msg": err.Error(), "rawmsg": string(rawmsg)})
continue // Don't stop processing messages because of one bad message continue // Don't stop processing messages because of one bad message
} }
if len(tags) == 0 { if len(tags) == 0 {
stats.Counter("no-tags", 1) stats.Counter("no-tags", 1)
lg.ErrorD("no-tags", kv.M{"rawmsg": string(rawmsg)}) b.log.ErrorD("no-tags", kv.M{"rawmsg": string(rawmsg)})
return fmt.Errorf("No tags provided by consumer for log: %s", string(rawmsg)) return fmt.Errorf("No tags provided by consumer for log: %s", string(rawmsg))
} }
for _, tag := range tags { for _, tag := range tags {
if tag == "" { if tag == "" {
stats.Counter("blank-tag", 1) stats.Counter("blank-tag", 1)
lg.ErrorD("blank-tag", kv.M{"rawmsg": string(rawmsg)}) b.log.ErrorD("blank-tag", kv.M{"rawmsg": string(rawmsg)})
return fmt.Errorf("Blank tag provided by consumer for log: %s", string(rawmsg)) return fmt.Errorf("Blank tag provided by consumer for log: %s", string(rawmsg))
} }
@ -133,9 +144,9 @@ func (b *batchedWriter) ProcessRecords(records []kcl.Record) error {
func (b *batchedWriter) Shutdown(reason string) error { func (b *batchedWriter) Shutdown(reason string) error {
if reason == "TERMINATE" { if reason == "TERMINATE" {
lg.InfoD("terminate-signal", kv.M{"shard-id": b.shardID}) b.log.InfoD("terminate-signal", kv.M{"shard-id": b.shardID})
} else { } else {
lg.ErrorD("shutdown-failover", kv.M{"shard-id": b.shardID, "reason": reason}) b.log.ErrorD("shutdown-failover", kv.M{"shard-id": b.shardID, "reason": reason})
} }
done := b.batcherManager.Shutdown() done := b.batcherManager.Shutdown()

View file

@ -14,8 +14,6 @@ import (
type ignoringSender struct{} type ignoringSender struct{}
func (i ignoringSender) Initialize(shardID string) {}
func (i ignoringSender) ProcessMessage(rawmsg []byte) (msg []byte, tags []string, err error) { func (i ignoringSender) ProcessMessage(rawmsg []byte) (msg []byte, tags []string, err error) {
return nil, nil, ErrMessageIgnored return nil, nil, ErrMessageIgnored
} }
@ -29,7 +27,6 @@ type tagBatch struct {
batch [][]byte batch [][]byte
} }
type msgAsTagSender struct { type msgAsTagSender struct {
shardID string
batches map[string][][][]byte batches map[string][][][]byte
saveBatch chan tagBatch saveBatch chan tagBatch
shutdown chan struct{} shutdown chan struct{}
@ -64,10 +61,6 @@ func (i *msgAsTagSender) startBatchSaver(saveBatch <-chan tagBatch, shutdown <-c
}() }()
} }
func (i *msgAsTagSender) Initialize(shardID string) {
i.shardID = shardID
}
func (i *msgAsTagSender) ProcessMessage(rawmsg []byte) (msg []byte, tags []string, err error) { func (i *msgAsTagSender) ProcessMessage(rawmsg []byte) (msg []byte, tags []string, err error) {
if "ignore" == string(rawmsg) { if "ignore" == string(rawmsg) {
return nil, nil, ErrMessageIgnored return nil, nil, ErrMessageIgnored
@ -142,14 +135,14 @@ func encode(str string) string {
func TestProcessRecordsIgnoredMessages(t *testing.T) { func TestProcessRecordsIgnoredMessages(t *testing.T) {
assert := assert.New(t) assert := assert.New(t)
mockFailedLogsFile := logger.New("testing") mocklog := logger.New("testing")
mockconfig := withDefaults(Config{ mockconfig := withDefaults(Config{
BatchInterval: 10 * time.Millisecond, BatchInterval: 10 * time.Millisecond,
CheckpointFreq: 20 * time.Millisecond, CheckpointFreq: 20 * time.Millisecond,
}) })
mockcheckpointer := NewMockCheckpointer(5 * time.Second) mockcheckpointer := NewMockCheckpointer(5 * time.Second)
wrt := NewBatchedWriter(mockconfig, ignoringSender{}, mockFailedLogsFile) wrt := NewBatchedWriter(mockconfig, ignoringSender{}, mocklog)
wrt.Initialize("test-shard", mockcheckpointer) wrt.Initialize("test-shard", mockcheckpointer)
err := wrt.ProcessRecords([]kcl.Record{ err := wrt.ProcessRecords([]kcl.Record{
@ -172,7 +165,7 @@ func TestProcessRecordsIgnoredMessages(t *testing.T) {
func TestProcessRecordsSingleBatchBasic(t *testing.T) { func TestProcessRecordsSingleBatchBasic(t *testing.T) {
assert := assert.New(t) assert := assert.New(t)
mockFailedLogsFile := logger.New("testing") mocklog := logger.New("testing")
mockconfig := withDefaults(Config{ mockconfig := withDefaults(Config{
BatchCount: 2, BatchCount: 2,
CheckpointFreq: 1, // Don't throttle checks points CheckpointFreq: 1, // Don't throttle checks points
@ -180,11 +173,9 @@ func TestProcessRecordsSingleBatchBasic(t *testing.T) {
mockcheckpointer := NewMockCheckpointer(5 * time.Second) mockcheckpointer := NewMockCheckpointer(5 * time.Second)
mocksender := NewMsgAsTagSender() mocksender := NewMsgAsTagSender()
wrt := NewBatchedWriter(mockconfig, mocksender, mockFailedLogsFile) wrt := NewBatchedWriter(mockconfig, mocksender, mocklog)
wrt.Initialize("test-shard", mockcheckpointer) wrt.Initialize("test-shard", mockcheckpointer)
assert.Equal("test-shard", mocksender.shardID)
err := wrt.ProcessRecords([]kcl.Record{ err := wrt.ProcessRecords([]kcl.Record{
kcl.Record{SequenceNumber: "1", Data: encode("tag1")}, kcl.Record{SequenceNumber: "1", Data: encode("tag1")},
kcl.Record{SequenceNumber: "2", Data: encode("tag1")}, kcl.Record{SequenceNumber: "2", Data: encode("tag1")},
@ -221,7 +212,7 @@ func TestProcessRecordsSingleBatchBasic(t *testing.T) {
func TestProcessRecordsMutliBatchBasic(t *testing.T) { func TestProcessRecordsMutliBatchBasic(t *testing.T) {
assert := assert.New(t) assert := assert.New(t)
mockFailedLogsFile := logger.New("testing") mocklog := logger.New("testing")
mockconfig := withDefaults(Config{ mockconfig := withDefaults(Config{
BatchInterval: 100 * time.Millisecond, BatchInterval: 100 * time.Millisecond,
CheckpointFreq: 200 * time.Millisecond, CheckpointFreq: 200 * time.Millisecond,
@ -229,7 +220,7 @@ func TestProcessRecordsMutliBatchBasic(t *testing.T) {
mockcheckpointer := NewMockCheckpointer(5 * time.Second) mockcheckpointer := NewMockCheckpointer(5 * time.Second)
mocksender := NewMsgAsTagSender() mocksender := NewMsgAsTagSender()
wrt := NewBatchedWriter(mockconfig, mocksender, mockFailedLogsFile) wrt := NewBatchedWriter(mockconfig, mocksender, mocklog)
wrt.Initialize("test-shard", mockcheckpointer) wrt.Initialize("test-shard", mockcheckpointer)
err := wrt.ProcessRecords([]kcl.Record{ err := wrt.ProcessRecords([]kcl.Record{
@ -279,7 +270,7 @@ func TestProcessRecordsMutliBatchBasic(t *testing.T) {
func TestProcessRecordsMutliBatchWithIgnores(t *testing.T) { func TestProcessRecordsMutliBatchWithIgnores(t *testing.T) {
assert := assert.New(t) assert := assert.New(t)
mockFailedLogsFile := logger.New("testing") mocklog := logger.New("testing")
mockconfig := withDefaults(Config{ mockconfig := withDefaults(Config{
BatchInterval: 100 * time.Millisecond, BatchInterval: 100 * time.Millisecond,
CheckpointFreq: 200 * time.Millisecond, CheckpointFreq: 200 * time.Millisecond,
@ -287,7 +278,7 @@ func TestProcessRecordsMutliBatchWithIgnores(t *testing.T) {
mockcheckpointer := NewMockCheckpointer(5 * time.Second) mockcheckpointer := NewMockCheckpointer(5 * time.Second)
mocksender := NewMsgAsTagSender() mocksender := NewMsgAsTagSender()
wrt := NewBatchedWriter(mockconfig, mocksender, mockFailedLogsFile) wrt := NewBatchedWriter(mockconfig, mocksender, mocklog)
wrt.Initialize("test-shard", mockcheckpointer) wrt.Initialize("test-shard", mockcheckpointer)
err := wrt.ProcessRecords([]kcl.Record{ err := wrt.ProcessRecords([]kcl.Record{
@ -352,10 +343,10 @@ func TestProcessRecordsMutliBatchWithIgnores(t *testing.T) {
assert.Equal("tag3", string(mocksender.batches["tag3"][0][2])) assert.Equal("tag3", string(mocksender.batches["tag3"][0][2]))
} }
func TestStaggeredCheckpointing(t *testing.T) { func TestStaggeredCheckpionting(t *testing.T) {
assert := assert.New(t) assert := assert.New(t)
mockFailedLogsFile := logger.New("testing") mocklog := logger.New("testing")
mockconfig := withDefaults(Config{ mockconfig := withDefaults(Config{
BatchCount: 2, BatchCount: 2,
BatchInterval: 100 * time.Millisecond, BatchInterval: 100 * time.Millisecond,
@ -364,7 +355,7 @@ func TestStaggeredCheckpointing(t *testing.T) {
mockcheckpointer := NewMockCheckpointer(5 * time.Second) mockcheckpointer := NewMockCheckpointer(5 * time.Second)
mocksender := NewMsgAsTagSender() mocksender := NewMsgAsTagSender()
wrt := NewBatchedWriter(mockconfig, mocksender, mockFailedLogsFile) wrt := NewBatchedWriter(mockconfig, mocksender, mocklog)
wrt.Initialize("test-shard", mockcheckpointer) wrt.Initialize("test-shard", mockcheckpointer)
err := wrt.ProcessRecords([]kcl.Record{ err := wrt.ProcessRecords([]kcl.Record{

BIN
bin/dep

Binary file not shown.

19
circle.yml Normal file
View file

@ -0,0 +1,19 @@
machine:
post:
- cd $HOME && git clone --depth 1 -v git@github.com:clever/ci-scripts.git && cd ci-scripts && git show --oneline -s
- $HOME/ci-scripts/circleci/golang-install 1.8
services:
- docker
checkout:
post:
- $HOME/ci-scripts/circleci/golang-move-project
compile:
override:
- make install_deps
- make build
- make bench
test:
override:
- make test
general:
build_dir: ../.go_workspace/src/github.com/$CIRCLE_PROJECT_USERNAME/$CIRCLE_PROJECT_REPONAME

View file

@ -2,6 +2,8 @@ package main
import ( import (
"fmt" "fmt"
"log"
"os"
"time" "time"
"gopkg.in/Clever/kayvee-go.v6/logger" "gopkg.in/Clever/kayvee-go.v6/logger"
@ -9,26 +11,36 @@ import (
kbc "github.com/Clever/amazon-kinesis-client-go/batchconsumer" kbc "github.com/Clever/amazon-kinesis-client-go/batchconsumer"
) )
func main() { func createDummyOutput() (logger.KayveeLogger, *os.File) {
config := kbc.Config{ file, err := os.OpenFile("/tmp/example-kcl-output", os.O_RDWR|os.O_CREATE|os.O_APPEND, 0666)
BatchInterval: 10 * time.Second, if err != nil {
BatchCount: 500, log.Fatalf("Unable to create log file: %s", err.Error())
BatchSize: 4 * 1024 * 1024, // 4Mb
FailedLogsFile: "/tmp/example-kcl-consumer",
} }
sender := &exampleSender{output: logger.New("fake-output")} kvlog := logger.New("amazon-kinesis-client-go")
kvlog.SetOutput(file)
return kvlog, file
}
func main() {
config := kbc.Config{
BatchInterval: 10 * time.Second,
BatchCount: 500,
BatchSize: 4 * 1024 * 1024, // 4Mb
LogFile: "/tmp/example-kcl-consumer",
}
output, file := createDummyOutput()
defer file.Close()
sender := &exampleSender{output: output}
consumer := kbc.NewBatchConsumer(config, sender) consumer := kbc.NewBatchConsumer(config, sender)
consumer.Start() consumer.Start()
} }
type exampleSender struct { type exampleSender struct {
shardID string output logger.KayveeLogger
output logger.KayveeLogger
}
func (e *exampleSender) Initialize(shardID string) {
e.shardID = shardID
} }
func (e *exampleSender) ProcessMessage(rawmsg []byte) ([]byte, []string, error) { func (e *exampleSender) ProcessMessage(rawmsg []byte) ([]byte, []string, error) {

View file

@ -29,7 +29,7 @@ func (srp *sampleRecordProcessor) Initialize(shardID string, checkpointer kcl.Ch
} }
func (srp *sampleRecordProcessor) shouldUpdateSequence(pair kcl.SequencePair) bool { func (srp *sampleRecordProcessor) shouldUpdateSequence(pair kcl.SequencePair) bool {
return srp.largestPair.IsNil() || srp.largestPair.IsLessThan(pair) return srp.largestPair.IsLessThan(pair)
} }
func (srp *sampleRecordProcessor) ProcessRecords(records []kcl.Record) error { func (srp *sampleRecordProcessor) ProcessRecords(records []kcl.Record) error {
@ -55,9 +55,6 @@ func (srp *sampleRecordProcessor) Shutdown(reason string) error {
if reason == "TERMINATE" { if reason == "TERMINATE" {
fmt.Fprintf(os.Stderr, "Was told to terminate, will attempt to checkpoint.\n") fmt.Fprintf(os.Stderr, "Was told to terminate, will attempt to checkpoint.\n")
srp.checkpointer.Shutdown() srp.checkpointer.Shutdown()
} else if reason == "SHUTDOWN_REQUESTED" {
fmt.Fprintf(os.Stderr, "Got shutdown requested, attempt to checkpoint.\n")
srp.checkpointer.Shutdown()
} else { } else {
fmt.Fprintf(os.Stderr, "Shutting down due to failover. Will not checkpoint.\n") fmt.Fprintf(os.Stderr, "Shutting down due to failover. Will not checkpoint.\n")
} }
@ -65,6 +62,11 @@ func (srp *sampleRecordProcessor) Shutdown(reason string) error {
} }
func main() { func main() {
f, err := os.Create("/tmp/kcl_stderr")
if err != nil {
panic(err)
}
defer f.Close()
kclProcess := kcl.New(os.Stdin, os.Stdout, os.Stderr, newSampleRecordProcessor()) kclProcess := kcl.New(os.Stdin, os.Stdout, os.Stderr, newSampleRecordProcessor())
kclProcess.Run() kclProcess.Run()
} }

View file

@ -2,7 +2,6 @@ package decode
import ( import (
"encoding/json" "encoding/json"
"errors"
"fmt" "fmt"
"regexp" "regexp"
"strings" "strings"
@ -17,18 +16,6 @@ var reservedFields = []string{
"prefix", "prefix",
"postfix", "postfix",
"decoder_msg_type", "decoder_msg_type",
// used by all logs
"timestamp",
"hostname",
"rawlog",
// Used by Kayvee
"prefix",
"postfix",
// Set to the deepest step of parsing that succeeded
"decoder_msg_type",
} }
func stringInSlice(s string, slice []string) bool { func stringInSlice(s string, slice []string) bool {
@ -71,9 +58,6 @@ func FieldsFromSyslog(line string) (map[string]interface{}, error) {
out[newKey] = v out[newKey] = v
} }
} }
out["decoder_msg_type"] = "syslog"
return out, nil return out, nil
} }
@ -118,20 +102,6 @@ func FieldsFromKayvee(line string) (map[string]interface{}, error) {
return m, nil return m, nil
} }
var userPattern = `#\sUser@Host:\s(?P<user>[a-zA-Z]+\[[a-zA-Z]+\])\s@\s.*Id:\s+(?P<id>[0-9]+)`
var userPatternRegex = regexp.MustCompile(userPattern)
func FieldsFromRDSSlowquery(rawlog string) map[string]interface{} {
out := map[string]interface{}{}
match := userPatternRegex.FindStringSubmatch(rawlog)
if len(match) == 3 {
out["user"] = match[1]
out["user_id"] = match[2]
}
return out
}
// MetricsRoute represents a metrics kv log route // MetricsRoute represents a metrics kv log route
type MetricsRoute struct { type MetricsRoute struct {
Series string Series string
@ -204,17 +174,6 @@ func getStringArray(json map[string]interface{}, key string) []string {
// LogRoutes a type alias to make it easier to add route specific filter functions // LogRoutes a type alias to make it easier to add route specific filter functions
type LogRoutes []map[string]interface{} type LogRoutes []map[string]interface{}
// RuleNames returns a list of the names of the rules
func (l LogRoutes) RuleNames() []string {
names := []string{}
for _, route := range l {
name := getStringValue(route, "rule")
names = append(names, name)
}
return names
}
// MetricsRoutes filters the LogRoutes and returns a list of MetricsRoutes structs // MetricsRoutes filters the LogRoutes and returns a list of MetricsRoutes structs
func (l LogRoutes) MetricsRoutes() []MetricsRoute { func (l LogRoutes) MetricsRoutes() []MetricsRoute {
routes := []MetricsRoute{} routes := []MetricsRoute{}
@ -395,22 +354,18 @@ func ExtractKVMeta(kvlog map[string]interface{}) KVMeta {
} }
// ParseAndEnhance extracts fields from a log line, and does some post-processing to rename/add fields // ParseAndEnhance extracts fields from a log line, and does some post-processing to rename/add fields
func ParseAndEnhance(line string, env string) (map[string]interface{}, error) { func ParseAndEnhance(line string, env string, stringifyNested bool, renameESReservedFields bool, minimumTimestamp time.Time) (map[string]interface{}, error) {
syslog, syslogErr := FieldsFromSyslog(line) out := map[string]interface{}{}
if syslogErr == nil {
return decodeSyslog(syslog, env) syslogFields, err := FieldsFromSyslog(line)
if err != nil {
return map[string]interface{}{}, err
} }
for k, v := range syslogFields {
fluentLog, fluentErr := FieldsFromFluentbitLog(line) out[k] = v
if fluentErr == nil {
return decodeFluent(fluentLog, env)
} }
rawlog := syslogFields["rawlog"].(string)
return nil, fmt.Errorf("unable to parse log line with errors `%v` and `%v`", syslogErr, fluentErr) programname := syslogFields["programname"].(string)
}
func decodeSyslog(syslog map[string]interface{}, env string) (map[string]interface{}, error) {
rawlog := syslog["rawlog"].(string)
// Try pulling Kayvee fields out of message // Try pulling Kayvee fields out of message
kvFields, err := FieldsFromKayvee(rawlog) kvFields, err := FieldsFromKayvee(rawlog)
@ -420,39 +375,94 @@ func decodeSyslog(syslog map[string]interface{}, env string) (map[string]interfa
} }
} else { } else {
for k, v := range kvFields { for k, v := range kvFields {
syslog[k] = v out[k] = v
}
}
// Try pulling RDS slowquery logs fields out of message
if syslog["hostname"] == "aws-rds" {
slowQueryFields := FieldsFromRDSSlowquery(rawlog)
for k, v := range slowQueryFields {
syslog[k] = v
} }
} }
// Inject additional fields that are useful in log-searching and other business logic // Inject additional fields that are useful in log-searching and other business logic
syslog["env"] = env out["env"] = env
// this can error, which indicates inability to extract container meta. That's fine, we can ignore that. // Sometimes its useful to force `container_{env,app,task}`. A specific use-case is writing Docker events.
addContainterMetaToSyslog(syslog) // A separate container monitors for start/stop events, but we set the container values in such a way that
// the logs for these events will appear in context for the app that the user is looking at instead of the
// docker-events app.
forceEnv := ""
forceApp := ""
forceTask := ""
if cEnv, ok := out["container_env"]; ok {
forceEnv = cEnv.(string)
}
if cApp, ok := out["container_app"]; ok {
forceApp = cApp.(string)
}
if cTask, ok := out["container_task"]; ok {
forceTask = cTask.(string)
}
meta, err := getContainerMeta(programname, forceEnv, forceApp, forceTask)
if err == nil {
for k, v := range meta {
out[k] = v
}
}
return syslog, nil // ES dynamic mappings get finnicky once you start sending nested objects.
// E.g., if one app sends a field for the first time as an object, then any log
// sent by another app containing that field /not/ as an object will fail.
// One solution is to decode nested objects as strings.
if stringifyNested {
for k, v := range out {
_, ismap := v.(map[string]interface{})
_, isarr := v.([]interface{})
if ismap || isarr {
bs, _ := json.Marshal(v)
out[k] = string(bs)
}
}
}
// ES doesn't like fields that start with underscores.
if renameESReservedFields {
for oldKey, renamedKey := range esFieldRenames {
if val, ok := out[oldKey]; ok {
out[renamedKey] = val
delete(out, oldKey)
}
}
}
msgTime, ok := out["timestamp"].(time.Time)
if ok && !msgTime.After(minimumTimestamp) {
return map[string]interface{}{}, fmt.Errorf("message's timestamp < minimumTimestamp")
}
return out, nil
}
var esFieldRenames = map[string]string{
"_index": "kv__index",
"_uid": "kv__uid",
"_type": "kv__type",
"_id": "kv__id",
"_source": "kv__source",
"_size": "kv__size",
"_all": "kv__all",
"_field_names": "kv__field_names",
"_timestamp": "kv__timestamp",
"_ttl": "kv__ttl",
"_parent": "kv__parent",
"_routing": "kv__routing",
"_meta": "kv__meta",
} }
const containerMeta = `([a-z0-9-]+)--([a-z0-9-]+)\/` + // env--app const containerMeta = `([a-z0-9-]+)--([a-z0-9-]+)\/` + // env--app
`arn%3Aaws%3Aecs%3Aus-(west|east)-[1-2]%3A[0-9]{12}%3Atask%2F` + // ARN cruft `arn%3Aaws%3Aecs%3Aus-(west|east)-[1-2]%3A[0-9]{12}%3Atask%2F` + // ARN cruft
`([0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}|[a-z0-9]{32}|jr_[a-z0-9-]+)` // task-id (ECS, both EC2 and Fargate), Glue Job ID `([0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12})` // task-id
var containerMetaRegex = regexp.MustCompile(containerMeta) var containerMetaRegex = regexp.MustCompile(containerMeta)
var errBadProgramname = errors.New("invalid or missing programname") func getContainerMeta(programname, forceEnv, forceApp, forceTask string) (map[string]string, error) {
if programname == "" {
func addContainterMetaToSyslog(syslog map[string]interface{}) (map[string]interface{}, error) { return map[string]string{}, fmt.Errorf("no programname")
programname, ok := syslog["programname"].(string)
if !ok || programname == "" {
return syslog, errBadProgramname
} }
env := "" env := ""
@ -463,116 +473,25 @@ func addContainterMetaToSyslog(syslog map[string]interface{}) (map[string]interf
env = matches[0][1] env = matches[0][1]
app = matches[0][2] app = matches[0][2]
task = matches[0][4] task = matches[0][4]
} else {
return syslog, errBadProgramname
} }
// Sometimes its useful to force `container_{env,app,task}`. A specific use-case is writing Docker events. if forceEnv != "" {
// A separate container monitors for start/stop events, but we set the container values in such a way that env = forceEnv
// the logs for these events will appear in context for the app that the user is looking at instead of the
// docker-events app.
if existingEnv, ok := syslog["container_env"].(string); !ok || existingEnv == "" {
syslog["container_env"] = env
} }
if existingApp, ok := syslog["container_app"].(string); !ok || existingApp == "" { if forceApp != "" {
syslog["container_app"] = app app = forceApp
} }
if existingTask, ok := syslog["container_task"].(string); !ok || existingTask == "" { if forceTask != "" {
syslog["container_task"] = task task = forceTask
} }
return syslog, nil if env == "" || app == "" || task == "" {
} return map[string]string{}, fmt.Errorf("unable to get one or more of env/app/task")
}
const (
// These are the fields in the incoming fluentbit JSON that we expect the timestamp and log return map[string]string{
// They are referenced below by the FluentLog type; those are the ones that matter. "container_env": env,
fluentbitTimestampField = "fluent_ts" "container_app": app,
fluentbitLogField = "log" "container_task": task,
}, nil
// This is what we get from the fluentbig config: `time_key_format: "%Y-%m-%dT%H:%M:%S.%L%z"`
fluentTimeFormat = "2006-01-02T15:04:05.999-0700"
)
// FluentLog represents is the set of fields extracted from an incoming fluentbit log
// The struct tags must line up with the JSON schema in the fluentbit configuration, see the comment for FieldsFromFluentBitlog
type FluentLog struct {
Log *string `json:"log"`
Timestamp *string `json:"fluent_ts"`
TaskArn string `json:"ecs_task_arn"`
TaskDefinition string `json:"ecs_task_definition"`
}
// FieldsFromFluentbitLog parses JSON object with fields indicating that it's coming from FluentBit
// Its return value shares a common interface with the Syslog output - with the same four key fields
// Unlike FieldsFromSyslog, it accepts its argument as bytes, since it will be used as bytes immediately and bytes is what comes off the firehose
// In theory, the format we recieve is highly customizable, so we'll be making the following assumptions:
// - All logs are coming from aws-fargate with the ecs-metadata fields (ecs_cluster, ecs_task_arn, ecs_task_definition) enabled
// - The timestamp is in the field given by the FluentBitTimestampField constant in this package
// - The timestamp is of the format of the constant fluentTimestampFormat
// - The log is in the field given by the FluentBitlogField constant in this package
// - The ecs_task_definition is of the from {environment}--{application}--.*
func FieldsFromFluentbitLog(line string) (*FluentLog, error) {
fluentFields := FluentLog{}
if err := json.Unmarshal([]byte(line), &fluentFields); err != nil {
return nil, BadLogFormatError{Format: "fluentbit", DecodingError: err}
}
return &fluentFields, nil
}
func decodeFluent(fluentLog *FluentLog, env string) (map[string]interface{}, error) {
out := map[string]interface{}{}
if fluentLog.Timestamp == nil || *fluentLog.Timestamp == "" {
return nil, BadLogFormatError{Format: "fluentbit", DecodingError: fmt.Errorf("no timestamp found in input field %s", fluentbitTimestampField)}
}
if timeValue, err := time.Parse(fluentTimeFormat, *fluentLog.Timestamp); err == nil {
out["timestamp"] = timeValue
} else {
return nil, BadLogFormatError{Format: "fluentbit", DecodingError: fmt.Errorf("timestamp has bad format: %s", *fluentLog.Timestamp)}
}
if fluentLog.Log == nil {
return nil, BadLogFormatError{Format: "fluentbit", DecodingError: fmt.Errorf("no log found in input field %s", fluentbitLogField)}
}
log := *fluentLog.Log
out["rawlog"] = log
out["decoder_msg_type"] = "fluentbit"
out["hostname"] = "aws-fargate"
// best effort to add container_env|app|task
if parts := strings.SplitN(fluentLog.TaskDefinition, "--", 3); len(parts) == 3 {
out["container_env"] = parts[0]
out["container_app"] = parts[1]
}
if idx := strings.LastIndex(fluentLog.TaskArn, "/"); idx != -1 {
out["container_task"] = fluentLog.TaskArn[idx+1:]
}
kvFields, err := FieldsFromKayvee(log)
if err == nil {
for k, v := range kvFields {
out[k] = v
}
}
// Inject additional fields that are useful in log-searching and other business logic; mimicking syslog behavior
out["env"] = env
return out, nil
}
type BadLogFormatError struct {
Format string
DecodingError error
}
func (b BadLogFormatError) Error() string {
return fmt.Sprintf("trying to decode log as format %s: %v", b.Format, b.DecodingError)
}
func (b BadLogFormatError) Unwrap() error {
return b.DecodingError
} }

View file

@ -1,9 +1,7 @@
package decode package decode
import ( import (
"errors"
"fmt" "fmt"
"sort"
"testing" "testing"
"time" "time"
@ -66,19 +64,6 @@ func TestKayveeDecoding(t *testing.T) {
}, },
ExpectedError: nil, ExpectedError: nil,
}, },
Spec{
Title: "Reserved fields are respected",
Input: `prefix {"a":"b","prefix":"no-override","postfix":"no-override",` +
`"decoder_msg_type":"no-override","timestamp":"no-override",` +
`"hostname":"no-override","rawlog":"no-override"} postfix`,
ExpectedOutput: map[string]interface{}{
"prefix": "prefix ",
"postfix": " postfix",
"a": "b",
"decoder_msg_type": "Kayvee",
},
ExpectedError: nil,
},
Spec{ Spec{
Title: "Returns NonKayveeError if not JSON in body", Title: "Returns NonKayveeError if not JSON in body",
Input: `prefix { postfix`, Input: `prefix { postfix`,
@ -141,11 +126,10 @@ func TestSyslogDecoding(t *testing.T) {
Title: "Parses Rsyslog_TraditionalFileFormat with simple log body", Title: "Parses Rsyslog_TraditionalFileFormat with simple log body",
Input: `Oct 25 10:20:37 some-host docker/fa3a5e338a47[1294]: log body`, Input: `Oct 25 10:20:37 some-host docker/fa3a5e338a47[1294]: log body`,
ExpectedOutput: map[string]interface{}{ ExpectedOutput: map[string]interface{}{
"timestamp": logTime, "timestamp": logTime,
"hostname": "some-host", "hostname": "some-host",
"programname": "docker/fa3a5e338a47", "programname": "docker/fa3a5e338a47",
"rawlog": "log body", "rawlog": "log body",
"decoder_msg_type": "syslog",
}, },
ExpectedError: nil, ExpectedError: nil,
}, },
@ -153,11 +137,10 @@ func TestSyslogDecoding(t *testing.T) {
Title: "Parses Rsyslog_TraditionalFileFormat with haproxy access log body", Title: "Parses Rsyslog_TraditionalFileFormat with haproxy access log body",
Input: `Apr 5 21:45:54 influx-service docker/0000aa112233[1234]: [httpd] 2017/04/05 21:45:54 172.17.42.1 - heka [05/Apr/2017:21:45:54 +0000] POST /write?db=foo&precision=ms HTTP/1.1 204 0 - Go 1.1 package http 123456-1234-1234-b11b-000000000000 13.688672ms`, Input: `Apr 5 21:45:54 influx-service docker/0000aa112233[1234]: [httpd] 2017/04/05 21:45:54 172.17.42.1 - heka [05/Apr/2017:21:45:54 +0000] POST /write?db=foo&precision=ms HTTP/1.1 204 0 - Go 1.1 package http 123456-1234-1234-b11b-000000000000 13.688672ms`,
ExpectedOutput: map[string]interface{}{ ExpectedOutput: map[string]interface{}{
"timestamp": logTime2, "timestamp": logTime2,
"hostname": "influx-service", "hostname": "influx-service",
"programname": "docker/0000aa112233", "programname": "docker/0000aa112233",
"rawlog": "[httpd] 2017/04/05 21:45:54 172.17.42.1 - heka [05/Apr/2017:21:45:54 +0000] POST /write?db=foo&precision=ms HTTP/1.1 204 0 - Go 1.1 package http 123456-1234-1234-b11b-000000000000 13.688672ms", "rawlog": "[httpd] 2017/04/05 21:45:54 172.17.42.1 - heka [05/Apr/2017:21:45:54 +0000] POST /write?db=foo&precision=ms HTTP/1.1 204 0 - Go 1.1 package http 123456-1234-1234-b11b-000000000000 13.688672ms",
"decoder_msg_type": "syslog",
}, },
ExpectedError: nil, ExpectedError: nil,
}, },
@ -165,11 +148,10 @@ func TestSyslogDecoding(t *testing.T) {
Title: "Parses Rsyslog_TraditionalFileFormat", Title: "Parses Rsyslog_TraditionalFileFormat",
Input: `Apr 5 21:45:54 mongodb-some-machine whackanop: 2017/04/05 21:46:11 found 0 ops`, Input: `Apr 5 21:45:54 mongodb-some-machine whackanop: 2017/04/05 21:46:11 found 0 ops`,
ExpectedOutput: map[string]interface{}{ ExpectedOutput: map[string]interface{}{
"timestamp": logTime2, "timestamp": logTime2,
"hostname": "mongodb-some-machine", "hostname": "mongodb-some-machine",
"programname": "whackanop", "programname": "whackanop",
"rawlog": "2017/04/05 21:46:11 found 0 ops", "rawlog": "2017/04/05 21:46:11 found 0 ops",
"decoder_msg_type": "syslog",
}, },
ExpectedError: nil, ExpectedError: nil,
}, },
@ -177,35 +159,10 @@ func TestSyslogDecoding(t *testing.T) {
Title: "Parses Rsyslog_ FileFormat with Kayvee payload", Title: "Parses Rsyslog_ FileFormat with Kayvee payload",
Input: `2017-04-05T21:57:46.794862+00:00 ip-10-0-0-0 env--app/arn%3Aaws%3Aecs%3Aus-west-1%3A999988887777%3Atask%2Fabcd1234-1a3b-1a3b-1234-d76552f4b7ef[3291]: 2017/04/05 21:57:46 some_file.go:10: {"title":"request_finished"}`, Input: `2017-04-05T21:57:46.794862+00:00 ip-10-0-0-0 env--app/arn%3Aaws%3Aecs%3Aus-west-1%3A999988887777%3Atask%2Fabcd1234-1a3b-1a3b-1234-d76552f4b7ef[3291]: 2017/04/05 21:57:46 some_file.go:10: {"title":"request_finished"}`,
ExpectedOutput: map[string]interface{}{ ExpectedOutput: map[string]interface{}{
"timestamp": logTime3, "timestamp": logTime3,
"hostname": "ip-10-0-0-0", "hostname": "ip-10-0-0-0",
"programname": `env--app/arn%3Aaws%3Aecs%3Aus-west-1%3A999988887777%3Atask%2Fabcd1234-1a3b-1a3b-1234-d76552f4b7ef`, "programname": `env--app/arn%3Aaws%3Aecs%3Aus-west-1%3A999988887777%3Atask%2Fabcd1234-1a3b-1a3b-1234-d76552f4b7ef`,
"rawlog": `2017/04/05 21:57:46 some_file.go:10: {"title":"request_finished"}`, "rawlog": `2017/04/05 21:57:46 some_file.go:10: {"title":"request_finished"}`,
"decoder_msg_type": "syslog",
},
ExpectedError: nil,
},
Spec{
Title: "Parses Rsyslog_ FileFormat without PID",
Input: `2017-04-05T21:57:46.794862+00:00 ip-10-0-0-0 env--app/arn%3Aaws%3Aecs%3Aus-west-1%3A999988887777%3Atask%2Fabcd1234-1a3b-1a3b-1234-d76552f4b7ef: 2017/04/05 21:57:46 some_file.go:10: {"title":"request_finished"}`,
ExpectedOutput: map[string]interface{}{
"timestamp": logTime3,
"hostname": "ip-10-0-0-0",
"programname": `env--app/arn%3Aaws%3Aecs%3Aus-west-1%3A999988887777%3Atask%2Fabcd1234-1a3b-1a3b-1234-d76552f4b7ef`,
"rawlog": `2017/04/05 21:57:46 some_file.go:10: {"title":"request_finished"}`,
"decoder_msg_type": "syslog",
},
ExpectedError: nil,
},
Spec{
Title: "Parses Rsyslog_ FileFormat with simple log body for slowquery",
Input: `2017-04-05T21:57:46.794862+00:00 aws-rds production-aurora-test-db: Slow query: select * from table.`,
ExpectedOutput: map[string]interface{}{
"timestamp": logTime3,
"hostname": "aws-rds",
"programname": "production-aurora-test-db",
"rawlog": "Slow query: select * from table.",
"decoder_msg_type": "syslog",
}, },
ExpectedError: nil, ExpectedError: nil,
}, },
@ -222,18 +179,25 @@ func TestSyslogDecoding(t *testing.T) {
fields, err := FieldsFromSyslog(spec.Input) fields, err := FieldsFromSyslog(spec.Input)
if spec.ExpectedError != nil { if spec.ExpectedError != nil {
assert.Error(err) assert.Error(err)
assert.True(errors.As(err, &spec.ExpectedError)) assert.IsType(spec.ExpectedError, err)
return } else {
assert.NoError(err)
} }
assert.NoError(err)
assert.Equal(spec.ExpectedOutput, fields) assert.Equal(spec.ExpectedOutput, fields)
}) })
} }
} }
type ParseAndEnhanceInput struct {
Line string
StringifyNested bool
RenameESReservedFields bool
MinimumTimestamp time.Time
}
type ParseAndEnhanceSpec struct { type ParseAndEnhanceSpec struct {
Title string Title string
Line string Input ParseAndEnhanceInput
ExpectedOutput map[string]interface{} ExpectedOutput map[string]interface{}
ExpectedError error ExpectedError error
} }
@ -252,15 +216,10 @@ func TestParseAndEnhance(t *testing.T) {
} }
logTime3 = logTime3.UTC() logTime3 = logTime3.UTC()
logTime4, err := time.Parse(fluentTimeFormat, "2020-08-13T21:10:57.000+0000")
if err != nil {
t.Fatal(err)
}
specs := []ParseAndEnhanceSpec{ specs := []ParseAndEnhanceSpec{
ParseAndEnhanceSpec{ ParseAndEnhanceSpec{
Title: "Parses a Kayvee log line from an ECS app", Title: "Parses a Kayvee log line from an ECS app",
Line: `2017-04-05T21:57:46.794862+00:00 ip-10-0-0-0 env--app/arn%3Aaws%3Aecs%3Aus-west-1%3A999988887777%3Atask%2Fabcd1234-1a3b-1a3b-1234-d76552f4b7ef[3291]: 2017/04/05 21:57:46 some_file.go:10: {"title":"request_finished"}`, Input: ParseAndEnhanceInput{Line: `2017-04-05T21:57:46.794862+00:00 ip-10-0-0-0 env--app/arn%3Aaws%3Aecs%3Aus-west-1%3A999988887777%3Atask%2Fabcd1234-1a3b-1a3b-1234-d76552f4b7ef[3291]: 2017/04/05 21:57:46 some_file.go:10: {"title":"request_finished"}`},
ExpectedOutput: map[string]interface{}{ ExpectedOutput: map[string]interface{}{
"timestamp": logTime3, "timestamp": logTime3,
"hostname": "ip-10-0-0-0", "hostname": "ip-10-0-0-0",
@ -279,7 +238,7 @@ func TestParseAndEnhance(t *testing.T) {
}, },
ParseAndEnhanceSpec{ ParseAndEnhanceSpec{
Title: "Parses a Kayvee log line from an ECS app, with override to container_app", Title: "Parses a Kayvee log line from an ECS app, with override to container_app",
Line: `2017-04-05T21:57:46.794862+00:00 ip-10-0-0-0 env--app/arn%3Aaws%3Aecs%3Aus-west-1%3A999988887777%3Atask%2Fabcd1234-1a3b-1a3b-1234-d76552f4b7ef[3291]: 2017/04/05 21:57:46 some_file.go:10: {"title":"request_finished","container_app":"force-app"}`, Input: ParseAndEnhanceInput{Line: `2017-04-05T21:57:46.794862+00:00 ip-10-0-0-0 env--app/arn%3Aaws%3Aecs%3Aus-west-1%3A999988887777%3Atask%2Fabcd1234-1a3b-1a3b-1234-d76552f4b7ef[3291]: 2017/04/05 21:57:46 some_file.go:10: {"title":"request_finished","container_app":"force-app"}`},
ExpectedOutput: map[string]interface{}{ ExpectedOutput: map[string]interface{}{
"timestamp": logTime3, "timestamp": logTime3,
"hostname": "ip-10-0-0-0", "hostname": "ip-10-0-0-0",
@ -298,29 +257,28 @@ func TestParseAndEnhance(t *testing.T) {
}, },
ParseAndEnhanceSpec{ ParseAndEnhanceSpec{
Title: "Parses a non-Kayvee log line", Title: "Parses a non-Kayvee log line",
Line: `2017-04-05T21:57:46.794862+00:00 ip-10-0-0-0 env--app/arn%3Aaws%3Aecs%3Aus-west-1%3A999988887777%3Atask%2Fabcd1234-1a3b-1a3b-1234-d76552f4b7ef[3291]: some log`, Input: ParseAndEnhanceInput{Line: `2017-04-05T21:57:46.794862+00:00 ip-10-0-0-0 env--app/arn%3Aaws%3Aecs%3Aus-west-1%3A999988887777%3Atask%2Fabcd1234-1a3b-1a3b-1234-d76552f4b7ef[3291]: some log`},
ExpectedOutput: map[string]interface{}{ ExpectedOutput: map[string]interface{}{
"timestamp": logTime3, "timestamp": logTime3,
"hostname": "ip-10-0-0-0", "hostname": "ip-10-0-0-0",
"programname": `env--app/arn%3Aaws%3Aecs%3Aus-west-1%3A999988887777%3Atask%2Fabcd1234-1a3b-1a3b-1234-d76552f4b7ef`, "programname": `env--app/arn%3Aaws%3Aecs%3Aus-west-1%3A999988887777%3Atask%2Fabcd1234-1a3b-1a3b-1234-d76552f4b7ef`,
"rawlog": `some log`, "rawlog": `some log`,
"env": "deploy-env", "env": "deploy-env",
"decoder_msg_type": "syslog", "container_env": "env",
"container_env": "env", "container_app": "app",
"container_app": "app", "container_task": "abcd1234-1a3b-1a3b-1234-d76552f4b7ef",
"container_task": "abcd1234-1a3b-1a3b-1234-d76552f4b7ef",
}, },
ExpectedError: nil, ExpectedError: nil,
}, },
ParseAndEnhanceSpec{ ParseAndEnhanceSpec{
Title: "Fails to parse non-RSyslog, non-Fluent log line", Title: "Fails to parse non-RSyslog log line",
Line: `not rsyslog`, Input: ParseAndEnhanceInput{Line: `not rsyslog`},
ExpectedOutput: map[string]interface{}{}, ExpectedOutput: map[string]interface{}{},
ExpectedError: fmt.Errorf(""), ExpectedError: &syslogparser.ParserError{},
}, },
ParseAndEnhanceSpec{ ParseAndEnhanceSpec{
Title: "Parses JSON values", Title: "Parses JSON values",
Line: `2017-04-05T21:57:46.794862+00:00 ip-10-0-0-0 env--app/arn%3Aaws%3Aecs%3Aus-west-1%3A999988887777%3Atask%2Fabcd1234-1a3b-1a3b-1234-d76552f4b7ef[3291]: 2017/04/05 21:57:46 some_file.go:10: {"title":"request_finished", "nested": {"a":"b"}}`, Input: ParseAndEnhanceInput{Line: `2017-04-05T21:57:46.794862+00:00 ip-10-0-0-0 env--app/arn%3Aaws%3Aecs%3Aus-west-1%3A999988887777%3Atask%2Fabcd1234-1a3b-1a3b-1234-d76552f4b7ef[3291]: 2017/04/05 21:57:46 some_file.go:10: {"title":"request_finished", "nested": {"a":"b"}}`},
ExpectedOutput: map[string]interface{}{ ExpectedOutput: map[string]interface{}{
"timestamp": logTime3, "timestamp": logTime3,
"hostname": "ip-10-0-0-0", "hostname": "ip-10-0-0-0",
@ -338,323 +296,222 @@ func TestParseAndEnhance(t *testing.T) {
}, },
ExpectedError: nil, ExpectedError: nil,
}, },
ParseAndEnhanceSpec{
Title: "Has the option to stringify object values",
Input: ParseAndEnhanceInput{
Line: `2017-04-05T21:57:46.794862+00:00 ip-10-0-0-0 env--app/arn%3Aaws%3Aecs%3Aus-west-1%3A999988887777%3Atask%2Fabcd1234-1a3b-1a3b-1234-d76552f4b7ef[3291]: 2017/04/05 21:57:46 some_file.go:10: {"title":"request_finished", "nested": {"a":"b"}}`,
StringifyNested: true,
},
ExpectedOutput: map[string]interface{}{
"timestamp": logTime3,
"hostname": "ip-10-0-0-0",
"programname": `env--app/arn%3Aaws%3Aecs%3Aus-west-1%3A999988887777%3Atask%2Fabcd1234-1a3b-1a3b-1234-d76552f4b7ef`,
"rawlog": `2017/04/05 21:57:46 some_file.go:10: {"title":"request_finished", "nested": {"a":"b"}}`,
"title": "request_finished",
"decoder_msg_type": "Kayvee",
"prefix": "2017/04/05 21:57:46 some_file.go:10: ",
"postfix": "",
"env": "deploy-env",
"container_env": "env",
"container_app": "app",
"container_task": "abcd1234-1a3b-1a3b-1234-d76552f4b7ef",
"nested": `{"a":"b"}`,
},
ExpectedError: nil,
},
ParseAndEnhanceSpec{
Title: "Has the option to stringify array values",
Input: ParseAndEnhanceInput{
Line: `2017-04-05T21:57:46.794862+00:00 ip-10-0-0-0 env--app/arn%3Aaws%3Aecs%3Aus-west-1%3A999988887777%3Atask%2Fabcd1234-1a3b-1a3b-1234-d76552f4b7ef[3291]: 2017/04/05 21:57:46 some_file.go:10: {"title":"request_finished", "nested": [{"a":"b"}]}`,
StringifyNested: true,
},
ExpectedOutput: map[string]interface{}{
"timestamp": logTime3,
"hostname": "ip-10-0-0-0",
"programname": `env--app/arn%3Aaws%3Aecs%3Aus-west-1%3A999988887777%3Atask%2Fabcd1234-1a3b-1a3b-1234-d76552f4b7ef`,
"rawlog": `2017/04/05 21:57:46 some_file.go:10: {"title":"request_finished", "nested": [{"a":"b"}]}`,
"title": "request_finished",
"decoder_msg_type": "Kayvee",
"prefix": "2017/04/05 21:57:46 some_file.go:10: ",
"postfix": "",
"env": "deploy-env",
"container_env": "env",
"container_app": "app",
"container_task": "abcd1234-1a3b-1a3b-1234-d76552f4b7ef",
"nested": `[{"a":"b"}]`,
},
ExpectedError: nil,
},
ParseAndEnhanceSpec{
Title: "Has the option to rename reserved ES fields",
Input: ParseAndEnhanceInput{
Line: `2017-04-05T21:57:46.794862+00:00 ip-10-0-0-0 env--app/arn%3Aaws%3Aecs%3Aus-west-1%3A999988887777%3Atask%2Fabcd1234-1a3b-1a3b-1234-d76552f4b7ef[3291]: 2017/04/05 21:57:46 some_file.go:10: {"title":"request_finished", "_source": "a"}`,
RenameESReservedFields: true,
},
ExpectedOutput: map[string]interface{}{
"timestamp": logTime3,
"hostname": "ip-10-0-0-0",
"programname": `env--app/arn%3Aaws%3Aecs%3Aus-west-1%3A999988887777%3Atask%2Fabcd1234-1a3b-1a3b-1234-d76552f4b7ef`,
"rawlog": `2017/04/05 21:57:46 some_file.go:10: {"title":"request_finished", "_source": "a"}`,
"title": "request_finished",
"decoder_msg_type": "Kayvee",
"prefix": "2017/04/05 21:57:46 some_file.go:10: ",
"postfix": "",
"env": "deploy-env",
"container_env": "env",
"container_app": "app",
"container_task": "abcd1234-1a3b-1a3b-1234-d76552f4b7ef",
"kv__source": "a",
},
ExpectedError: nil,
},
ParseAndEnhanceSpec{
Title: "Errors if logTime < MinimumTimestamp",
Input: ParseAndEnhanceInput{
Line: `2017-04-05T21:57:46.794862+00:00 ip-10-0-0-0 env--app/arn%3Aaws%3Aecs%3Aus-west-1%3A999988887777%3Atask%2Fabcd1234-1a3b-1a3b-1234-d76552f4b7ef[3291]: 2017/04/05 21:57:46 some_file.go:10: {"title":"request_finished", "_source": "a"}`,
RenameESReservedFields: true,
MinimumTimestamp: time.Now().Add(100 * time.Hour * 24 * 365), // good thru year 2117
},
ExpectedOutput: map[string]interface{}{},
ExpectedError: fmt.Errorf(""),
},
ParseAndEnhanceSpec{
Title: "Accepts logs if logTime > MinimumTimestamp",
Input: ParseAndEnhanceInput{
Line: `2017-04-05T21:57:46.794862+00:00 ip-10-0-0-0 env--app/arn%3Aaws%3Aecs%3Aus-west-1%3A999988887777%3Atask%2Fabcd1234-1a3b-1a3b-1234-d76552f4b7ef[3291]: 2017/04/05 21:57:46 some_file.go:10: {"title":"request_finished", "_source": "a"}`,
RenameESReservedFields: true,
MinimumTimestamp: time.Now().Add(-100 * time.Hour * 24 * 365), // good thru year 2117
},
ExpectedOutput: map[string]interface{}{
"timestamp": logTime3,
"hostname": "ip-10-0-0-0",
"programname": `env--app/arn%3Aaws%3Aecs%3Aus-west-1%3A999988887777%3Atask%2Fabcd1234-1a3b-1a3b-1234-d76552f4b7ef`,
"rawlog": `2017/04/05 21:57:46 some_file.go:10: {"title":"request_finished", "_source": "a"}`,
"title": "request_finished",
"decoder_msg_type": "Kayvee",
"prefix": "2017/04/05 21:57:46 some_file.go:10: ",
"postfix": "",
"env": "deploy-env",
"container_env": "env",
"container_app": "app",
"container_task": "abcd1234-1a3b-1a3b-1234-d76552f4b7ef",
"kv__source": "a",
},
ExpectedError: nil,
},
ParseAndEnhanceSpec{
Title: "Accepts logs if logTime > MinimumTimestamp",
Input: ParseAndEnhanceInput{
Line: `2017-04-05T21:57:46.794862+00:00 ip-10-0-0-0 env--app/arn%3Aaws%3Aecs%3Aus-west-1%3A999988887777%3Atask%2Fabcd1234-1a3b-1a3b-1234-d76552f4b7ef[3291]: 2017/04/05 21:57:46 some_file.go:10: {"title":"request_finished", "_source": "a"}`,
RenameESReservedFields: true,
MinimumTimestamp: time.Now().Add(-100 * time.Hour * 24 * 365), // good thru year 2117
},
ExpectedOutput: map[string]interface{}{
"timestamp": logTime3,
"hostname": "ip-10-0-0-0",
"programname": `env--app/arn%3Aaws%3Aecs%3Aus-west-1%3A999988887777%3Atask%2Fabcd1234-1a3b-1a3b-1234-d76552f4b7ef`,
"rawlog": `2017/04/05 21:57:46 some_file.go:10: {"title":"request_finished", "_source": "a"}`,
"title": "request_finished",
"decoder_msg_type": "Kayvee",
"prefix": "2017/04/05 21:57:46 some_file.go:10: ",
"postfix": "",
"env": "deploy-env",
"container_env": "env",
"container_app": "app",
"container_task": "abcd1234-1a3b-1a3b-1234-d76552f4b7ef",
"kv__source": "a",
},
ExpectedError: nil,
},
ParseAndEnhanceSpec{ ParseAndEnhanceSpec{
Title: "Log with timestamp time.RFC3339 format", Title: "Log with timestamp time.RFC3339 format",
Line: `2017-04-05T21:57:46+00:00 mongo-docker-pipeline-r10-4 diamond[24099] Signal Received: 15`, Input: ParseAndEnhanceInput{
ExpectedOutput: map[string]interface{}{ Line: `2017-04-05T21:57:46+00:00 mongo-docker-pipeline-r10-4 diamond[24099] ` +
"env": "deploy-env", `Signal Received: 15`,
"hostname": "mongo-docker-pipeline-r10-4", RenameESReservedFields: true,
"programname": "diamond", MinimumTimestamp: time.Now().Add(-100 * time.Hour * 24 * 365), // year 2117
"decoder_msg_type": "syslog",
"rawlog": "Signal Received: 15",
"timestamp": logTime2,
}, },
},
ParseAndEnhanceSpec{
Title: "RDS Slowquery Log rdsadmin",
Line: `2017-04-05T21:57:46+00:00 aws-rds production-aurora-test-db: Slow query: # Time: 190921 16:02:59
# User@Host: rdsadmin[rdsadmin] @ localhost [] Id: 1
# Query_time: 22.741550 Lock_time: 0.000000 Rows_sent: 0 Rows_examined: 0SET timestamp=1569081779;call action start_seamless_scaling('AQEAAB1P/PAIqvTHEQFJAEkojZUoH176FGJttZ62JF5QmRehaf0S0VFTa+5MPJdYQ9k0/sekBlnMi8U=', 300000, 2);
SET timestamp=1569862702;`,
ExpectedOutput: map[string]interface{}{ ExpectedOutput: map[string]interface{}{
"env": "deploy-env", "env": "deploy-env",
"hostname": "aws-rds", "hostname": "mongo-docker-pipeline-r10-4",
"programname": "production-aurora-test-db", "programname": "diamond",
"decoder_msg_type": "syslog", "rawlog": "Signal Received: 15",
"rawlog": "Slow query: # Time: 190921 16:02:59\n# User@Host: rdsadmin[rdsadmin] @ localhost [] Id: 1\n# Query_time: 22.741550 Lock_time: 0.000000 Rows_sent: 0 Rows_examined: 0SET timestamp=1569081779;call action start_seamless_scaling('AQEAAB1P/PAIqvTHEQFJAEkojZUoH176FGJttZ62JF5QmRehaf0S0VFTa+5MPJdYQ9k0/sekBlnMi8U=', 300000, 2);\nSET timestamp=1569862702;", "timestamp": logTime2,
"timestamp": logTime2,
"user": "rdsadmin[rdsadmin]",
"user_id": "1",
}, },
}, },
ParseAndEnhanceSpec{
Title: "RDS Slowquery Log clever user",
Line: `2017-04-05T21:57:46+00:00 aws-rds production-aurora-test-db: Slow query: # Time: 190921 16:02:59
# User@Host: clever[clever] @ [10.1.11.112] Id: 868
# Query_time: 2.000270 Lock_time: 0.000000 Rows_sent: 1 Rows_examined: 0
SET timestamp=1571090308;
select sleep(2);`,
ExpectedOutput: map[string]interface{}{
"env": "deploy-env",
"hostname": "aws-rds",
"programname": "production-aurora-test-db",
"decoder_msg_type": "syslog",
"rawlog": "Slow query: # Time: 190921 16:02:59\n# User@Host: clever[clever] @ [10.1.11.112] Id: 868\n# Query_time: 2.000270 Lock_time: 0.000000 Rows_sent: 1 Rows_examined: 0\nSET timestamp=1571090308;\nselect sleep(2);",
"timestamp": logTime2,
"user": "clever[clever]",
"user_id": "868",
},
},
{
Title: "Valid fluent log",
Line: `{
"container_id": "b17752cdd47a480712eca6c9774d782f68b50a876446faec5e26c9e8846bd29e",
"container_name": "/ecs-env--app--d--2-2-env--app-e295a2dcf6f9ac849101",
"ecs_cluster": "arn:aws:ecs:us-east-1:589690932525:cluster/pod-2870425d-e4b1-4869-b2b7-a6d9fade9be1",
"ecs_task_arn": "arn:aws:ecs:us-east-1:589690932525:task/pod-2870425d-e4b1-4869-b2b7-a6d9fade9be1/7c1ccb5cb5c44582808b9e516b479eb6",
"ecs_task_definition": "env--app--d--2:2",
"fluent_ts": "2020-08-13T21:10:57.000+0000",
"log": "2020/08/13 21:10:57 {\"title\": \"request-finished\"}",
"source": "stderr"
}`,
ExpectedOutput: map[string]interface{}{
"timestamp": logTime4,
"rawlog": "2020/08/13 21:10:57 {\"title\": \"request-finished\"}",
"hostname": "aws-fargate",
"decoder_msg_type": "Kayvee",
"title": "request-finished",
"container_app": "app",
"container_env": "env",
"container_task": "7c1ccb5cb5c44582808b9e516b479eb6",
"prefix": "2020/08/13 21:10:57 ",
"postfix": "",
"env": "deploy-env",
},
ExpectedError: nil,
},
{
Title: "fluent log missing ecs_task_definition still parses and has container_task",
Line: `{
"container_id": "b17752cdd47a480712eca6c9774d782f68b50a876446faec5e26c9e8846bd29e",
"container_name": "/ecs-env--app--d--2-2-env--app-e295a2dcf6f9ac849101",
"ecs_cluster": "arn:aws:ecs:us-east-1:589690932525:cluster/pod-2870425d-e4b1-4869-b2b7-a6d9fade9be1",
"ecs_task_arn": "arn:aws:ecs:us-east-1:589690932525:task/pod-2870425d-e4b1-4869-b2b7-a6d9fade9be1/7c1ccb5cb5c44582808b9e516b479eb6",
"fluent_ts": "2020-08-13T21:10:57.000+0000",
"log": "2020/08/13 21:10:57 {\"title\": \"request-finished\"}",
"source": "stderr"
}`,
ExpectedOutput: map[string]interface{}{
"timestamp": logTime4,
"rawlog": "2020/08/13 21:10:57 {\"title\": \"request-finished\"}",
"hostname": "aws-fargate",
"decoder_msg_type": "Kayvee",
"title": "request-finished",
"container_task": "7c1ccb5cb5c44582808b9e516b479eb6",
"prefix": "2020/08/13 21:10:57 ",
"postfix": "",
"env": "deploy-env",
},
},
{
Title: "fluent log with bad ecs_task_definition format still parses and has container_task",
Line: `{
"container_id": "b17752cdd47a480712eca6c9774d782f68b50a876446faec5e26c9e8846bd29e",
"container_name": "/ecs-env--app--d--2-2-env--app-e295a2dcf6f9ac849101",
"ecs_cluster": "arn:aws:ecs:us-east-1:589690932525:cluster/pod-2870425d-e4b1-4869-b2b7-a6d9fade9be1",
"ecs_task_arn": "arn:aws:ecs:us-east-1:589690932525:task/pod-2870425d-e4b1-4869-b2b7-a6d9fade9be1/7c1ccb5cb5c44582808b9e516b479eb6",
"ecs_task_definition": "env--app",
"fluent_ts": "2020-08-13T21:10:57.000+0000",
"log": "2020/08/13 21:10:57 {\"title\": \"request-finished\"}",
"source": "stderr"
}`,
ExpectedOutput: map[string]interface{}{
"timestamp": logTime4,
"rawlog": "2020/08/13 21:10:57 {\"title\": \"request-finished\"}",
"hostname": "aws-fargate",
"decoder_msg_type": "Kayvee",
"title": "request-finished",
"container_task": "7c1ccb5cb5c44582808b9e516b479eb6",
"prefix": "2020/08/13 21:10:57 ",
"postfix": "",
"env": "deploy-env",
},
},
{
Title: "fluent log missing ecs_task_arn still parses and has containter_env and app",
Line: `{
"container_id": "b17752cdd47a480712eca6c9774d782f68b50a876446faec5e26c9e8846bd29e",
"container_name": "/ecs-env--app--d--2-2-env--app-e295a2dcf6f9ac849101",
"ecs_cluster": "arn:aws:ecs:us-east-1:589690932525:cluster/pod-2870425d-e4b1-4869-b2b7-a6d9fade9be1",
"ecs_task_definition": "env--app--d--2:2",
"fluent_ts": "2020-08-13T21:10:57.000+0000",
"log": "2020/08/13 21:10:57 {\"title\": \"request-finished\"}",
"source": "stderr"
}`,
ExpectedOutput: map[string]interface{}{
"timestamp": logTime4, // time.Date(2020, 8, 13, 21, 10, 57, 0, time.UTC),
"rawlog": "2020/08/13 21:10:57 {\"title\": \"request-finished\"}",
"hostname": "aws-fargate",
"decoder_msg_type": "Kayvee",
"title": "request-finished",
"container_env": "env",
"container_app": "app",
"prefix": "2020/08/13 21:10:57 ",
"postfix": "",
"env": "deploy-env",
},
},
{
Title: "fluent log missing timestamp is an error",
Line: `{
"container_id": "b17752cdd47a480712eca6c9774d782f68b50a876446faec5e26c9e8846bd29e",
"container_name": "/ecs-env--app--d--2-2-env--app-e295a2dcf6f9ac849101",
"ecs_cluster": "arn:aws:ecs:us-east-1:589690932525:cluster/pod-2870425d-e4b1-4869-b2b7-a6d9fade9be1",
"ecs_task_arn": "arn:aws:ecs:us-east-1:589690932525:task/pod-2870425d-e4b1-4869-b2b7-a6d9fade9be1/7c1ccb5cb5c44582808b9e516b479eb6",
"ecs_task_definition": "env--app--d--2:2",
"ts": "2020-08-13T21:10:57.000+0000",
"log": "2020/08/13 21:10:57 {\"deploy_env\":\"env\",\"level\":\"info\",\"pod-account\":\"589690932525\",\"pod-id\":\"2870425d-e4b1-4869-b2b7-a6d9fade9be1\",\"pod-region\":\"us-east-1\",\"pod-shortname\":\"us-east-1-dev-canary-2870425d\",\"source\":\"app-service\",\"title\":\"NumFDs\",\"type\":\"gauge\",\"value\":33,\"via\":\"process-metrics\"}",
"source": "stderr"
}`,
ExpectedOutput: nil,
ExpectedError: BadLogFormatError{},
},
{
Title: "fluent log with bad timestamp format is an error",
Line: `{
"container_id": "b17752cdd47a480712eca6c9774d782f68b50a876446faec5e26c9e8846bd29e",
"container_name": "/ecs-env--app--d--2-2-env--app-e295a2dcf6f9ac849101",
"ecs_cluster": "arn:aws:ecs:us-east-1:589690932525:cluster/pod-2870425d-e4b1-4869-b2b7-a6d9fade9be1",
"ecs_task_arn": "arn:aws:ecs:us-east-1:589690932525:task/pod-2870425d-e4b1-4869-b2b7-a6d9fade9be1/7c1ccb5cb5c44582808b9e516b479eb6",
"ecs_task_definition": "env--app--d--2:2",
"fluent_ts": "2020-08-13T21:10:57.000Z",
"log": "2020/08/13 21:10:57 {\"deploy_env\":\"env\",\"level\":\"info\",\"pod-account\":\"589690932525\",\"pod-id\":\"2870425d-e4b1-4869-b2b7-a6d9fade9be1\",\"pod-region\":\"us-east-1\",\"pod-shortname\":\"us-east-1-dev-canary-2870425d\",\"source\":\"app-service\",\"title\":\"NumFDs\",\"type\":\"gauge\",\"value\":33,\"via\":\"process-metrics\"}",
"source": "stderr"
}`,
ExpectedOutput: nil,
ExpectedError: BadLogFormatError{},
},
{
Title: "Valid fluent log with overrides for container_env, app, and task",
Line: `{
"container_id": "b17752cdd47a480712eca6c9774d782f68b50a876446faec5e26c9e8846bd29e",
"container_name": "/ecs-env--app--d--2-2-env--app-e295a2dcf6f9ac849101",
"ecs_cluster": "arn:aws:ecs:us-east-1:589690932525:cluster/pod-2870425d-e4b1-4869-b2b7-a6d9fade9be1",
"ecs_task_arn": "arn:aws:ecs:us-east-1:589690932525:task/pod-2870425d-e4b1-4869-b2b7-a6d9fade9be1/7c1ccb5cb5c44582808b9e516b479eb6",
"ecs_task_definition": "env--app--d--2:2",
"fluent_ts": "2020-08-13T21:10:57.000+0000",
"log": "2020/08/13 21:10:57 {\"title\": \"request-finished\", \"container_env\": \"env2\", \"container_task\": \"task\", \"container_app\": \"app2\"}",
"source": "stderr"
}`,
ExpectedOutput: map[string]interface{}{
"timestamp": logTime4,
"rawlog": "2020/08/13 21:10:57 {\"title\": \"request-finished\", \"container_env\": \"env2\", \"container_task\": \"task\", \"container_app\": \"app2\"}",
"hostname": "aws-fargate",
"decoder_msg_type": "Kayvee",
"title": "request-finished",
"container_app": "app2",
"container_env": "env2",
"container_task": "task",
"prefix": "2020/08/13 21:10:57 ",
"postfix": "",
"env": "deploy-env",
},
ExpectedError: nil,
},
} }
for _, spec := range specs { for _, spec := range specs {
t.Run(fmt.Sprintf(spec.Title), func(t *testing.T) { t.Run(fmt.Sprintf(spec.Title), func(t *testing.T) {
assert := assert.New(t) assert := assert.New(t)
fields, err := ParseAndEnhance(spec.Line, "deploy-env") fields, err := ParseAndEnhance(spec.Input.Line, "deploy-env", spec.Input.StringifyNested, spec.Input.RenameESReservedFields, spec.Input.MinimumTimestamp)
if spec.ExpectedError != nil { if spec.ExpectedError != nil {
assert.Error(err) assert.Error(err)
assert.True(errors.As(err, &spec.ExpectedError)) assert.IsType(spec.ExpectedError, err)
return } else {
assert.NoError(err)
} }
assert.NoError(err)
assert.Equal(spec.ExpectedOutput, fields) assert.Equal(spec.ExpectedOutput, fields)
}) })
} }
} }
func TestGetContainerMeta(t *testing.T) { func TestGetContainerMeta(t *testing.T) {
assert := assert.New(t)
type containerMetaSpec struct { t.Log("Must have a programname to get container meta")
description string programname := ""
input map[string]interface{} _, err := getContainerMeta(programname, "", "", "")
wantErr bool assert.Error(err)
wantContainerEnv string
wantContainerApp string
wantContainerTask string
}
tests := []containerMetaSpec{ t.Log("Can parse a programname")
{ programname = `env1--app2/arn%3Aaws%3Aecs%3Aus-west-1%3A589690932525%3Atask%2Fabcd1234-1a3b-1a3b-1234-d76552f4b7ef`
description: "Must have a programname to get container meta", meta, err := getContainerMeta(programname, "", "", "")
input: map[string]interface{}{ assert.NoError(err)
"programname": "", assert.Equal(map[string]string{
}, "container_env": "env1",
wantErr: true, "container_app": "app2",
}, "container_task": "abcd1234-1a3b-1a3b-1234-d76552f4b7ef",
{ }, meta)
description: "Can parse a programname",
input: map[string]interface{}{
"programname": `env1--app2/arn%3Aaws%3Aecs%3Aus-west-1%3A589690932525%3Atask%2Fabcd1234-1a3b-1a3b-1234-d76552f4b7ef`,
},
wantContainerEnv: "env1",
wantContainerApp: "app2",
wantContainerTask: "abcd1234-1a3b-1a3b-1234-d76552f4b7ef",
},
{
description: "Can override just 'env'",
input: map[string]interface{}{
"programname": `env1--app2/arn%3Aaws%3Aecs%3Aus-west-1%3A589690932525%3Atask%2Fabcd1234-1a3b-1a3b-1234-d76552f4b7ef`,
"container_env": "force-env",
},
wantContainerEnv: "force-env",
wantContainerApp: "app2",
wantContainerTask: "abcd1234-1a3b-1a3b-1234-d76552f4b7ef",
},
{
description: "Can override just 'app'",
input: map[string]interface{}{
"programname": `env1--app2/arn%3Aaws%3Aecs%3Aus-west-1%3A589690932525%3Atask%2Fabcd1234-1a3b-1a3b-1234-d76552f4b7ef`,
"container_app": "force-app",
},
wantContainerEnv: "env1",
wantContainerApp: "force-app",
wantContainerTask: "abcd1234-1a3b-1a3b-1234-d76552f4b7ef",
},
{
description: "Can override just 'task'",
input: map[string]interface{}{
"programname": `env1--app2/arn%3Aaws%3Aecs%3Aus-west-1%3A589690932525%3Atask%2Fabcd1234-1a3b-1a3b-1234-d76552f4b7ef`,
"container_task": "force-task",
},
wantContainerEnv: "env1",
wantContainerApp: "app2",
wantContainerTask: "force-task",
},
{
description: "Can override all fields",
input: map[string]interface{}{
"programname": `env1--app2/arn%3Aaws%3Aecs%3Aus-west-1%3A589690932525%3Atask%2Fabcd1234-1a3b-1a3b-1234-d76552f4b7ef`,
"container_task": "force-task",
"container_app": "force-app",
"container_env": "force-env",
},
wantContainerEnv: "force-env",
wantContainerApp: "force-app",
wantContainerTask: "force-task",
},
}
for _, tcase := range tests { t.Log("Can override just 'env'")
assert := assert.New(t) overrideEnv := "force-env"
meta, err = getContainerMeta(programname, overrideEnv, "", "")
assert.NoError(err)
assert.Equal(map[string]string{
"container_env": overrideEnv,
"container_app": "app2",
"container_task": "abcd1234-1a3b-1a3b-1234-d76552f4b7ef",
}, meta)
syslog := map[string]interface{}{} t.Log("Can override just 'app'")
for k, v := range tcase.input { overrideApp := "force-app"
syslog[k] = v meta, err = getContainerMeta(programname, "", overrideApp, "")
} assert.NoError(err)
assert.Equal(map[string]string{
"container_env": "env1",
"container_app": overrideApp,
"container_task": "abcd1234-1a3b-1a3b-1234-d76552f4b7ef",
}, meta)
newSyslog, err := addContainterMetaToSyslog(syslog) t.Log("Can override just 'task'")
if tcase.wantErr { overrideTask := "force-task"
assert.Error(err) meta, err = getContainerMeta(programname, "", "", overrideTask)
continue assert.NoError(err)
} assert.Equal(map[string]string{
for k, v := range newSyslog { "container_env": "env1",
switch k { "container_app": "app2",
case "container_env": "container_task": overrideTask,
assert.Equal(v, tcase.wantContainerEnv) }, meta)
case "container_app":
assert.Equal(v, tcase.wantContainerApp)
case "container_task":
assert.Equal(v, tcase.wantContainerTask)
default:
assert.Equal(v, tcase.input[k])
}
}
}
t.Log("Can override all fields")
programname = `env--app/arn%3Aaws%3Aecs%3Aus-west-1%3A999988887777%3Atask%2Fabcd1234-1a3b-1a3b-1234-d76552f4b7ef`
meta, err = getContainerMeta(programname, overrideEnv, overrideApp, overrideTask)
assert.NoError(err)
assert.Equal(map[string]string{
"container_env": overrideEnv,
"container_app": overrideApp,
"container_task": overrideTask,
}, meta)
} }
func TestExtractKVMeta(t *testing.T) { func TestExtractKVMeta(t *testing.T) {
@ -666,7 +523,6 @@ func TestExtractKVMeta(t *testing.T) {
Team string Team string
Language string Language string
Version string Version string
Names []string
ExpectedMetricsRoutes []MetricsRoute ExpectedMetricsRoutes []MetricsRoute
ExpectedAnalyticsRoutes []AnalyticsRoute ExpectedAnalyticsRoutes []AnalyticsRoute
ExpectedNotificationRoutes []NotificationRoute ExpectedNotificationRoutes []NotificationRoute
@ -674,12 +530,10 @@ func TestExtractKVMeta(t *testing.T) {
}{ }{
{ {
Description: "log line with no routes", Description: "log line with no routes",
Names: []string{},
Log: map[string]interface{}{"hi": "hello!"}, Log: map[string]interface{}{"hi": "hello!"},
}, },
{ {
Description: "empty _kvmeta", Description: "empty _kvmeta",
Names: []string{},
Log: map[string]interface{}{ Log: map[string]interface{}{
"hi": "hello!", "hi": "hello!",
"_kvmeta": map[string]interface{}{}, "_kvmeta": map[string]interface{}{},
@ -690,7 +544,6 @@ func TestExtractKVMeta(t *testing.T) {
Team: "green", Team: "green",
Version: "three", Version: "three",
Language: "tree", Language: "tree",
Names: []string{},
Log: map[string]interface{}{ Log: map[string]interface{}{
"hi": "hello!", "hi": "hello!",
"_kvmeta": map[string]interface{}{ "_kvmeta": map[string]interface{}{
@ -705,7 +558,6 @@ func TestExtractKVMeta(t *testing.T) {
Team: "green", Team: "green",
Version: "three", Version: "three",
Language: "tree", Language: "tree",
Names: []string{"cool", "cool2"},
ExpectedMetricsRoutes: []MetricsRoute{ ExpectedMetricsRoutes: []MetricsRoute{
{ {
Series: "1,1,2,3,5,8,13", Series: "1,1,2,3,5,8,13",
@ -749,7 +601,6 @@ func TestExtractKVMeta(t *testing.T) {
Team: "green", Team: "green",
Version: "christmas", Version: "christmas",
Language: "tree", Language: "tree",
Names: []string{"what's-this?", "there's-app-invites-everywhere"},
ExpectedAnalyticsRoutes: []AnalyticsRoute{ ExpectedAnalyticsRoutes: []AnalyticsRoute{
{ {
Series: "what-is-this", Series: "what-is-this",
@ -786,7 +637,6 @@ func TestExtractKVMeta(t *testing.T) {
Team: "slack", Team: "slack",
Version: "evergreen", Version: "evergreen",
Language: "markdown-ish", Language: "markdown-ish",
Names: []string{"did-you-know", "what's-the-catch"},
ExpectedNotificationRoutes: []NotificationRoute{ ExpectedNotificationRoutes: []NotificationRoute{
{ {
RuleName: "did-you-know", RuleName: "did-you-know",
@ -833,13 +683,12 @@ func TestExtractKVMeta(t *testing.T) {
Team: "a-team", Team: "a-team",
Version: "old", Version: "old",
Language: "jive", Language: "jive",
Names: []string{"last-call", "watch-out-now"},
ExpectedAlertRoutes: []AlertRoute{ ExpectedAlertRoutes: []AlertRoute{
{ {
RuleName: "last-call", RuleName: "last-call",
Series: "doing-it-til-we-fall", Series: "doing-it-til-we-fall",
Dimensions: []string{"who", "where"}, Dimensions: []string{"who", "where"},
StatType: "gauge", StatType: "guage",
ValueField: "status", ValueField: "status",
}, },
{ {
@ -862,7 +711,7 @@ func TestExtractKVMeta(t *testing.T) {
"rule": "last-call", "rule": "last-call",
"series": "doing-it-til-we-fall", "series": "doing-it-til-we-fall",
"dimensions": []interface{}{"who", "where"}, "dimensions": []interface{}{"who", "where"},
"stat_type": "gauge", "stat_type": "guage",
"value_field": "status", "value_field": "status",
}, },
map[string]interface{}{ map[string]interface{}{
@ -880,9 +729,6 @@ func TestExtractKVMeta(t *testing.T) {
Team: "diversity", Team: "diversity",
Version: "kv-routes", Version: "kv-routes",
Language: "understanding", Language: "understanding",
Names: []string{
"all-combos", "there's-app-invites-everywhere", "what's-the-catch", "last-call",
},
ExpectedMetricsRoutes: []MetricsRoute{ ExpectedMetricsRoutes: []MetricsRoute{
{ {
RuleName: "all-combos", RuleName: "all-combos",
@ -911,7 +757,7 @@ func TestExtractKVMeta(t *testing.T) {
RuleName: "last-call", RuleName: "last-call",
Series: "doing-it-til-we-fall", Series: "doing-it-til-we-fall",
Dimensions: []string{"who", "where"}, Dimensions: []string{"who", "where"},
StatType: "gauge", StatType: "guage",
ValueField: "status", ValueField: "status",
}, },
}, },
@ -944,7 +790,7 @@ func TestExtractKVMeta(t *testing.T) {
"rule": "last-call", "rule": "last-call",
"series": "doing-it-til-we-fall", "series": "doing-it-til-we-fall",
"dimensions": []interface{}{"who", "where"}, "dimensions": []interface{}{"who", "where"},
"stat_type": "gauge", "stat_type": "guage",
"value_field": "status", "value_field": "status",
}, },
}, },
@ -961,10 +807,6 @@ func TestExtractKVMeta(t *testing.T) {
assert.Equal(test.Language, kvmeta.Language) assert.Equal(test.Language, kvmeta.Language)
assert.Equal(test.Version, kvmeta.Version) assert.Equal(test.Version, kvmeta.Version)
assert.EqualValues(
sort.StringSlice(test.Names), sort.StringSlice(kvmeta.Routes.RuleNames()),
)
assert.Equal(len(test.ExpectedMetricsRoutes), len(kvmeta.Routes.MetricsRoutes())) assert.Equal(len(test.ExpectedMetricsRoutes), len(kvmeta.Routes.MetricsRoutes()))
for idx, route := range kvmeta.Routes.MetricsRoutes() { for idx, route := range kvmeta.Routes.MetricsRoutes() {
expected := test.ExpectedMetricsRoutes[idx] expected := test.ExpectedMetricsRoutes[idx]
@ -1011,7 +853,7 @@ func BenchmarkFieldsFromSyslog(b *testing.B) {
func BenchmarkParseAndEnhance(b *testing.B) { func BenchmarkParseAndEnhance(b *testing.B) {
for n := 0; n < b.N; n++ { for n := 0; n < b.N; n++ {
_, err := ParseAndEnhance(benchmarkLine, "env") _, err := ParseAndEnhance(benchmarkLine, "env", false, false, time.Time{})
if err != nil { if err != nil {
b.FailNow() b.FailNow()
} }

53
glide.lock generated Normal file
View file

@ -0,0 +1,53 @@
hash: 6cc2c955c4d70fea747e0056aa63e8fe6a0f8d8c9baca20cf3a5a34d0c55b1fb
updated: 2017-08-16T20:57:02.3853046Z
imports:
- name: github.com/aws/aws-sdk-go
version: 819b71cf8430e434c1eee7e7e8b0f2b8870be899
subpackages:
- aws
- aws/session
- service/firehose
- name: github.com/Clever/kinesis-to-firehose
version: dca69c87e1662c7b1f55581399544d05fdcb09ab
subpackages:
- writer
- name: github.com/Clever/syslogparser
version: fb28ad3e4340c046323b7beba685a72fd12ecbe8
subpackages:
- rfc3164
- name: github.com/jeromer/syslogparser
version: 0e4ae46ea3f08de351074b643d649d5d00661a3c
- name: github.com/xeipuuv/gojsonpointer
version: e0fe6f68307607d540ed8eac07a342c33fa1b54a
- name: github.com/xeipuuv/gojsonreference
version: e02fc20de94c78484cd5ffb007f8af96be030a45
- name: github.com/xeipuuv/gojsonschema
version: e18f0065e8c148fcf567ac43a3f8f5b66ac0720b
- name: golang.org/x/net
version: a6577fac2d73be281a500b310739095313165611
subpackages:
- context
- name: golang.org/x/time
version: f51c12702a4d776e4c1fa9b0fabab841babae631
subpackages:
- rate
- name: gopkg.in/Clever/kayvee-go.v6
version: 096364e316a52652d3493be702d8105d8d01db84
subpackages:
- logger
- router
- name: gopkg.in/yaml.v2
version: a5b47d31c556af34a302ce5d659e6fea44d90de0
testImports:
- name: github.com/davecgh/go-spew
version: 6d212800a42e8ab5c146b8ace3490ee17e5225f9
subpackages:
- spew
- name: github.com/pmezard/go-difflib
version: d8ed2627bdf02c080bf22230dbb337003b7aba2d
subpackages:
- difflib
- name: github.com/stretchr/testify
version: 69483b4bd14f5845b5a1e55bca19e954e827f1d0
subpackages:
- assert

23
glide.yaml Normal file
View file

@ -0,0 +1,23 @@
package: github.com/Clever/amazon-kinesis-client-go
import:
- package: github.com/Clever/kinesis-to-firehose
subpackages:
- writer
- package: github.com/Clever/syslogparser
version: master
subpackages:
- rfc3164
- package: github.com/aws/aws-sdk-go
subpackages:
- aws
- aws/session
- service/firehose
- package: golang.org/x/time
subpackages:
- rate
- package: gopkg.in/Clever/kayvee-go.v6
version: ^6.0.0
testImport:
- package: github.com/stretchr/testify
subpackages:
- assert

22
go.mod
View file

@ -1,22 +0,0 @@
module github.com/amazon-kinesis-client-go
go 1.13
require (
github.com/Clever/amazon-kinesis-client-go v1.0.0
github.com/Clever/syslogparser v0.0.0-20170816194131-fb28ad3e4340
github.com/a8m/kinesis-producer v0.2.0
github.com/aws/aws-sdk-go v1.35.28 // indirect
github.com/davecgh/go-spew v1.1.1 // indirect
github.com/golang/protobuf v1.5.2
github.com/jeromer/syslogparser v0.0.0-20190429161531-5fbaaf06d9e7 // indirect
github.com/jpillora/backoff v0.0.0-20170918002102-8eab2debe79d // indirect
github.com/stretchr/testify v1.7.0
github.com/xeipuuv/gojsonpointer v0.0.0-20190905194746-02993c407bfb // indirect
github.com/xeipuuv/gojsonschema v1.2.1-0.20200424115421-065759f9c3d7 // indirect
golang.org/x/time v0.0.0-20200630173020-3af7569d3a1e
gopkg.in/Clever/kayvee-go.v6 v6.24.1
gopkg.in/yaml.v2 v2.3.1-0.20200602174213-b893565b90ca // indirect
gopkg.in/yaml.v3 v3.0.0-20200615113413-eeeca48fe776 // indirect
launchpad.net/gocheck v0.0.0-20140225173054-000000000087 // indirect
)

63
go.sum
View file

@ -1,63 +0,0 @@
github.com/Clever/amazon-kinesis-client-go v1.0.0 h1:rG+hkHCSe+xMGk3asESg18tiEj8X6To8vj0TRa1pKvQ=
github.com/Clever/amazon-kinesis-client-go v1.0.0/go.mod h1:jLfi8QusUjdsYazmEH0DlSO4gE1WRzVE7cs4c/fOFdI=
github.com/Clever/syslogparser v0.0.0-20170816194131-fb28ad3e4340 h1:wr8lTqPJZQpZkZgllQtrD96SecXkAu5MxzY3yJgsuCg=
github.com/Clever/syslogparser v0.0.0-20170816194131-fb28ad3e4340/go.mod h1:e7Yy7RTiIMU9pZ+dcSviX3cpod8e0CEeTUPTgBbKlRE=
github.com/a8m/kinesis-producer v0.2.0 h1:Bd5Oi4dczbTLPIZwVbm02464LIFgBqmViFj//b098xc=
github.com/a8m/kinesis-producer v0.2.0/go.mod h1:CxoFe0Y49udKMnQPkC5S5VmZZy6a+Bef9otuoH96Pv0=
github.com/aws/aws-sdk-go v1.35.28 h1:S2LuRnfC8X05zgZLC8gy/Sb82TGv2Cpytzbzz7tkeHc=
github.com/aws/aws-sdk-go v1.35.28/go.mod h1:tlPOdRjfxPBpNIwqDj61rmsnA85v9jc0Ps9+muhnW+k=
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/golang/protobuf v1.5.0/go.mod h1:FsONVRAS9T7sI+LIUmWTfcYkHO4aIWwzhcaSAoJOfIk=
github.com/golang/protobuf v1.5.2 h1:ROPKBNFfQgOUMifHyP+KYbvpjbdoFNs+aK7DXlji0Tw=
github.com/golang/protobuf v1.5.2/go.mod h1:XVQd3VNwM+JqD3oG2Ue2ip4fOMUkwXdXDdiuN0vRsmY=
github.com/google/go-cmp v0.5.5 h1:Khx7svrCpmxxtHBq5j2mp/xVjsi8hQMfNLvJFAlrGgU=
github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
github.com/jeromer/syslogparser v0.0.0-20190429161531-5fbaaf06d9e7 h1:FRDp/rRU2sThfssdHIUATCKJOms8R6A9rrhdC1sLqFI=
github.com/jeromer/syslogparser v0.0.0-20190429161531-5fbaaf06d9e7/go.mod h1:mQyv/QAgjs9+PTi/iXveno+U86nKGsltjqf3ilYx4Bg=
github.com/jmespath/go-jmespath v0.4.0 h1:BEgLn5cpjn8UN1mAw4NjwDrS35OdebyEtFe+9YPoQUg=
github.com/jmespath/go-jmespath v0.4.0/go.mod h1:T8mJZnbsbmF+m6zOOFylbeCJqk5+pHWvzYPziyZiYoo=
github.com/jmespath/go-jmespath/internal/testify v1.5.1 h1:shLQSRRSCCPj3f2gpwzGwWFoC7ycTf1rcQZHOlsJ6N8=
github.com/jmespath/go-jmespath/internal/testify v1.5.1/go.mod h1:L3OGu8Wl2/fWfCI6z80xFu9LTZmf1ZRjMHUOPmWr69U=
github.com/jpillora/backoff v0.0.0-20170918002102-8eab2debe79d h1:ix3WmphUvN0GDd0DO9MH0v6/5xTv+Xm1bPN+1UJn58k=
github.com/jpillora/backoff v0.0.0-20170918002102-8eab2debe79d/go.mod h1:2iMrUgbbvHEiQClaW2NsSzMyGHqN+rDFqY705q49KG0=
github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
github.com/stretchr/testify v1.7.0 h1:nwc3DEeHmmLAfoZucVR881uASk0Mfjw8xYJ99tb5CcY=
github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
github.com/xeipuuv/gojsonpointer v0.0.0-20180127040702-4e3ac2762d5f/go.mod h1:N2zxlSyiKSe5eX1tZViRH5QA0qijqEDrYZiPEAiq3wU=
github.com/xeipuuv/gojsonpointer v0.0.0-20190905194746-02993c407bfb h1:zGWFAtiMcyryUHoUjUJX0/lt1H2+i2Ka2n+D3DImSNo=
github.com/xeipuuv/gojsonpointer v0.0.0-20190905194746-02993c407bfb/go.mod h1:N2zxlSyiKSe5eX1tZViRH5QA0qijqEDrYZiPEAiq3wU=
github.com/xeipuuv/gojsonreference v0.0.0-20180127040603-bd5ef7bd5415 h1:EzJWgHovont7NscjpAxXsDA8S8BMYve8Y5+7cuRE7R0=
github.com/xeipuuv/gojsonreference v0.0.0-20180127040603-bd5ef7bd5415/go.mod h1:GwrjFmJcFw6At/Gs6z4yjiIwzuJ1/+UwLxMQDVQXShQ=
github.com/xeipuuv/gojsonschema v1.2.1-0.20200424115421-065759f9c3d7 h1:tdnG+ZILOafvA29+pYKP3gauEbigHll3PHsf1GQa2ms=
github.com/xeipuuv/gojsonschema v1.2.1-0.20200424115421-065759f9c3d7/go.mod h1:anYRn/JVcOK2ZgGU+IjEV4nwlhoK5sQluxsYJ78Id3Y=
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
golang.org/x/net v0.0.0-20200202094626-16171245cfb2 h1:CCH4IOTTfewWjGOlSp+zGcjutRKlBEZQ6wTn8ozI/nI=
golang.org/x/net v0.0.0-20200202094626-16171245cfb2/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/text v0.3.0 h1:g61tztE5qeGQ89tm6NTjjM9VPIm088od1l6aSorWRWg=
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
golang.org/x/time v0.0.0-20200630173020-3af7569d3a1e h1:EHBhcS0mlXEAVwNyO2dLfjToGsyY4j24pTs2ScHnX7s=
golang.org/x/time v0.0.0-20200630173020-3af7569d3a1e/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ=
golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543 h1:E7g+9GITq07hpfrRu66IVDexMakfv52eLZ2CXBWiKr4=
golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
google.golang.org/protobuf v1.26.0-rc.1/go.mod h1:jlhhOSvTdKEhbULTjvd4ARK9grFBp09yW+WbY/TyQbw=
google.golang.org/protobuf v1.26.0 h1:bxAC2xTBsZGibn2RTntX0oH50xLsqy1OxA9tTL3p/lk=
google.golang.org/protobuf v1.26.0/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc=
gopkg.in/Clever/kayvee-go.v6 v6.24.1 h1:lHxjlTFj57mfrq06PiCKIAJ0QuHHEqsyvSYvzb+gSFg=
gopkg.in/Clever/kayvee-go.v6 v6.24.1/go.mod h1:G0m6nBZj7Kdz+w2hiIaawmhXl5zp7E/K0ashol3Kb2A=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/yaml.v2 v2.2.8/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
gopkg.in/yaml.v2 v2.3.1-0.20200602174213-b893565b90ca h1:oivFrl3Vo+KfpUmTDJvz91I+BWzDPOQ+0CNR5jwTHcg=
gopkg.in/yaml.v2 v2.3.1-0.20200602174213-b893565b90ca/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
gopkg.in/yaml.v3 v3.0.0-20200615113413-eeeca48fe776 h1:tQIYjPdBoyREyB9XMu+nnTclpTYkz2zFM+lzLJFO4gQ=
gopkg.in/yaml.v3 v3.0.0-20200615113413-eeeca48fe776/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
launchpad.net/gocheck v0.0.0-20140225173054-000000000087 h1:Izowp2XBH6Ya6rv+hqbceQyw/gSGoXfH/UPoTGduL54=
launchpad.net/gocheck v0.0.0-20140225173054-000000000087/go.mod h1:hj7XX3B/0A+80Vse0e+BUHsHMTEhd0O4cpUHr/e/BUM=

View file

@ -1,55 +1,54 @@
# This is the default Clever Golang Makefile. # This is the default Clever Golang Makefile.
# It is stored in the dev-handbook repo, github.com/Clever/dev-handbook # It is stored in the dev-handbook repo, github.com/Clever/dev-handbook
# Please do not alter this file directly. # Please do not alter this file directly.
GOLANG_MK_VERSION := 1.0.0 GOLANG_MK_VERSION := 0.1.4
SHELL := /bin/bash SHELL := /bin/bash
SYSTEM := $(shell uname -a | cut -d" " -f1 | tr '[:upper:]' '[:lower:]') .PHONY: golang-godep-vendor golang-test-deps $(GODEP)
.PHONY: golang-test-deps golang-ensure-curl-installed
# set timezone to UTC for golang to match circle and deploys
export TZ=UTC
# go build flags for use across all commands which accept them
GO_BUILD_FLAGS := "-mod=vendor"
# if the gopath includes several directories, use only the first # if the gopath includes several directories, use only the first
GOPATH=$(shell echo $$GOPATH | cut -d: -f1) GOPATH=$(shell echo $$GOPATH | cut -d: -f1)
# This block checks and confirms that the proper Go toolchain version is installed. # This block checks and confirms that the proper Go toolchain version is installed.
# It uses ^ matching in the semver sense -- you can be ahead by a minor
# version, but not a major version (patch is ignored).
# arg1: golang version # arg1: golang version
define golang-version-check define golang-version-check
_ := $(if \ GOVERSION := $(shell go version | grep $(1))
$(shell \ _ := $(if \
expr >/dev/null \ $(shell go version | grep $(1)), \
`go version | cut -d" " -f3 | cut -c3- | cut -d. -f2 | sed -E 's/beta[0-9]+//'` \ @echo "", \
\>= `echo $(1) | cut -d. -f2` \ $(error "must be running Go version $(1)"))
\& \
`go version | cut -d" " -f3 | cut -c3- | cut -d. -f1` \
= `echo $(1) | cut -d. -f1` \
&& echo 1), \
@echo "", \
$(error must be running Go version ^$(1) - you are running $(shell go version | cut -d" " -f3 | cut -c3-)))
endef endef
export GO15VENDOREXPERIMENT=1
# FGT is a utility that exits with 1 whenever any stderr/stdout output is recieved. # FGT is a utility that exits with 1 whenever any stderr/stdout output is recieved.
# We pin its version since its a simple tool that does its job as-is;
# so we're defended against it breaking or changing in the future.
FGT := $(GOPATH)/bin/fgt FGT := $(GOPATH)/bin/fgt
$(FGT): $(FGT):
go get github.com/GeertJohan/fgt@262f7b11eec07dc7b147c44641236f3212fee89d go get github.com/GeertJohan/fgt
golang-ensure-curl-installed: # Godep is a tool used to manage Golang dependencies in the style of the Go 1.5
@command -v curl >/dev/null 2>&1 || { echo >&2 "curl not installed. Please install curl."; exit 1; } # vendoring experiment.
GODEP := $(GOPATH)/bin/godep
$(GODEP):
go get -u github.com/tools/godep
# Golint is a tool for linting Golang code for common errors. # Golint is a tool for linting Golang code for common errors.
# We pin its version because an update could add a new lint check which would make
# previously passing tests start failing without changing our code.
GOLINT := $(GOPATH)/bin/golint GOLINT := $(GOPATH)/bin/golint
$(GOLINT): $(GOLINT):
go get golang.org/x/lint/golint@738671d3881b9731cc63024d5d88cf28db875626 go get github.com/golang/lint/golint
# golang-vendor-deps installs all dependencies needed for different test cases.
golang-godep-vendor-deps: $(GODEP)
# golang-godep-vendor is a target for saving dependencies with the godep tool
# to the vendor/ directory. All nested vendor/ directories are deleted as they
# are not handled well by the Go toolchain.
# arg1: pkg path
define golang-godep-vendor
$(GODEP) save $(1)
@# remove any nested vendor directories
find vendor/ -path '*/vendor' -type d | xargs -IX rm -r X
endef
# golang-fmt-deps requires the FGT tool for checking output # golang-fmt-deps requires the FGT tool for checking output
golang-fmt-deps: $(FGT) golang-fmt-deps: $(FGT)
@ -58,7 +57,7 @@ golang-fmt-deps: $(FGT)
# arg1: pkg path # arg1: pkg path
define golang-fmt define golang-fmt
@echo "FORMATTING $(1)..." @echo "FORMATTING $(1)..."
@PKG_PATH=$$(go list -f '{{.Dir}}' $(1)); $(FGT) gofmt -l=true $${PKG_PATH}/*.go @$(FGT) gofmt -l=true $(GOPATH)/src/$(1)/*.go
endef endef
# golang-lint-deps requires the golint tool for golang linting. # golang-lint-deps requires the golint tool for golang linting.
@ -68,7 +67,7 @@ golang-lint-deps: $(GOLINT)
# arg1: pkg path # arg1: pkg path
define golang-lint define golang-lint
@echo "LINTING $(1)..." @echo "LINTING $(1)..."
@PKG_PATH=$$(go list -f '{{.Dir}}' $(1)); find $${PKG_PATH}/*.go -type f | grep -v gen_ | xargs $(GOLINT) @find $(GOPATH)/src/$(1)/*.go -type f | grep -v gen_ | xargs $(GOLINT)
endef endef
# golang-lint-deps-strict requires the golint tool for golang linting. # golang-lint-deps-strict requires the golint tool for golang linting.
@ -79,7 +78,7 @@ golang-lint-deps-strict: $(GOLINT) $(FGT)
# arg1: pkg path # arg1: pkg path
define golang-lint-strict define golang-lint-strict
@echo "LINTING $(1)..." @echo "LINTING $(1)..."
@PKG_PATH=$$(go list -f '{{.Dir}}' $(1)); find $${PKG_PATH}/*.go -type f | grep -v gen_ | xargs $(FGT) $(GOLINT) @find $(GOPATH)/src/$(1)/*.go -type f | grep -v gen_ | xargs $(FGT) $(GOLINT)
endef endef
# golang-test-deps is here for consistency # golang-test-deps is here for consistency
@ -89,7 +88,7 @@ golang-test-deps:
# arg1: pkg path # arg1: pkg path
define golang-test define golang-test
@echo "TESTING $(1)..." @echo "TESTING $(1)..."
@go test $(GO_BUILD_FLAGS) -v $(1) @go test -v $(1)
endef endef
# golang-test-strict-deps is here for consistency # golang-test-strict-deps is here for consistency
@ -99,7 +98,7 @@ golang-test-strict-deps:
# arg1: pkg path # arg1: pkg path
define golang-test-strict define golang-test-strict
@echo "TESTING $(1)..." @echo "TESTING $(1)..."
@go test -v $(GO_BUILD_FLAGS) -race $(1) @go test -v -race $(1)
endef endef
# golang-vet-deps is here for consistency # golang-vet-deps is here for consistency
@ -109,7 +108,7 @@ golang-vet-deps:
# arg1: pkg path # arg1: pkg path
define golang-vet define golang-vet
@echo "VETTING $(1)..." @echo "VETTING $(1)..."
@go vet $(GO_BUILD_FLAGS) $(1) @go vet $(GOPATH)/src/$(1)/*.go
endef endef
# golang-test-all-deps installs all dependencies needed for different test cases. # golang-test-all-deps installs all dependencies needed for different test cases.
@ -137,20 +136,7 @@ $(call golang-vet,$(1))
$(call golang-test-strict,$(1)) $(call golang-test-strict,$(1))
endef endef
# golang-build: builds a golang binary. ensures CGO build is done during CI. This is needed to make a binary that works with a Docker alpine image.
# arg1: pkg path
# arg2: executable name
define golang-build
@echo "BUILDING..."
@if [ -z "$$CI" ]; then \
go build $(GO_BUILD_FLAGS) -o bin/$(2) $(1); \
else \
echo "-> Building CGO binary"; \
CGO_ENABLED=0 go build $(GO_BUILD_FLAGS) -installsuffix cgo -o bin/$(2) $(1); \
fi;
endef
# golang-update-makefile downloads latest version of golang.mk # golang-update-makefile downloads latest version of golang.mk
golang-update-makefile: golang-update-makefile:
@wget https://raw.githubusercontent.com/Clever/dev-handbook/master/make/golang-v1.mk -O /tmp/golang.mk 2>/dev/null @wget https://raw.githubusercontent.com/Clever/dev-handbook/master/make/golang.mk -O /tmp/golang.mk 2>/dev/null
@if ! grep -q $(GOLANG_MK_VERSION) /tmp/golang.mk; then cp /tmp/golang.mk golang.mk && echo "golang.mk updated"; else echo "golang.mk is up-to-date"; fi @if ! grep -q $(GOLANG_MK_VERSION) /tmp/golang.mk; then cp /tmp/golang.mk golang.mk && echo "golang.mk updated"; else echo "golang.mk is up-to-date"; fi

View file

@ -51,71 +51,93 @@ func (i ioHandler) readLine() (string, error) {
return line, nil return line, nil
} }
// ActionName is the "action" string contained in the JSON sent from the KCL.
type ActionName string
const (
ActionNameInitialize ActionName = "initialize"
ActionNameProcessRecords = "processRecords"
ActionNameShutdownRequested = "shutdownRequested"
ActionNameShutdown = "shutdown"
ActionNameCheckpoint = "checkpoint"
)
type ActionInitialize struct { type ActionInitialize struct {
Action string `json:"action"`
ShardID string `json:"shardId"` ShardID string `json:"shardId"`
SequenceNumber string `json:"sequenceNumber"` SequenceNumber string `json:"sequenceNumber"`
SubSequenceNumber int `json:"subSequenceNumber"` SubSequenceNumber int `json:"subSequenceNumber"`
} }
func (a ActionInitialize) Name() ActionName {
return ActionNameInitialize
}
type Record struct { type Record struct {
SequenceNumber string `json:"sequenceNumber"` SequenceNumber string `json:"sequenceNumber"`
SubSequenceNumber int `json:"subSequenceNumber"` SubSequenceNumber int `json:"subSequenceNumber"`
PartitionKey string `json:"partitionKey"` ApproximateArrivalTimestamp int `json:"approximateArrivalTimestamp"`
Data string `json:"data"` PartitionKey string `json:"partitionKey"`
// KCL v2 has a different format, causing failure when json.Unmarshal. Data string `json:"data"`
// This is not used anywhere currently, so commenting it out.
//ApproximateArrivalTimestamp int `json:"approximateArrivalTimestamp"`
} }
type ActionProcessRecords struct { type ActionProcessRecords struct {
Action string `json:"action"`
Records []Record `json:"records"` Records []Record `json:"records"`
MillisBehindLatest int `json:"millisBehindLatest"` MillisBehindLatest int `json:"millisBehindLatest"`
} }
func (a ActionProcessRecords) Name() ActionName {
return ActionNameProcessRecords
}
type ActionShutdown struct { type ActionShutdown struct {
Action string `json:"action"`
Reason string `json:"reason"` Reason string `json:"reason"`
} }
func (a ActionShutdown) Name() ActionName {
return ActionNameShutdown
}
type ActionCheckpoint struct { type ActionCheckpoint struct {
Action string `json:"action"` Action ActionName `json:"action"`
SequenceNumber *string `json:"sequenceNumber,omitempty"` SequenceNumber *string `json:"sequenceNumber,omitempty"`
SubSequenceNumber *int `json:"subSequenceNumber,omitempty"` SubSequenceNumber *int `json:"subSequenceNumber,omitempty"`
Error *string `json:"error,omitempty"` Error *string `json:"error,omitempty"`
}
func (a ActionCheckpoint) Name() ActionName {
return ActionNameCheckpoint
} }
func (i ioHandler) loadAction(line string) (interface{}, error) { func (i ioHandler) loadAction(line string) (interface{}, error) {
lineBytes := []byte(line) lineBytes := []byte(line)
var message struct { var message struct {
Action string `json:"action"` Action ActionName `json:"action"`
} }
if err := json.Unmarshal(lineBytes, &message); err != nil { if err := json.Unmarshal(lineBytes, &message); err != nil {
return nil, err return nil, err
} }
switch message.Action { switch message.Action {
case "initialize": case ActionNameInitialize:
var actionInitialize ActionInitialize var actionInitialize ActionInitialize
if err := json.Unmarshal(lineBytes, &actionInitialize); err != nil { if err := json.Unmarshal(lineBytes, &actionInitialize); err != nil {
return nil, err return nil, err
} }
return actionInitialize, nil return actionInitialize, nil
case "processRecords": case ActionNameProcessRecords:
var actionProcessRecords ActionProcessRecords var actionProcessRecords ActionProcessRecords
if err := json.Unmarshal(lineBytes, &actionProcessRecords); err != nil { if err := json.Unmarshal(lineBytes, &actionProcessRecords); err != nil {
return nil, err return nil, err
} }
return actionProcessRecords, nil return actionProcessRecords, nil
case "shutdownRequested": case ActionNameShutdownRequested:
fallthrough fallthrough
case "shutdown": case ActionNameShutdown:
var actionShutdown ActionShutdown var actionShutdown ActionShutdown
if err := json.Unmarshal(lineBytes, &actionShutdown); err != nil { if err := json.Unmarshal(lineBytes, &actionShutdown); err != nil {
return nil, err return nil, err
} }
return actionShutdown, nil return actionShutdown, nil
case "checkpoint": case ActionNameCheckpoint:
var actionCheckpoint ActionCheckpoint var actionCheckpoint ActionCheckpoint
if err := json.Unmarshal(lineBytes, &actionCheckpoint); err != nil { if err := json.Unmarshal(lineBytes, &actionCheckpoint); err != nil {
return nil, err return nil, err
@ -210,41 +232,41 @@ func (kclp *KCLProcess) handleCheckpointAction(action ActionCheckpoint) error {
func (kclp *KCLProcess) sendCheckpoint(seq *string, subSeq *int) error { func (kclp *KCLProcess) sendCheckpoint(seq *string, subSeq *int) error {
return kclp.ioHandler.writeAction(ActionCheckpoint{ return kclp.ioHandler.writeAction(ActionCheckpoint{
Action: "checkpoint", Action: ActionNameCheckpoint,
SequenceNumber: seq, SequenceNumber: seq,
SubSequenceNumber: subSeq, SubSequenceNumber: subSeq,
}) })
} }
func (kclp *KCLProcess) reportDone(responseFor string) error { func (kclp *KCLProcess) reportDone(responseFor ActionName) error {
return kclp.ioHandler.writeAction(struct { return kclp.ioHandler.writeAction(struct {
Action string `json:"action"` Action string `json:"action"`
ResponseFor string `json:"responseFor"` ResponseFor ActionName `json:"responseFor"`
}{ }{
Action: "status", Action: "status",
ResponseFor: responseFor, ResponseFor: responseFor,
}) })
} }
func (kclp *KCLProcess) handleLine(line string) (string, error) { type Action interface {
Name() ActionName
}
// handleLine processes a line of text sent to the process by the KCL. It returns the action handled, if any.
func (kclp *KCLProcess) handleLine(line string) (Action, error) {
action, err := kclp.ioHandler.loadAction(line) action, err := kclp.ioHandler.loadAction(line)
if err != nil { if err != nil {
return "", err return nil, err
} }
switch action := action.(type) { switch action := action.(type) {
case ActionCheckpoint: case ActionCheckpoint:
return "checkpoint", kclp.handleCheckpointAction(action) return action, kclp.handleCheckpointAction(action)
case ActionShutdown: case ActionShutdown:
kclp.ioHandler.writeError("Received shutdown action...") kclp.ioHandler.writeError("Received shutdown action...")
reason := action.Reason
// Shutdown should block until it's safe to shutdown the process // Shutdown should block until it's safe to shutdown the process
if action.Action == "shutdownRequested" { err = kclp.recordProcessor.Shutdown(action.Reason)
reason = "SHUTDOWN_REQUESTED"
}
err = kclp.recordProcessor.Shutdown(reason)
if err != nil { // Log error and continue shutting down if err != nil { // Log error and continue shutting down
kclp.ioHandler.writeError(fmt.Sprintf("ERR shutdown: %+#v", err)) kclp.ioHandler.writeError(fmt.Sprintf("ERR shutdown: %+#v", err))
} }
@ -258,23 +280,23 @@ func (kclp *KCLProcess) handleLine(line string) (string, error) {
} }
} }
return "shutdown", kclp.reportDone("shutdown") return action, kclp.reportDone(action.Name())
case ActionInitialize: case ActionInitialize:
err := kclp.recordProcessor.Initialize(action.ShardID, kclp) err := kclp.recordProcessor.Initialize(action.ShardID, kclp)
if err != nil { if err != nil {
return "", err return nil, err
} }
return "initialize", kclp.reportDone(action.Action) return action, kclp.reportDone(action.Name())
case ActionProcessRecords: case ActionProcessRecords:
err := kclp.recordProcessor.ProcessRecords(action.Records) err := kclp.recordProcessor.ProcessRecords(action.Records)
if err != nil { if err != nil {
return "", err return nil, err
} }
return "process-record", kclp.reportDone(action.Action) return action, kclp.reportDone(action.Name())
default: default:
return "", fmt.Errorf("unknown action to dispatch: %+#v", action) return nil, fmt.Errorf("unknown action to dispatch: %+#v", action)
} }
} }
@ -295,7 +317,7 @@ func (kclp *KCLProcess) Run() {
action, err := kclp.handleLine(line) action, err := kclp.handleLine(line)
if err != nil { if err != nil {
kclp.ioHandler.writeError(fmt.Sprintf("ERR Handle line: %+#v", err)) kclp.ioHandler.writeError(fmt.Sprintf("ERR Handle line: %+#v", err))
} else if action == "shutdown" { } else if action != nil && action.Name() == ActionNameShutdown {
return return
} }

View file

@ -1,186 +1,20 @@
// Package splitter provides functions for decoding various kinds of records that might come off of a kinesis stream.
// It is equipped to with the functions to unbundle KPL aggregates and CloudWatch log bundles,
// as well as apply appropriate decompression.
// KCL applications would be most interested in `SplitMessageIfNecessary` which can handle zlibbed records as well as
// CloudWatch bundles. KCL automatically unbundles KPL aggregates before passing the records to the consumer.
// Non-KCL applications (such as Lambdas consuming KPL-produced aggregates) should either use
// - KPLDeaggregate if the consumer purely wants to unbundle KPL aggregates, but will handle the raw records themselves.
// - Deaggregate if the consumer wants to apply the same decompress and split logic as SplitMessageIfNecessary
// in addition to the KPL splitting.
package splitter package splitter
import ( import (
"bytes" "bytes"
"compress/gzip" "compress/gzip"
"compress/zlib"
"crypto/md5"
"encoding/json" "encoding/json"
"fmt" "fmt"
"io/ioutil" "io/ioutil"
"regexp" "regexp"
"strconv"
"time" "time"
kpl "github.com/a8m/kinesis-producer"
"github.com/golang/protobuf/proto"
) )
// The Amazon Kinesis Producer Library (KPL) aggregates multiple logical user records into a single
// Amazon Kinesis record for efficient puts.
// https://github.com/awslabs/amazon-kinesis-producer/blob/master/aggregation-format.md
var kplMagicNumber = []byte{0xF3, 0x89, 0x9A, 0xC2}
// IsKPLAggregate checks a record for a KPL aggregate prefix.
// It is not necessary to call this before calling KPLDeaggregate.
func IsKPLAggregate(data []byte) bool {
return bytes.HasPrefix(data, kplMagicNumber)
}
// KPLDeaggregate takes a Kinesis record and converts it to one or more user records by applying KPL deaggregation.
// If the record begins with the 4-byte magic prefix that KPL uses, the single Kinesis record is split into its component user records.
// Otherwise, the return value is a singleton containing the original record.
func KPLDeaggregate(kinesisRecord []byte) ([][]byte, error) {
if !IsKPLAggregate(kinesisRecord) {
return [][]byte{kinesisRecord}, nil
}
src := kinesisRecord[len(kplMagicNumber) : len(kinesisRecord)-md5.Size]
checksum := kinesisRecord[len(kinesisRecord)-md5.Size:]
recordSum := md5.Sum(src)
for i, b := range checksum {
if b != recordSum[i] {
// either the data is corrupted or this is not a KPL aggregate
// either way, return the data as-is
return [][]byte{kinesisRecord}, nil
}
}
dest := new(kpl.AggregatedRecord)
err := proto.Unmarshal(src, dest)
if err != nil {
return nil, fmt.Errorf("unmarshalling proto: %v", err)
}
var records [][]byte
for _, userRecord := range dest.GetRecords() {
records = append(records, userRecord.Data)
}
return records, nil
}
// Deaggregate is a combination of KPLDeaggregate and SplitMessageIfNecessary
// First it tries to KPL-deaggregate. If unsuccessful, it calls SplitIfNecessary on the original record.
// If successful, it iterates over the individual user records and attempts to unzlib them.
// If a record inside an aggregate is in zlib format, the output will contain the unzlibbed version.
// If it is not zlibbed, the output will contain the record verbatim
// A similar result can be optained by calling KPLDeaggregate, then iterating over the results and callin SplitMessageIfNecessary.
// This function makes the assumption that after KPL-deaggregating, the results are not CloudWatch aggregates, so it doesn't need to check them for a gzip header.
// Also it lets us iterate over the user records one less time, since KPLDeaggregate loops over the records and we would need to loop again to unzlib.
//
// See the SplitMessageIfNecessary documentation for the format of output for CloudWatch log bundles.
func Deaggregate(kinesisRecord []byte) ([][]byte, error) {
if !IsKPLAggregate(kinesisRecord) {
return SplitMessageIfNecessary(kinesisRecord)
}
src := kinesisRecord[len(kplMagicNumber) : len(kinesisRecord)-md5.Size]
checksum := kinesisRecord[len(kinesisRecord)-md5.Size:]
recordSum := md5.Sum(src)
for i, b := range checksum {
if b != recordSum[i] {
// either the data is corrupted or this is not a KPL aggregate
// either way, return the data as-is
return [][]byte{kinesisRecord}, nil
}
}
dest := new(kpl.AggregatedRecord)
err := proto.Unmarshal(src, dest)
if err != nil {
return nil, fmt.Errorf("unmarshalling proto: %v", err)
}
var records [][]byte
for _, userRecord := range dest.GetRecords() {
record, err := unzlib(userRecord.Data)
if err != nil {
return nil, fmt.Errorf("unzlibbing record: %w", err)
}
records = append(records, record)
}
return records, nil
}
// SplitMessageIfNecessary recieves a user-record and returns a slice of one or more records.
// if the record is coming off of a kinesis stream and might be KPL aggregated, it needs to be deaggregated before calling this.
// This function handles three types of records:
// - records emitted from CWLogs Subscription (which are gzip compressed)
// - zlib compressed records (e.g. as compressed and emitted by Kinesis plugin for Fluent Bit
// - any other record (left unchanged)
//
// CloudWatch logs come as structured JSON. In the process of splitting, they are converted
// into an rsyslog format that allows fairly uniform parsing of the result across the
// AWS services that might emit logs to CloudWatch.
// Note that these timezone used in these syslog records is guessed based on the local env.
// If you need consistent timezones, set TZ=UTC in your environment.
func SplitMessageIfNecessary(userRecord []byte) ([][]byte, error) {
// First try the record as a CWLogs record
if IsGzipped(userRecord) {
return GetMessagesFromGzippedInput(userRecord)
}
unzlibRecord, err := unzlib(userRecord)
if err != nil {
return nil, err
}
// Process a single message, from KPL
return [][]byte{unzlibRecord}, nil
}
func unzlib(input []byte) ([]byte, error) {
zlibReader, err := zlib.NewReader(bytes.NewReader(input))
if err == nil {
unzlibRecord, err := ioutil.ReadAll(zlibReader)
if err != nil {
return nil, fmt.Errorf("reading zlib-compressed record: %v", err)
}
return unzlibRecord, nil
}
return input, nil
}
// LogEvent is a single log line within a LogEventBatch // LogEvent is a single log line within a LogEventBatch
type LogEvent struct { type LogEvent struct {
ID string `json:"id"` ID string `json:"id"`
Timestamp UnixTimestampMillis `json:"timestamp"` Timestamp int64 `json:"timestamp"`
Message string `json:"message"` Message string `json:"message"`
}
// UnixTimestampMillis is a time.Time that marshals (unmarshals) to (from) a unix timestamp with millisecond resolution.
type UnixTimestampMillis time.Time
func NewUnixTimestampMillis(ts int64) UnixTimestampMillis {
return UnixTimestampMillis(time.Unix(ts/millisPerSecond,
(ts%millisPerSecond)*nanosPerMillisecond))
}
func (t *UnixTimestampMillis) MarshalJSON() ([]byte, error) {
ts := time.Time(*t).UnixNano()
stamp := fmt.Sprint(ts / nanosPerMillisecond)
return []byte(stamp), nil
}
var millisPerSecond = int64(time.Second / time.Millisecond)
var nanosPerMillisecond = int64(time.Millisecond / time.Nanosecond)
func (t *UnixTimestampMillis) UnmarshalJSON(b []byte) error {
ts, err := strconv.ParseInt(string(b), 10, 64)
if err != nil {
return err
}
*t = NewUnixTimestampMillis(ts)
return nil
}
func (t *UnixTimestampMillis) Time() time.Time {
return time.Time(*t)
} }
// LogEventBatch is a batch of multiple log lines, read from a KinesisStream with a CWLogs subscription // LogEventBatch is a batch of multiple log lines, read from a KinesisStream with a CWLogs subscription
@ -233,201 +67,43 @@ const RFC3339Micro = "2006-01-02T15:04:05.999999-07:00"
// http://docs.aws.amazon.com/batch/latest/userguide/job_states.html // http://docs.aws.amazon.com/batch/latest/userguide/job_states.html
// "log stream name format is jobDefinitionName/default/ecs_task_id (this format may change in the future)." // "log stream name format is jobDefinitionName/default/ecs_task_id (this format may change in the future)."
const awsBatchTaskMeta = `([a-z0-9-]+)--([a-z0-9-]+)\/` + // env--app const taskMeta = `([a-z0-9-]+)--([a-z0-9-]+)\/` + // env--app
`default\/` + `default\/` +
`([0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12})` // task-id `([0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12})` // task-id
var awsBatchTaskRegex = regexp.MustCompile(awsBatchTaskMeta)
// lambda log groups are of the form /aws/lambda/<env>--<app> var taskRegex = regexp.MustCompile(taskMeta)
var awsLambdaLogGroupRegex = regexp.MustCompile(`^/aws/lambda/([a-z0-9-]+)--([a-z0-9-]+)$`)
var awsLambdaRequestIDRegex = regexp.MustCompile(`[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}`)
// fargate log groups are of the form /ecs/<env>--<app>
// fargate log streams are of the form fargate/<container name>/<ecs task id>
var awsFargateLogGroupRegex = regexp.MustCompile(`^/ecs/([a-z0-9-]+)--([a-z0-9-]+)$`)
var awsFargateLogStreamRegex = regexp.MustCompile(`^fargate/([a-z0-9-]+)--([a-z0-9-]+)/([a-z0-9]+)$`)
// RDS slowquery log groups are in the form of /aws/rds/cluster/<database name>/slowquery
var awsRDSLogGroupRegex = regexp.MustCompile(`^/aws/rds/cluster/([a-z0-9-]+)/slowquery$`)
// glue log groups are of the form /aws-glue/jobs/<env>/<app>/<pod-id>
// glue log streams are of the form <job id>-<"driver" | "1" | "progress-bar">
var awsGlueLogGroupRegex = regexp.MustCompile(`^/aws-glue/jobs/([a-z0-9-]+)/([a-z0-9-]+)/([a-z0-9-]+)$`)
var awsGlueLogStreamRegex = regexp.MustCompile(`^(jr_[a-z0-9-]+)-.*$`)
// arn and task cruft to satisfy parsing later on: https://github.com/Clever/amazon-kinesis-client-go/blob/94aacdf8339bd2cc8400d3bcb323dc1bce2c8422/decode/decode.go#L421-L425
const arnCruft = `/arn%3Aaws%3Aecs%3Aus-east-1%3A999988887777%3Atask%2F`
const taskCruft = `12345678-1234-1234-1234-555566667777`
type RSysLogMessage struct {
Timestamp time.Time
Hostname string
ProgramName string
Message string
}
func (r RSysLogMessage) String() string {
// Adding an extra Microsecond forces `Format` to include all 6 digits within the micorsecond format.
// Otherwise, time.Format omits trailing zeroes. (https://github.com/golang/go/issues/12472)
return fmt.Sprintf(`%s %s %s: %s`,
r.Timestamp.Add(time.Microsecond).Format(RFC3339Micro),
r.Hostname, r.ProgramName, r.Message)
}
func splitAWSBatch(b LogEventBatch) ([]RSysLogMessage, bool) {
matches := awsBatchTaskRegex.FindAllStringSubmatch(b.LogStream, 1)
if len(matches) != 1 {
return nil, false
}
env := matches[0][1]
app := matches[0][2]
task := matches[0][3]
out := []RSysLogMessage{}
for _, event := range b.LogEvents {
out = append(out, RSysLogMessage{
Timestamp: event.Timestamp.Time(),
ProgramName: env + "--" + app + arnCruft + task,
Hostname: "aws-batch",
Message: event.Message,
})
}
return out, true
}
func splitAWSLambda(b LogEventBatch) ([]RSysLogMessage, bool) {
matches := awsLambdaLogGroupRegex.FindAllStringSubmatch(b.LogGroup, 1)
if len(matches) != 1 {
return nil, false
}
env := matches[0][1]
app := matches[0][2]
out := []RSysLogMessage{}
for _, event := range b.LogEvents {
// find the request ID, e.g. 1f7fcc25-015f-11e8-a728-a1b6168ab9aa, set it as task
var task string
if matches := awsLambdaRequestIDRegex.FindAllString(event.Message, 1); len(matches) == 1 {
task = matches[0]
} else {
task = taskCruft // rsyslog message must contain a non-empty task ID to satisfy later parsing
}
out = append(out, RSysLogMessage{
Timestamp: event.Timestamp.Time(),
ProgramName: env + "--" + app + arnCruft + task,
Hostname: "aws-lambda",
Message: event.Message,
})
}
return out, true
}
func splitAWSFargate(b LogEventBatch) ([]RSysLogMessage, bool) {
matches := awsFargateLogGroupRegex.FindAllStringSubmatch(b.LogGroup, 1)
if len(matches) != 1 {
return nil, false
}
env := matches[0][1]
app := matches[0][2]
streamMatches := awsFargateLogStreamRegex.FindAllStringSubmatch(b.LogStream, 1)
if len(streamMatches) != 1 {
return nil, false
}
ecsTaskID := streamMatches[0][3]
out := []RSysLogMessage{}
for _, event := range b.LogEvents {
out = append(out, RSysLogMessage{
Timestamp: event.Timestamp.Time(),
ProgramName: env + "--" + app + arnCruft + ecsTaskID,
Hostname: "aws-fargate",
Message: event.Message,
})
}
return out, true
}
func splitAWSRDS(b LogEventBatch) ([]RSysLogMessage, bool) {
matches := awsRDSLogGroupRegex.FindAllStringSubmatch(b.LogGroup, 1)
if len(matches) != 1 {
return nil, false
}
databaseName := matches[0][1]
out := []RSysLogMessage{}
for _, event := range b.LogEvents {
out = append(out, RSysLogMessage{
Timestamp: event.Timestamp.Time(),
ProgramName: databaseName,
Hostname: "aws-rds",
Message: event.Message,
})
}
return out, true
}
func splitAWSGlue(b LogEventBatch) ([]RSysLogMessage, bool) {
matches := awsGlueLogGroupRegex.FindAllStringSubmatch(b.LogGroup, 1)
if len(matches) != 1 {
return nil, false
}
env := matches[0][1]
app := matches[0][2]
streamMatches := awsGlueLogStreamRegex.FindAllStringSubmatch(b.LogStream, 1)
if len(streamMatches) != 1 {
return nil, false
}
jobID := streamMatches[0][1]
out := []RSysLogMessage{}
for _, event := range b.LogEvents {
out = append(out, RSysLogMessage{
Timestamp: event.Timestamp.Time(),
ProgramName: env + "--" + app + arnCruft + jobID,
Hostname: "aws-glue",
Message: event.Message,
})
}
return out, true
}
func splitDefault(b LogEventBatch) []RSysLogMessage {
out := []RSysLogMessage{}
for _, event := range b.LogEvents {
out = append(out, RSysLogMessage{
Timestamp: event.Timestamp.Time(),
Hostname: b.LogStream,
ProgramName: b.LogGroup + "--" + b.LogStream + arnCruft + taskCruft,
Message: event.Message,
})
}
return out
}
func stringify(rsyslogs []RSysLogMessage) [][]byte {
out := make([][]byte, len(rsyslogs))
for i := range rsyslogs {
out[i] = []byte(rsyslogs[i].String())
}
return out
}
// Split takes a LogEventBatch and separates into a slice of enriched log lines // Split takes a LogEventBatch and separates into a slice of enriched log lines
// Lines are enhanced by adding an Rsyslog prefix, which should be handled correctly by // Lines are enhanced by adding an Rsyslog prefix, which should be handled correctly by
// the subsequent decoding logic. // the subsequent decoding logic.
func Split(b LogEventBatch) [][]byte { func Split(b LogEventBatch) [][]byte {
if rsyslogMsgs, ok := splitAWSLambda(b); ok { env := "unknown"
return stringify(rsyslogMsgs) app := "unknown"
} else if rsyslogMsgs, ok := splitAWSFargate(b); ok { task := "00001111-2222-3333-4444-555566667777"
return stringify(rsyslogMsgs) matches := taskRegex.FindAllStringSubmatch(b.LogStream, 1)
} else if rsyslogMsgs, ok := splitAWSBatch(b); ok { if len(matches) == 1 {
return stringify(rsyslogMsgs) env = matches[0][1]
} else if rsyslogMsgs, ok := splitAWSRDS(b); ok { app = matches[0][2]
return stringify(rsyslogMsgs) task = matches[0][3]
} else if rsyslogMsgs, ok := splitAWSGlue(b); ok {
return stringify(rsyslogMsgs)
} }
return stringify(splitDefault(b))
rsyslogPrefix := `%s %s %s[%d]: %s`
// programName is a mocked ARN in the format expected by our log decoders
programName := env + "--" + app + `/arn%3Aaws%3Aecs%3Aus-east-1%3A999988887777%3Atask%2F` + task
mockPid := 1
hostname := "aws-batch"
out := [][]byte{}
for _, event := range b.LogEvents {
// Adding an extra Microsecond forces `Format` to include all 6 digits within the micorsecond format.
// Otherwise, time.Format omits trailing zeroes. (https://github.com/golang/go/issues/12472)
nsecs := event.Timestamp*int64(time.Millisecond) + int64(time.Microsecond)
logTime := time.Unix(0, nsecs).UTC().Format(RFC3339Micro)
// Fake an RSyslog prefix, expected by consumers
formatted := fmt.Sprintf(rsyslogPrefix, logTime, hostname, programName, mockPid, event.Message)
out = append(out, []byte(formatted))
}
return out
} }

View file

@ -3,29 +3,13 @@ package splitter
import ( import (
"bytes" "bytes"
"compress/gzip" "compress/gzip"
"compress/zlib"
"crypto/md5"
b64 "encoding/base64" b64 "encoding/base64"
"encoding/json" "encoding/json"
"os"
"testing" "testing"
"time"
"github.com/Clever/amazon-kinesis-client-go/decode"
kpl "github.com/a8m/kinesis-producer"
"github.com/golang/protobuf/proto"
"github.com/stretchr/testify/assert" "github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
) )
func TestMain(m *testing.M) {
// In the conversion of CloudWatch LogEvent struct to an RSyslog struct to a string,
// the timezone used in the final string depends on the locally set timezone.
// in order for tests to pass, we set TZ to UTC
os.Setenv("TZ", "UTC")
os.Exit(m.Run())
}
func TestUnpacking(t *testing.T) { func TestUnpacking(t *testing.T) {
input := "H4sIAAAAAAAAADWOTQuCQBRF/8ow6wj6ENRdhLXIClJoERKTvsZHOiPzxiLE/96YtTzcy72n4zUQCQnpuwEe8vXxkJ6O8XUfJclqG/EJ1y8FZkgq3RYvYfMy1pJcUGm5NbptXDZSYg2IekRqb5QbbCxqtcHKgiEeXrJvL3qCsgN2HIuxbtFpWFG7sdky8L1ZECwXc9+b/PUGgXPMfnrspxeydQn5A5VkJYjKlkzfWeGWUInhme1QASEx+qpNeZ/1H1PFPn3yAAAA" input := "H4sIAAAAAAAAADWOTQuCQBRF/8ow6wj6ENRdhLXIClJoERKTvsZHOiPzxiLE/96YtTzcy72n4zUQCQnpuwEe8vXxkJ6O8XUfJclqG/EJ1y8FZkgq3RYvYfMy1pJcUGm5NbptXDZSYg2IekRqb5QbbCxqtcHKgiEeXrJvL3qCsgN2HIuxbtFpWFG7sdky8L1ZECwXc9+b/PUGgXPMfnrspxeydQn5A5VkJYjKlkzfWeGWUInhme1QASEx+qpNeZ/1H1PFPn3yAAAA"
@ -44,7 +28,7 @@ func TestUnpacking(t *testing.T) {
LogEvents: []LogEvent{ LogEvents: []LogEvent{
{ {
ID: "", ID: "",
Timestamp: NewUnixTimestampMillis(1498519943285), Timestamp: 1498519943285,
Message: "CWL CONTROL MESSAGE: Checking health of destination Kinesis stream.", Message: "CWL CONTROL MESSAGE: Checking health of destination Kinesis stream.",
}, },
}, },
@ -114,7 +98,7 @@ func TestFullLoop(t *testing.T) {
assert.Equal(t, leb, output) assert.Equal(t, leb, output)
} }
func TestSplitBatch(t *testing.T) { func TestSplit(t *testing.T) {
input := LogEventBatch{ input := LogEventBatch{
MessageType: "DATA_MESSAGE", MessageType: "DATA_MESSAGE",
Owner: "123456789012", Owner: "123456789012",
@ -124,433 +108,20 @@ func TestSplitBatch(t *testing.T) {
LogEvents: []LogEvent{ LogEvents: []LogEvent{
{ {
ID: "99999992379011144044923130086453437181614530551221780480", ID: "99999992379011144044923130086453437181614530551221780480",
Timestamp: NewUnixTimestampMillis(1498519943285), Timestamp: 1498519943285,
Message: "some log line", Message: "some log line",
}, },
{ {
ID: "99999992387663833181953011865369295871402094815542181889", ID: "99999992387663833181953011865369295871402094815542181889",
Timestamp: NewUnixTimestampMillis(1498519943285), Timestamp: 1498519943285,
Message: "another log line", Message: "another log line",
}, },
}, },
} }
lines := Split(input) lines := Split(input)
expected := [][]byte{ expected := [][]byte{
[]byte("2017-06-26T23:32:23.285001+00:00 aws-batch env--app/arn%3Aaws%3Aecs%3Aus-east-1%3A999988887777%3Atask%2F12345678-1234-1234-1234-555566667777: some log line"), []byte("2017-06-26T23:32:23.285001+00:00 aws-batch env--app/arn%3Aaws%3Aecs%3Aus-east-1%3A999988887777%3Atask%2F12345678-1234-1234-1234-555566667777[1]: some log line"),
[]byte("2017-06-26T23:32:23.285001+00:00 aws-batch env--app/arn%3Aaws%3Aecs%3Aus-east-1%3A999988887777%3Atask%2F12345678-1234-1234-1234-555566667777: another log line"), []byte("2017-06-26T23:32:23.285001+00:00 aws-batch env--app/arn%3Aaws%3Aecs%3Aus-east-1%3A999988887777%3Atask%2F12345678-1234-1234-1234-555566667777[1]: another log line"),
} }
assert.Equal(t, expected, lines) assert.Equal(t, expected, lines)
} }
func TestSplitLambda(t *testing.T) {
input := LogEventBatch{
MessageType: "DATA_MESSAGE",
Owner: "123456789012",
LogGroup: "/aws/lambda/env--app",
LogStream: "2018/01/24/[3]62695bfa96de46938f56b156f5235205",
SubscriptionFilters: []string{"ForwardLogsToKinesis"},
LogEvents: []LogEvent{
{
ID: "99999992379011144044923130086453437181614530551221780480",
Timestamp: NewUnixTimestampMillis(1498519943285),
Message: "START RequestId: 8edbd53f-64c7-4a3c-bf1e-efeff40f6512 Version: 3",
},
{
ID: "99999992387663833181953011865369295871402094815542181889",
Timestamp: NewUnixTimestampMillis(1498519943285),
Message: `{"aws_request_id":"8edbd53f-64c7-4a3c-bf1e-efeff40f6512","level":"info","source":"app","title":"some-log-title"}`,
},
{
ID: "99999992387663833181953011865369295871402094815542181889",
Timestamp: NewUnixTimestampMillis(1498519943285),
Message: `Example message that doesn't contain a request ID`,
},
},
}
lines := Split(input)
expected := [][]byte{
[]byte(`2017-06-26T23:32:23.285001+00:00 aws-lambda env--app/arn%3Aaws%3Aecs%3Aus-east-1%3A999988887777%3Atask%2F8edbd53f-64c7-4a3c-bf1e-efeff40f6512: START RequestId: 8edbd53f-64c7-4a3c-bf1e-efeff40f6512 Version: 3`),
[]byte(`2017-06-26T23:32:23.285001+00:00 aws-lambda env--app/arn%3Aaws%3Aecs%3Aus-east-1%3A999988887777%3Atask%2F8edbd53f-64c7-4a3c-bf1e-efeff40f6512: {"aws_request_id":"8edbd53f-64c7-4a3c-bf1e-efeff40f6512","level":"info","source":"app","title":"some-log-title"}`),
[]byte(`2017-06-26T23:32:23.285001+00:00 aws-lambda env--app/arn%3Aaws%3Aecs%3Aus-east-1%3A999988887777%3Atask%2F12345678-1234-1234-1234-555566667777: Example message that doesn't contain a request ID`),
}
assert.Equal(t, expected, lines)
for i, line := range expected {
enhanced, err := decode.ParseAndEnhance(string(line), "")
require.Nil(t, err)
assert.Equal(t, "aws-lambda", enhanced["hostname"])
assert.Equal(t, "env", enhanced["container_env"])
assert.Equal(t, "app", enhanced["container_app"])
if i != len(expected)-1 /* last line doesn't have a request ID */ {
assert.Equal(t, "8edbd53f-64c7-4a3c-bf1e-efeff40f6512", enhanced["container_task"])
}
}
}
func TestSplitFargate(t *testing.T) {
input := LogEventBatch{
MessageType: "DATA_MESSAGE",
Owner: "123456789012",
LogGroup: "/ecs/production--clever-com-router",
LogStream: "fargate/clever-dev--clever-com-router/27b22d5d68aa4bd3923c95e7f32a3852",
SubscriptionFilters: []string{"ForwardLogsToKinesis"},
LogEvents: []LogEvent{
{
ID: "99999992379011144044923130086453437181614530551221780480",
Timestamp: NewUnixTimestampMillis(1498519943285),
Message: "Starting haproxy: haproxy.",
},
},
}
lines := Split(input)
expected := [][]byte{
[]byte(`2017-06-26T23:32:23.285001+00:00 aws-fargate production--clever-com-router/arn%3Aaws%3Aecs%3Aus-east-1%3A999988887777%3Atask%2F27b22d5d68aa4bd3923c95e7f32a3852: Starting haproxy: haproxy.`),
}
assert.Equal(t, expected, lines)
for _, line := range expected {
enhanced, err := decode.ParseAndEnhance(string(line), "")
require.Nil(t, err)
assert.Equal(t, "aws-fargate", enhanced["hostname"])
assert.Equal(t, "production", enhanced["container_env"])
assert.Equal(t, "clever-com-router", enhanced["container_app"])
assert.Equal(t, "27b22d5d68aa4bd3923c95e7f32a3852", enhanced["container_task"])
}
}
func TestSplitDefault(t *testing.T) {
input := LogEventBatch{
MessageType: "DATA_MESSAGE",
Owner: "123456789012",
LogGroup: "vpn_flow_logs",
LogStream: "eni-43403819-all",
SubscriptionFilters: []string{"SomeSubscription"},
LogEvents: []LogEvent{
{
ID: "99999992379011144044923130086453437181614530551221780480",
Timestamp: NewUnixTimestampMillis(1498519943285),
Message: "2 589690932525 eni-43403819 10.0.0.233 172.217.6.46 64067 443 17 8 3969 1516891809 1516891868 ACCEPT OK",
},
},
}
lines := Split(input)
expected := [][]byte{
[]byte(`2017-06-26T23:32:23.285001+00:00 eni-43403819-all vpn_flow_logs--eni-43403819-all/arn%3Aaws%3Aecs%3Aus-east-1%3A999988887777%3Atask%2F12345678-1234-1234-1234-555566667777: 2 589690932525 eni-43403819 10.0.0.233 172.217.6.46 64067 443 17 8 3969 1516891809 1516891868 ACCEPT OK`),
}
assert.Equal(t, expected, lines)
}
func TestSplitRDS(t *testing.T) {
input := LogEventBatch{
MessageType: "DATA_MESSAGE",
Owner: "123456789012",
LogGroup: "/aws/rds/cluster/production-aurora-test-db/slowquery",
LogStream: "clever-dev-aurora-test-db",
SubscriptionFilters: []string{"ForwardLogsToKinesis"},
LogEvents: []LogEvent{
{
ID: "99999992379011144044923130086453437181614530551221780480",
Timestamp: NewUnixTimestampMillis(1498519943285),
Message: "Slow query: select * from table.",
},
},
}
lines := Split(input)
expected := [][]byte{
[]byte(`2017-06-26T23:32:23.285001+00:00 aws-rds production-aurora-test-db: Slow query: select * from table.`),
}
assert.Equal(t, expected, lines)
for _, line := range expected {
enhanced, err := decode.ParseAndEnhance(string(line), "")
require.Nil(t, err)
assert.Equal(t, "aws-rds", enhanced["hostname"])
assert.Equal(t, "production-aurora-test-db", enhanced["programname"])
assert.Equal(t, "Slow query: select * from table.", enhanced["rawlog"])
}
}
// TestSplitGlue verifies that Split recognizes AWS Glue log groups,
// deriving env/app from the group path and the task ID from the job-run
// log stream, and that the result parses via decode.ParseAndEnhance.
func TestSplitGlue(t *testing.T) {
	batch := LogEventBatch{
		MessageType:         "DATA_MESSAGE",
		Owner:               "123456789012",
		LogGroup:            "/aws-glue/jobs/clever-dev/analytics-district-participation/aae75f00",
		LogStream:           "jr_8927660fecacbe026ccab656cb80befea8102ac2023df531b92889b112aada28-1",
		SubscriptionFilters: []string{"ForwardLogsToKinesis"},
		LogEvents: []LogEvent{
			{
				ID:        "99999992379011144044923130086453437181614530551221780480",
				Timestamp: NewUnixTimestampMillis(1498519943285),
				Message:   "foo bar.",
			},
		},
	}
	want := [][]byte{
		[]byte(`2017-06-26T23:32:23.285001+00:00 aws-glue clever-dev--analytics-district-participation/arn%3Aaws%3Aecs%3Aus-east-1%3A999988887777%3Atask%2Fjr_8927660fecacbe026ccab656cb80befea8102ac2023df531b92889b112aada28: foo bar.`),
	}
	assert.Equal(t, want, Split(batch))
	for _, line := range want {
		enhanced, err := decode.ParseAndEnhance(string(line), "")
		require.Nil(t, err)
		assert.Equal(t, "aws-glue", enhanced["hostname"])
		assert.Equal(t, "clever-dev", enhanced["container_env"])
		assert.Equal(t, "analytics-district-participation", enhanced["container_app"])
		assert.Equal(t, "jr_8927660fecacbe026ccab656cb80befea8102ac2023df531b92889b112aada28", enhanced["container_task"])
	}
}
// TestSplitIfNecesary exercises SplitMessageIfNecessary with three input
// encodings — plain text, zlib-compressed text, and a gzip-compressed
// CloudWatch Logs batch — and verifies each is matched against the
// correct splitter and decoded.
// NOTE(review): the function name has a typo ("Necesary"); it is kept
// as-is so existing `go test -run` filters keep working.
func TestSplitIfNecesary(t *testing.T) {
	assert := assert.New(t)

	// Plain text passes through untouched.
	// testify convention: expected value first, actual second, so failure
	// diffs read the right way around.
	plainTextInput := []byte("hello, world!")
	records, err := SplitMessageIfNecessary(plainTextInput)
	assert.NoError(err)
	assert.Equal(
		[][]byte{[]byte("hello, world!")},
		records,
	)

	// A zlib-compressed single message is detected and decompressed.
	var z bytes.Buffer
	zbuf := zlib.NewWriter(&z)
	zbuf.Write([]byte("hello, world!"))
	zbuf.Close()
	zlibSingleInput := z.Bytes()
	records, err = SplitMessageIfNecessary(zlibSingleInput)
	assert.NoError(err)
	assert.Equal(
		[][]byte{[]byte("hello, world!")},
		records,
	)

	// the details of this part aren't super important since the actual functionality is
	// tested in other tests; for this test we just want to make sure that split function
	// correctly realizes it's gzip and call the appropriate CW-log-splitting logic
	var g bytes.Buffer
	gbuf := gzip.NewWriter(&g)
	cwLogBatch := LogEventBatch{
		MessageType:         "test",
		Owner:               "test",
		LogGroup:            "test",
		LogStream:           "test",
		SubscriptionFilters: []string{""},
		LogEvents: []LogEvent{{
			ID:        "test",
			Timestamp: UnixTimestampMillis(time.Date(2020, time.September, 9, 9, 10, 10, 0, time.UTC)),
			Message:   "test",
		}},
	}
	cwLogBatchJSON, marshalErr := json.Marshal(cwLogBatch)
	assert.NoError(marshalErr)
	gbuf.Write(cwLogBatchJSON)
	gbuf.Close()
	gzipBatchInput := g.Bytes()
	expectedRecord := []byte("2020-09-09T09:10:10.000001+00:00 test test--test/arn%3Aaws%3Aecs%3Aus-east-1%3A999988887777%3Atask%2F12345678-1234-1234-1234-555566667777: test")
	records, err = SplitMessageIfNecessary(gzipBatchInput)
	assert.NoError(err)
	assert.Equal(
		[][]byte{expectedRecord},
		records,
	)
}
// createKPLAggregate builds a KPL aggregated record from the given log
// payloads, optionally zlib-compressing each payload first. The returned
// bytes are: KPL magic number, the serialized AggregatedRecord protobuf,
// then the protobuf's MD5 checksum — the on-the-wire KPL framing.
func createKPLAggregate(input [][]byte, compress bool) []byte {
	var partitionKeyIndex uint64 = 0
	records := make([]*kpl.Record, 0, len(input))
	for _, log := range input {
		if compress {
			var z bytes.Buffer
			zbuf := zlib.NewWriter(&z)
			zbuf.Write(log)
			zbuf.Close()
			log = z.Bytes()
		}
		records = append(records, &kpl.Record{
			PartitionKeyIndex: &partitionKeyIndex,
			Data:              log,
		})
	}
	logProto, err := proto.Marshal(&kpl.AggregatedRecord{
		PartitionKeyTable: []string{"ecs_task_arn"},
		Records:           records,
	})
	if err != nil {
		panic(err)
	}
	logHash := md5.Sum(logProto)
	// Build the result in a fresh slice: appending directly to the shared
	// package-level kplMagicNumber slice could mutate it if it ever has
	// spare capacity.
	out := make([]byte, 0, len(kplMagicNumber)+len(logProto)+md5.Size)
	out = append(out, kplMagicNumber...)
	out = append(out, logProto...)
	return append(out, logHash[:]...)
}
// TestKPLDeaggregate verifies that KPLDeaggregate passes non-aggregated
// records through unchanged and unpacks KPL-aggregated records into their
// constituent payloads.
func TestKPLDeaggregate(t *testing.T) {
	type test struct {
		description string
		input       []byte
		output      [][]byte
		shouldError bool
	}
	tests := []test{
		{
			description: "non-aggregated record",
			input:       []byte("hello"),
			output:      [][]byte{[]byte("hello")},
			shouldError: false,
		},
		{
			description: "one kpl-aggregated record",
			input: createKPLAggregate(
				[][]byte{[]byte("hello")},
				false,
			),
			output:      [][]byte{[]byte("hello")},
			shouldError: false,
		},
		{
			description: "three kpl-aggregated record",
			input: createKPLAggregate([][]byte{
				[]byte("hello, "),
				[]byte("world"),
				[]byte("!"),
			},
				false,
			),
			output: [][]byte{
				[]byte("hello, "),
				[]byte("world"),
				[]byte("!"),
			},
			shouldError: false,
		},
	}
	for _, tt := range tests {
		t.Run(tt.description, func(t *testing.T) {
			out, err := KPLDeaggregate(tt.input)
			if tt.shouldError {
				assert.Error(t, err)
				return
			}
			assert.NoError(t, err)
			// testify convention: expected first, actual second, so the
			// failure diff labels read correctly.
			assert.Equal(t, tt.output, out)
		})
	}
}
// TestDeaggregate verifies the combined Deaggregate path: plain records
// pass through, KPL aggregates (optionally zlib-compressed per payload)
// are unpacked, and a gzip-compressed CloudWatch Logs batch is split into
// formatted log lines.
func TestDeaggregate(t *testing.T) {
	type test struct {
		description string
		input       []byte
		output      [][]byte
		shouldError bool
	}
	tests := []test{
		{
			description: "non-aggregated record",
			input:       []byte("hello"),
			output:      [][]byte{[]byte("hello")},
			shouldError: false,
		},
		{
			description: "one kpl-aggregated record",
			input: createKPLAggregate(
				[][]byte{[]byte("hello")},
				false,
			),
			output:      [][]byte{[]byte("hello")},
			shouldError: false,
		},
		{
			description: "three kpl-aggregated record",
			input: createKPLAggregate([][]byte{
				[]byte("hello, "),
				[]byte("world"),
				[]byte("!"),
			},
				false,
			),
			output: [][]byte{
				[]byte("hello, "),
				[]byte("world"),
				[]byte("!"),
			},
			shouldError: false,
		},
		{
			description: "one kpl-aggregated zlib record",
			input: createKPLAggregate(
				[][]byte{[]byte("hello")},
				true,
			),
			output:      [][]byte{[]byte("hello")},
			shouldError: false,
		},
		{
			description: "three kpl-aggregated zlib record",
			input: createKPLAggregate([][]byte{
				[]byte("hello, "),
				[]byte("world"),
				[]byte("!"),
			},
				true,
			),
			output: [][]byte{
				[]byte("hello, "),
				[]byte("world"),
				[]byte("!"),
			},
			shouldError: false,
		},
	}
	// Build a gzip-compressed CloudWatch Logs batch case; gzip input is
	// routed through the CW-log splitting logic.
	var g bytes.Buffer
	gbuf := gzip.NewWriter(&g)
	cwLogBatch := LogEventBatch{
		MessageType:         "test",
		Owner:               "test",
		LogGroup:            "test",
		LogStream:           "test",
		SubscriptionFilters: []string{""},
		LogEvents: []LogEvent{{
			ID:        "test",
			Timestamp: UnixTimestampMillis(time.Date(2020, time.September, 9, 9, 10, 10, 0, time.UTC)),
			Message:   "test",
		}},
	}
	cwLogBatchJSON, err := json.Marshal(cwLogBatch)
	require.NoError(t, err)
	gbuf.Write(cwLogBatchJSON)
	gbuf.Close()
	gzipBatchInput := g.Bytes()
	tests = append(tests, test{
		description: "cloudwatch log batch",
		input:       gzipBatchInput,
		output:      [][]byte{[]byte("2020-09-09T09:10:10.000001+00:00 test test--test/arn%3Aaws%3Aecs%3Aus-east-1%3A999988887777%3Atask%2F12345678-1234-1234-1234-555566667777: test")},
		shouldError: false,
	})
	for _, tt := range tests {
		t.Run(tt.description, func(t *testing.T) {
			out, err := Deaggregate(tt.input)
			if tt.shouldError {
				assert.Error(t, err)
				return
			}
			assert.NoError(t, err)
			// testify convention: expected first, actual second.
			assert.Equal(t, tt.output, out)
		})
	}
}