114 lines
3.6 KiB
Go
114 lines
3.6 KiB
Go
|
|
package splitter
|
||
|
|
|
||
|
|
import (
|
||
|
|
"bytes"
|
||
|
|
"compress/gzip"
|
||
|
|
"encoding/json"
|
||
|
|
"fmt"
|
||
|
|
"io/ioutil"
|
||
|
|
"regexp"
|
||
|
|
"time"
|
||
|
|
)
|
||
|
|
|
||
|
|
// LogEvent is a single log line within a LogEventBatch
|
||
|
|
type LogEvent struct {
|
||
|
|
ID string `json:"id"`
|
||
|
|
Timestamp int64 `json:"timestamp"`
|
||
|
|
Message string `json:"message"`
|
||
|
|
}
|
||
|
|
|
||
|
|
// LogEventBatch is a batch of multiple log lines, read from a KinesisStream with a CWLogs subscription
|
||
|
|
type LogEventBatch struct {
|
||
|
|
MessageType string `json:"messageType"`
|
||
|
|
Owner string `json:"owner"`
|
||
|
|
LogGroup string `json:"logGroup"`
|
||
|
|
LogStream string `json:"logStream"`
|
||
|
|
SubscriptionFilters []string `json:"subscriptionFilters"`
|
||
|
|
LogEvents []LogEvent `json:"logEvents"`
|
||
|
|
}
|
||
|
|
|
||
|
|
// IsGzipped returns whether or not a string is Gzipped (determined by looking for a Gzip byte prefix)
|
||
|
|
func IsGzipped(b []byte) bool {
|
||
|
|
return b[0] == 0x1f && b[1] == 0x8b
|
||
|
|
}
|
||
|
|
|
||
|
|
// GetMessagesFromGzippedInput takes a gzipped record from a CWLogs Subscription and splits it into
|
||
|
|
// a slice of messages.
|
||
|
|
func GetMessagesFromGzippedInput(input []byte, prodEnv bool) ([][]byte, error) {
|
||
|
|
unpacked, err := unpack(input)
|
||
|
|
if err != nil {
|
||
|
|
return [][]byte{}, err
|
||
|
|
}
|
||
|
|
return Split(unpacked, prodEnv), nil
|
||
|
|
}
|
||
|
|
|
||
|
|
// Unpack expects a gzipped + json-stringified LogEventBatch
|
||
|
|
func unpack(input []byte) (LogEventBatch, error) {
|
||
|
|
gzipReader, err := gzip.NewReader(bytes.NewReader(input))
|
||
|
|
if err != nil {
|
||
|
|
return LogEventBatch{}, err
|
||
|
|
}
|
||
|
|
|
||
|
|
byt, err := ioutil.ReadAll(gzipReader)
|
||
|
|
if err != nil {
|
||
|
|
return LogEventBatch{}, err
|
||
|
|
}
|
||
|
|
|
||
|
|
var dat LogEventBatch
|
||
|
|
if err := json.Unmarshal(byt, &dat); err != nil {
|
||
|
|
return LogEventBatch{}, err
|
||
|
|
}
|
||
|
|
|
||
|
|
return dat, nil
|
||
|
|
}
|
||
|
|
|
||
|
|
// RFC3339Micro is the RFC3339 format in microseconds
|
||
|
|
const RFC3339Micro = "2006-01-02T15:04:05.999999-07:00"
|
||
|
|
|
||
|
|
const taskMeta = `([a-z0-9-]+)--([a-z0-9-]+)\/` + // env--app
|
||
|
|
`([0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12})\/` + // task-id
|
||
|
|
`([0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12})` // container-id
|
||
|
|
|
||
|
|
var taskRegex = regexp.MustCompile(taskMeta)
|
||
|
|
|
||
|
|
// Split takes a LogEventBatch and separates into a slice of enriched log lines
|
||
|
|
// Lines are enhanced by adding an Rsyslog prefix, which should be handled correctly by
|
||
|
|
// the subsequent decoding logic.
|
||
|
|
func Split(b LogEventBatch, prodEnv bool) [][]byte {
|
||
|
|
env := "unknown"
|
||
|
|
app := "unknown"
|
||
|
|
task := "00001111-2222-3333-4444-555566667777"
|
||
|
|
matches := taskRegex.FindAllStringSubmatch(b.LogStream, 1)
|
||
|
|
if len(matches) == 1 {
|
||
|
|
env = matches[0][1]
|
||
|
|
app = matches[0][2]
|
||
|
|
task = matches[0][3]
|
||
|
|
}
|
||
|
|
|
||
|
|
if (env == "production") != prodEnv {
|
||
|
|
// if there's a mis-match between the consumer's environment and the log's environment,
|
||
|
|
// throw away the log. (this is a workaround for coarse grained subscription filters.)
|
||
|
|
return [][]byte{}
|
||
|
|
}
|
||
|
|
|
||
|
|
rsyslogPrefix := `%s %s %s[%d]: %s`
|
||
|
|
// programName is a mocked ARN in the format expected by our log decoders
|
||
|
|
programName := env + "--" + app + `/arn%3Aaws%3Aecs%3Aus-east-1%3A999988887777%3Atask%2F` + task
|
||
|
|
mockPid := 1
|
||
|
|
hostname := "aws-batch"
|
||
|
|
|
||
|
|
out := [][]byte{}
|
||
|
|
for _, event := range b.LogEvents {
|
||
|
|
// Adding an extra Microsecond forces `Format` to include all 6 digits within the micorsecond format.
|
||
|
|
// Otherwise, time.Format omits trailing zeroes. (https://github.com/golang/go/issues/12472)
|
||
|
|
nsecs := event.Timestamp*int64(time.Millisecond) + int64(time.Microsecond)
|
||
|
|
logTime := time.Unix(0, nsecs).UTC().Format(RFC3339Micro)
|
||
|
|
|
||
|
|
// Fake an RSyslog prefix, expected by consumers
|
||
|
|
formatted := fmt.Sprintf(rsyslogPrefix, logTime, hostname, programName, mockPid, event.Message)
|
||
|
|
out = append(out, []byte(formatted))
|
||
|
|
}
|
||
|
|
|
||
|
|
return out
|
||
|
|
}
|