More documentation around handling of CW log bundles; deflake test
This commit is contained in:
parent
6e8a99c50d
commit
cc4f7716b3
2 changed files with 21 additions and 5 deletions
|
|
@ -48,7 +48,8 @@ func KPLDeaggregate(kinesisRecord []byte) ([][]byte, error) {
|
||||||
recordSum := md5.Sum(src)
|
recordSum := md5.Sum(src)
|
||||||
for i, b := range checksum {
|
for i, b := range checksum {
|
||||||
if b != recordSum[i] {
|
if b != recordSum[i] {
|
||||||
// false alarm - the header matched but the checksum doesn't, so it's not KPL
|
// either the data is corrupted or this is not a KPL aggregate
|
||||||
|
// either way, return the data as-is
|
||||||
return [][]byte{kinesisRecord}, nil
|
return [][]byte{kinesisRecord}, nil
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -72,6 +73,8 @@ func KPLDeaggregate(kinesisRecord []byte) ([][]byte, error) {
|
||||||
// A similar result can be obtained by calling KPLDeaggregate, then iterating over the results and calling SplitMessageIfNecessary.
|
// A similar result can be obtained by calling KPLDeaggregate, then iterating over the results and calling SplitMessageIfNecessary.
|
||||||
// This function makes the assumption that after KPL-deaggregating, the results are not CloudWatch aggregates, so it doesn't need to check them for a gzip header.
|
// This function makes the assumption that after KPL-deaggregating, the results are not CloudWatch aggregates, so it doesn't need to check them for a gzip header.
|
||||||
// Also it lets us iterate over the user records one less time, since KPLDeaggregate loops over the records and we would need to loop again to unzlib.
|
// Also it lets us iterate over the user records one less time, since KPLDeaggregate loops over the records and we would need to loop again to unzlib.
|
||||||
|
//
|
||||||
|
// See the SplitMessageIfNecessary documentation for the format of output for CloudWatch log bundles.
|
||||||
func DeaggregateAndSplitIfNecessary(kinesisRecord []byte) ([][]byte, error) {
|
func DeaggregateAndSplitIfNecessary(kinesisRecord []byte) ([][]byte, error) {
|
||||||
if !IsKPLAggregate(kinesisRecord) {
|
if !IsKPLAggregate(kinesisRecord) {
|
||||||
return SplitMessageIfNecessary(kinesisRecord)
|
return SplitMessageIfNecessary(kinesisRecord)
|
||||||
|
|
@ -81,7 +84,8 @@ func DeaggregateAndSplitIfNecessary(kinesisRecord []byte) ([][]byte, error) {
|
||||||
recordSum := md5.Sum(src)
|
recordSum := md5.Sum(src)
|
||||||
for i, b := range checksum {
|
for i, b := range checksum {
|
||||||
if b != recordSum[i] {
|
if b != recordSum[i] {
|
||||||
// false alarm - the header matched but the checksum doesn't, so it's not KPL
|
// either the data is corrupted or this is not a KPL aggregate
|
||||||
|
// either way, return the data as-is
|
||||||
return [][]byte{kinesisRecord}, nil
|
return [][]byte{kinesisRecord}, nil
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -107,6 +111,12 @@ func DeaggregateAndSplitIfNecessary(kinesisRecord []byte) ([][]byte, error) {
|
||||||
// - records emitted from CWLogs Subscription (which are gzip compressed)
|
// - records emitted from CWLogs Subscription (which are gzip compressed)
|
||||||
// - zlib compressed records (e.g. as compressed and emitted by Kinesis plugin for Fluent Bit)
|
// - zlib compressed records (e.g. as compressed and emitted by Kinesis plugin for Fluent Bit)
|
||||||
// - any other record (left unchanged)
|
// - any other record (left unchanged)
|
||||||
|
//
|
||||||
|
// CloudWatch logs come as structured JSON. In the process of splitting, they are converted
|
||||||
|
// into an rsyslog format that allows fairly uniform parsing of the result across the
|
||||||
|
// AWS services that might emit logs to CloudWatch.
|
||||||
|
// Note that the timezone used in these syslog records is guessed based on the local environment.
|
||||||
|
// If you need consistent timezones, set TZ=UTC in your environment.
|
||||||
func SplitMessageIfNecessary(userRecord []byte) ([][]byte, error) {
|
func SplitMessageIfNecessary(userRecord []byte) ([][]byte, error) {
|
||||||
// First try the record as a CWLogs record
|
// First try the record as a CWLogs record
|
||||||
if IsGzipped(userRecord) {
|
if IsGzipped(userRecord) {
|
||||||
|
|
|
||||||
|
|
@ -7,6 +7,7 @@ import (
|
||||||
"crypto/md5"
|
"crypto/md5"
|
||||||
b64 "encoding/base64"
|
b64 "encoding/base64"
|
||||||
"encoding/json"
|
"encoding/json"
|
||||||
|
"os"
|
||||||
"testing"
|
"testing"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
|
|
@ -17,6 +18,14 @@ import (
|
||||||
"github.com/stretchr/testify/require"
|
"github.com/stretchr/testify/require"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
func TestMain(m *testing.M) {
|
||||||
|
// In the conversion of CloudWatch LogEvent struct to an RSyslog struct to a string,
|
||||||
|
// the timezone used in the final string depends on the locally set timezone.
|
||||||
|
// in order for tests to pass, we set TZ to UTC
|
||||||
|
os.Setenv("TZ", "UTC")
|
||||||
|
os.Exit(m.Run())
|
||||||
|
}
|
||||||
|
|
||||||
func TestUnpacking(t *testing.T) {
|
func TestUnpacking(t *testing.T) {
|
||||||
input := "H4sIAAAAAAAAADWOTQuCQBRF/8ow6wj6ENRdhLXIClJoERKTvsZHOiPzxiLE/96YtTzcy72n4zUQCQnpuwEe8vXxkJ6O8XUfJclqG/EJ1y8FZkgq3RYvYfMy1pJcUGm5NbptXDZSYg2IekRqb5QbbCxqtcHKgiEeXrJvL3qCsgN2HIuxbtFpWFG7sdky8L1ZECwXc9+b/PUGgXPMfnrspxeydQn5A5VkJYjKlkzfWeGWUInhme1QASEx+qpNeZ/1H1PFPn3yAAAA"
|
input := "H4sIAAAAAAAAADWOTQuCQBRF/8ow6wj6ENRdhLXIClJoERKTvsZHOiPzxiLE/96YtTzcy72n4zUQCQnpuwEe8vXxkJ6O8XUfJclqG/EJ1y8FZkgq3RYvYfMy1pJcUGm5NbptXDZSYg2IekRqb5QbbCxqtcHKgiEeXrJvL3qCsgN2HIuxbtFpWFG7sdky8L1ZECwXc9+b/PUGgXPMfnrspxeydQn5A5VkJYjKlkzfWeGWUInhme1QASEx+qpNeZ/1H1PFPn3yAAAA"
|
||||||
|
|
||||||
|
|
@ -289,7 +298,6 @@ func TestSplitGlue(t *testing.T) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// If running this test directly with `go test`, it may fail unless you set the env var TZ=UTC
|
|
||||||
func TestSplitIfNecesary(t *testing.T) {
|
func TestSplitIfNecesary(t *testing.T) {
|
||||||
|
|
||||||
// We provide three different inputs to batchedWriter.splitMessageIfNecessary
|
// We provide three different inputs to batchedWriter.splitMessageIfNecessary
|
||||||
|
|
@ -384,7 +392,6 @@ func createKPLAggregate(input [][]byte, compress bool) []byte {
|
||||||
return append(log, logHash[0:16]...)
|
return append(log, logHash[0:16]...)
|
||||||
}
|
}
|
||||||
|
|
||||||
// If running this test directly with `go test`, it may fail unless you set the env var TZ=UTC
|
|
||||||
func TestKPLDeaggregate(t *testing.T) {
|
func TestKPLDeaggregate(t *testing.T) {
|
||||||
type test struct {
|
type test struct {
|
||||||
description string
|
description string
|
||||||
|
|
@ -441,7 +448,6 @@ func TestKPLDeaggregate(t *testing.T) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// If running this test directly with `go test`, it may fail unless you set the env var TZ=UTC
|
|
||||||
func TestDeaggregateAndSplit(t *testing.T) {
|
func TestDeaggregateAndSplit(t *testing.T) {
|
||||||
type test struct {
|
type test struct {
|
||||||
description string
|
description string
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue