Compare commits

...

111 commits

Author SHA1 Message Date
Taylor Sutton
f78d1c6b9b
Merge pull request #71 from Clever/INFRANG-6435
notify CI failure
2024-09-19 07:23:19 -07:00
Tanmay Sardesai
a8a4e207fe notify CI failure 2024-09-18 13:04:48 -04:00
Taylor Sutton
e66f1922b7
Merge pull request #68 from Clever/INFRANG-5293
Update github known hosts key
2023-05-04 09:43:18 -07:00
Chris Martin
dc3e5e097c Update github known hosts key 2023-04-25 14:56:16 -04:00
Nikhil Bhatia
9a1371d965
Merge pull request #67 from Clever/ci-status-notify
Microplane: add Github Action workflow for ci-notify
2022-03-29 20:30:42 -07:00
nbhatia823
cf19a05ced Microplane: add Github Action workflow for ci-notify 2022-03-29 15:12:09 -07:00
Rafael
3a42d32e53
Merge pull request #66 from Clever/go-modules
dep -> go modules
2021-06-03 19:12:19 -04:00
Gavi Hirsch
d461ebebb6 dep -> go modules 2021-06-03 15:41:20 -07:00
Taylor Sutton
c11acd24ec
Merge pull request #64 from Clever/dependabot/dep/github.com/golang/protobuf-1.5.2
Bump github.com/golang/protobuf from 1.5.1 to 1.5.2
2021-03-30 12:57:35 -07:00
dependabot-preview[bot]
ad3e33c7d6
Bump github.com/golang/protobuf from 1.5.1 to 1.5.2
Bumps [github.com/golang/protobuf](https://github.com/golang/protobuf) from 1.5.1 to 1.5.2.
- [Release notes](https://github.com/golang/protobuf/releases)
- [Commits](https://github.com/golang/protobuf/compare/v1.5.1...v1.5.2)

Signed-off-by: dependabot-preview[bot] <support@dependabot.com>
2021-03-30 17:52:57 +00:00
Taylor Sutton
fa54b6ee39
Merge pull request #63 from Clever/dependabot/dep/github.com/golang/protobuf-1.5.1
Bump github.com/golang/protobuf from 1.4.3 to 1.5.1
2021-03-18 11:22:53 -07:00
dependabot-preview[bot]
c164f1d895
Bump github.com/golang/protobuf from 1.4.3 to 1.5.1
Bumps [github.com/golang/protobuf](https://github.com/golang/protobuf) from 1.4.3 to 1.5.1.
- [Release notes](https://github.com/golang/protobuf/releases)
- [Commits](https://github.com/golang/protobuf/compare/v1.4.3...v1.5.1)

Signed-off-by: dependabot-preview[bot] <support@dependabot.com>
2021-03-18 17:21:17 +00:00
Gavi Hirsch
6ac018712d
Merge pull request #62 from Clever/INFRANG-4003
add versioning
2021-03-05 09:27:29 -08:00
Gavi Hirsch
7351a3d3f7 add versioning 2021-03-04 10:11:51 -08:00
Rafael
358de6589c
Merge pull request #61 from Clever/dependabot/dep/gopkg.in/Clever/kayvee-go.v6-6.24.1
Bump gopkg.in/Clever/kayvee-go.v6 from 6.24.0 to 6.24.1
2021-02-02 13:26:14 -05:00
dependabot-preview[bot]
fe41fdaac3
Bump gopkg.in/Clever/kayvee-go.v6 from 6.24.0 to 6.24.1
Bumps [gopkg.in/Clever/kayvee-go.v6](https://github.com/Clever/kayvee-go) from 6.24.0 to 6.24.1.
- [Release notes](https://github.com/Clever/kayvee-go/releases)
- [Commits](https://github.com/Clever/kayvee-go/compare/v6.24.0...v6.24.1)

Signed-off-by: dependabot-preview[bot] <support@dependabot.com>
2021-02-02 18:20:42 +00:00
Tanmay Sardesai
102de58470
Merge pull request #60 from Clever/dependabot/dep/github.com/stretchr/testify-1.7.0
Bump github.com/stretchr/testify from 1.6.1 to 1.7.0
2021-01-19 10:01:36 -08:00
dependabot-preview[bot]
54f91f4073
Bump github.com/stretchr/testify from 1.6.1 to 1.7.0
Bumps [github.com/stretchr/testify](https://github.com/stretchr/testify) from 1.6.1 to 1.7.0.
- [Release notes](https://github.com/stretchr/testify/releases)
- [Commits](https://github.com/stretchr/testify/compare/v1.6.1...v1.7.0)

Signed-off-by: dependabot-preview[bot] <support@dependabot.com>
2021-01-13 18:18:37 +00:00
Taylor Sutton
29f86e9986
Merge pull request #59 from Clever/refactor-decompression
Refactor decompression
2020-11-16 20:18:07 -05:00
Taylor Sutton
42c3480288 Simplify main function's name. 2020-11-16 17:37:53 -05:00
Taylor Sutton
cc4f7716b3 More documentation around handling of CW log bundles; deflake test 2020-11-16 12:44:50 -05:00
Taylor Sutton
6e8a99c50d Add functionality for handling KPL aggregates and zlib
Adds a lowest-common-denominator function, KPLDeaggregate, for
handling records that might be KPL aggregated. Also adds a function,
DeaggregateAndSplitIfNecessary, to wrap the existing functionality of
SplitMessageIfNecessary with KPL deaggregation.

These functions are handy for non-KCL consumers, like Lambda
functions. KCL automatically applies deaggregation for you.

This change is backwards compatible - the previously exposed function
SplitMessageIfNecessary still does the same things.
2020-11-16 11:01:28 -05:00
Taylor Sutton
4dd769ffca
Merge pull request #58 from Clever/refactor-decompression
Refactor zlib decompression into the splitter package
2020-11-12 11:57:53 -05:00
Taylor Sutton
f8e9c34641 Refactor zlib decompression into the splitter package
Being in the batchconsumer package means it will work for anything
using KCL, but lambdas that subscribe to these log streams do not use
batchconsumer at all; instead they invoke the splitter package
directly. As such, if we want this functionality to be available to
lambda log consumers, it can't be in batchconsumer.

There are no functionality changes here, just moving code from an
unexported method in one place to an exported function in another
place. The tests also get moved along with it.
2020-11-12 11:37:13 -05:00
Taylor Sutton
2a7e96157e
Merge pull request #57 from Clever/add-zlib-decompression
Add zlib decompression to batch consumer.
2020-11-11 12:42:11 -05:00
Taylor Sutton
4fe27d0d39 Clarifying comment - Logs from CW Logs are gzipped. 2020-11-11 12:39:51 -05:00
Taylor Sutton
6b2d1f8a56 Add zlib decompression to batch consumer.
This has become relevant as zlib is the compression method supported
by the Kinesis plugin for Fluent Bit.
2020-11-11 09:12:05 -05:00
Gavi Hirsch
611f0c0a60
Merge pull request #56 from Clever/INFRANG-3918
clear count stats after logging them
2020-09-11 15:02:06 -07:00
Gavi Hirsch
1112894639
clear count stats after logging them 2020-09-11 14:04:42 -07:00
Taylor Sutton
484b54bfe0
Merge pull request #55 from Clever/INFRANG-35-fluent-decode
Add decoding of JSON logs in Fluent format.
2020-08-18 08:44:17 -07:00
Taylor Sutton
f5ce6fe4e7 Add decoding of JSON logs in Fluent format.
ParseAndEnhance used to be:
- Try to parse line as a syslog, extracting the log itself and other
  fields from syslog format
  - If that succeeds, try to parse the log as either a Kayvee log or
    an RDS slow query log.
  - Combine all these fields, and add on some "derived"
    fields (container_task|env|app).
- Not a syslog => error

Now it will be:
- Try to parse line as a syslog, same as before, including the
  Kayvee/RDS part
- If syslog parsing failed, try to parse as a Fluent log and extract
  some fields from the Fluent format (the log, timestamp, etc)
  - If that succeeds, try to parse the log itself as a Kayvee log.
  - Combine Kayvee fields (if found) and derived fields.
- If BOTH formats fail, it is an error.

The decoding makes a lot of assumptions:
- The names of the log field and timestamp field (even though,
theoretically, they are customizable in the fluentbit config).
- The timestamp format (again)
- The format of the Task Definition name (or at least part of it)
- All fluentbit logs should have hostname set to `aws-fargate`.

Perhaps these can be relaxed if necessary. They could probably be
replaced by some kind of config. As there is currently no config I
wanted to keep things simple as possible. If we need to re-evaluate
(for example if we start getting JSON logs that don't want to use the
same handling for container_task|env|app) we can reevaluate.
2020-08-17 15:02:59 -07:00
Gavi Hirsch
9b32d93d1c
Merge pull request #54 from Clever/upgrade-kayvee-go-6.24.0
upgrade kayvee-go to 6.24.0
2020-08-14 19:19:26 -07:00
Gavi Hirsch
3bedc65483 upgrade kayvee-go to 6.24.0 2020-08-14 16:30:08 -07:00
Gavi Hirsch
b16af062fb
Merge pull request #53 from Clever/glue-2.0
glue log group format
2020-08-11 16:52:03 -07:00
Gavi Hirsch
30d3925119 glue log group format 2020-08-11 16:42:59 -07:00
Gavi Hirsch
ebd1e0e39c
Merge pull request #52 from Clever/dependabot/dep/github.com/stretchr/testify-1.6.1
Bump github.com/stretchr/testify from 1.1.4 to 1.6.1
2020-07-24 12:21:48 -07:00
Gavi Hirsch
bfe1a6ffb9 fix Gopkg.lock 2020-07-24 12:19:30 -07:00
dependabot-preview[bot]
35b07d7aed
Bump github.com/stretchr/testify from 1.1.4 to 1.6.1
Bumps [github.com/stretchr/testify](https://github.com/stretchr/testify) from 1.1.4 to 1.6.1.
- [Release notes](https://github.com/stretchr/testify/releases)
- [Commits](https://github.com/stretchr/testify/compare/v1.1.4...v1.6.1)

Signed-off-by: dependabot-preview[bot] <support@dependabot.com>
2020-07-24 17:38:31 +00:00
Gavi Hirsch
8a8b44112d
Merge pull request #51 from Clever/dependabot/dep/gopkg.in/Clever/kayvee-go.v6-6.23.0
Bump gopkg.in/Clever/kayvee-go.v6 from 6.6.0 to 6.23.0
2020-07-23 10:27:58 -07:00
dependabot-preview[bot]
8be774bc09
Bump gopkg.in/Clever/kayvee-go.v6 from 6.6.0 to 6.23.0
Bumps [gopkg.in/Clever/kayvee-go.v6](https://github.com/Clever/kayvee-go) from 6.6.0 to 6.23.0.
- [Release notes](https://github.com/Clever/kayvee-go/releases)
- [Commits](https://github.com/Clever/kayvee-go/compare/v6.6.0...v6.23.0)

Signed-off-by: dependabot-preview[bot] <support@dependabot.com>
2020-07-23 17:24:47 +00:00
Rafael
504ebfad60
Merge pull request #49 from Clever/INFRANG-3968
support parsing AWS Glue job logs
2020-07-20 14:15:14 -04:00
Rafael Garcia
420ad243a4 add codeowners 2020-07-20 18:11:27 +00:00
Rafael Garcia
337d2063f5 support parsing AWS Glue job logs 2020-07-20 17:20:39 +00:00
Sayan
eb11747434
Merge pull request #48 from Clever/update-jar-link
Fix maven repo link
2020-04-06 13:50:16 -07:00
Sayan Samanta
d489b5039b update to repo1 link for jars 2020-04-06 13:31:10 -07:00
Taylor Sutton
e7db80a35a
Merge pull request #46 from Clever/go1.13
Upgrade go to v1.13
2019-10-18 11:36:05 -07:00
Taylor Sutton
5132214945 Upgrade go to v1.13 2019-10-18 09:45:22 -07:00
Daniel Xu
e265494afb
Merge pull request #45 from Clever/INFRA-3664
Fix regex that parses the RDS slowquery user
2019-10-14 17:21:53 -07:00
Daniel Xu
69d31088f3 Add tests for parsing slow query user 2019-10-14 17:19:12 -07:00
Daniel Xu
7446627fae Fix regex that parses the RDS slowquery user 2019-10-14 17:02:01 -07:00
Aaron Stein
9cbcf1096a
Merge pull request #44 from Clever/parse-rds
Parse rds slowquery user
2019-10-02 10:21:31 -07:00
Aaron Stein
a66ded0e8f parse rds slowquery user 2019-10-01 14:39:11 -07:00
Aaron Stein
07828aef7b
INFRA-2885: Split slowquery logs - Merge pull request #42 from Clever/INFRA-2885
INFRA-2885: Split slowquery logs
2019-08-26 12:16:17 -07:00
Aaron Stein
598d57359f remove PID from slowquery log 2019-08-26 12:09:06 -07:00
Aaron Stein
1261000e33 re-refactor log classification logic 2019-08-26 12:08:01 -07:00
Aaron Stein
ea59ce899b add match check 2019-08-26 12:08:01 -07:00
Aaron Stein
df8b175e86 add decode test 2019-08-26 12:08:01 -07:00
Aaron Stein
45c48035c7 split slowquery logs 2019-08-26 12:08:01 -07:00
Aaron Stein
2af69f28a2 update deps 2019-08-26 12:08:01 -07:00
Aaron Stein
c38be59053
Remove PID - Merge pull request #43 from Clever/rm-pid
Remove PID
2019-08-26 12:01:21 -07:00
Aaron Stein
4c9e2c582f rm debug file 2019-08-23 13:20:21 -07:00
Aaron Stein
c1d013f301 add PID-less test 2019-08-23 13:17:02 -07:00
Aaron Stein
f747ccfdd0 rm constant PID 2019-08-23 10:27:43 -07:00
johnhuangclever
4f8bee913e
Merge pull request #40 from Clever/go1.12-again 2019-06-01 01:05:48 -07:00
John Huang
4a28b40953 Golang 1.9 upgrade 2019-06-01 00:39:17 -07:00
Rafael
ebbd9e7a37
Merge pull request #39 from Clever/INFRA-3363-take-two
expand ECS task id regex for fargate
2019-03-01 10:37:30 -08:00
Rafael Garcia
18e6920dea expand task id regex for fargate 2019-02-27 23:15:19 +00:00
Rafael Garcia
aeb8af04e8 parse fargate logs 2019-02-25 20:52:52 +00:00
Xavi
2b62c6cbb1
Merge pull request #37 from Clever/INFRA-3343-rm-awscli-install-amazon-kinesis-client-go
Removed unneeded "Install awscli for ECR publish" command
2019-01-09 14:55:35 -08:00
Xavi Ramirez
df71c27e6f Removed unneeded "Install awscli for ECR publish" command 2019-01-08 18:40:29 -08:00
Xavi
a975ca3c3b
Merge pull request #36 from Clever/add-err-msg
Added error message for failed log written to file
2018-12-11 16:43:26 -08:00
Xavi Ramirez
e80ff2239b Added error message for failed log written to file 2018-12-11 16:32:33 -08:00
Xavi
23c68b42ff
Merge pull request #33 from wwwjfy/fix-shutdown-requested
Fix shutdown requested
2018-12-11 16:28:37 -08:00
Xavi
7351346041
Merge pull request #35 from wwwjfy/fix-checkpoint
fix periodic checkpoint not triggered
2018-12-11 16:23:51 -08:00
Xavi
6bb489ab5b
Merge pull request #34 from wwwjfy/kcl-v2-format
fix incompatible Record struct in KCL v2
2018-12-11 16:22:57 -08:00
Tony Wang
2f722d9cd2
fix periodic checkpoint not triggered
when initialized, largestPair is empty and IsLessThan always returns
false
2018-11-09 15:08:47 +08:00
Tony Wang
c3ff544d00
fix incompatible Record struct in KCL v2 2018-10-18 16:43:58 +08:00
Tony Wang
06838386db
fix review comments 2018-10-18 15:50:44 +08:00
Tony Wang
e7b75a20ac
handle shutdown requested message
Message type shutdownRequested doesn't have reason, which causes
checkpoint on termination not work.
This commit is to ask KCL daemon to checkpoint on shutdownRequested.
2018-10-14 18:29:17 +08:00
Tony Wang
c86a5da722
remove not in use statements 2018-10-14 17:55:46 +08:00
Nate
33d58bc6e0
Merge pull request #32 from Clever/SYNC-859 2018-10-11 15:40:18 -07:00
Nate Brennand
dd22f519be update golang.mk 2018-10-11 12:39:00 -07:00
Xavi
42fba675bb
Merge pull request #31 from Clever/fix-type-o
Fixed type-o: guage -> gauge
2018-10-05 15:38:04 -07:00
Xavi
c4712bd220
Fixed type-o: guage -> gauge 2018-10-05 15:31:48 -07:00
Xavi
dbc3002d02
Merge pull request #30 from Clever/INFRA-3186-circleci-2
[INFRA-3186] autotranslate CircleCI 1.0 -> 2.0
2018-08-17 15:44:57 -07:00
Brianna Veenstra
31eeb34fdc [INFRA-3186] autotranslate CircleCI 1.0 -> 2.0 2018-08-17 15:21:41 -07:00
Xavi
92ce9be607
Merge pull request #29 from Clever/initialize-method
Added initialize method to sender
2018-08-09 16:48:43 -07:00
Xavi Ramirez
e5850f1464 Added Initialize method to sender 2018-08-09 16:43:30 -07:00
Xavi Ramirez
df698f34aa Updated golang.mk to v0.3.8 2018-08-09 16:32:10 -07:00
renato
e377f46b69
Merge pull request #28 from Clever/go1.10 2018-05-23 16:33:11 -07:00
Renato
167c524fb2 golang upgrade 2018-05-18 15:54:07 -07:00
Xavi
6e45fa518b
Merge pull request #27 from Clever/more-reserve-fields
Added timestamp, hostname, and rawlog to list of reserved fields
2018-05-10 17:09:07 -07:00
Xavi Ramirez
8f2ad09efa Added timestamp, hostname, and rawlog to list of reserved fields 2018-05-10 23:56:32 +00:00
Xavi Ramirez
74634a6bf0 Added decoder_msg_type field to logs parsed by syslog 2018-05-10 23:56:14 +00:00
Rafael
3ded2fcd2d
Merge pull request #26 from Clever/INFRA-2824
fix lambda -> rsyslog message conversion to always have a task ID
2018-03-13 09:55:55 -07:00
Rafael Garcia
ad2ef687bb fix lambda -> rsyslog message conversion to always have a task ID 2018-03-13 16:48:41 +00:00
Nathan Leiby
4858074bb7
Merge pull request #25 from Clever/IL-200-remove-self-reference 2018-02-16 14:42:40 -08:00
Nathan Leiby
267b28a032 Remove self-references in Gopkg.toml
Issue: https://clever.atlassian.net/browse/IL-200

We accidentally introduced self-references into some Gopkg.toml files.
Let's remove them!

More context: https://github.com/golang/dep/issues/1690
2018-02-16 14:24:35 -08:00
Nathan Leiby
44a89a1685
Merge pull request #24 from Clever/IL-200-remove-self-reference
Remove self-references in Gopkg.toml
2018-02-16 14:17:23 -08:00
Nathan Leiby
b0196cf7b0 Remove self-references in Gopkg.toml
Issue: https://clever.atlassian.net/browse/IL-200

We accidentally introduced self-references into some Gopkg.toml files.
Let's remove them!

More context: https://github.com/golang/dep/issues/1690
2018-02-16 13:14:07 -08:00
Rafael
bfe2ac0f6d
Merge pull request #23 from Clever/INFRA-2764
handle logs coming in from lambda functions
2018-01-25 15:15:10 -08:00
Rafael Garcia
c255bf87f3 add test for default case 2018-01-25 15:13:11 +00:00
Rafael Garcia
10750e802b handle logs coming in from lambda functions 2018-01-25 01:23:03 +00:00
Nate
7d863db172
Merge pull request #22 from Clever/SYNC-114-go-1.9-upgrade 2017-11-09 08:48:35 -08:00
Nate Brennand
2a4e6fcc71 upgrade to 1.9 and convert to dep 2017-11-08 09:46:29 -08:00
Rafael
ebf2e2dc98
Merge pull request #21 from Clever/only-send-failed-log-lines-to-file
write errors to stderr, failed log lines to file
2017-11-03 11:05:31 -07:00
Rafael Garcia
7b18d1bab4 write failed log message to file 2017-11-03 18:03:39 +00:00
Rafael Garcia
945ed317c2 remove logger as parameter 2017-11-03 17:48:50 +00:00
Rafael Garcia
221743b3e2 write errors to stderr 2017-11-02 21:50:46 +00:00
Xavi
94aacdf833 Merge pull request #19 from Clever/rm-params-from-decoder
Removed stringifyNested, renameESReservedFields, and minimumTimestamp params from decoder
2017-09-26 14:53:56 -07:00
Xavi Ramirez
2ecbc081cd Removed stringifyNested, renameESReservedFields, and minimumTimestamp params from decoder. minimumTimestamp isn't needed anymore. stringifyNested and renameESReservedFields are very specific to the elasticsearch consumer and will be moved to the elasticsearch consumer. 2017-09-21 19:51:48 +00:00
26 changed files with 1674 additions and 629 deletions

30
.circleci/config.yml Normal file
View file

@ -0,0 +1,30 @@
version: 2
jobs:
build:
working_directory: /go/src/github.com/Clever/amazon-kinesis-client-go
docker:
- image: circleci/golang:1.13-stretch
- image: circleci/mongo:3.2.20-jessie-ram
environment:
GOPRIVATE: github.com/Clever/*
CIRCLE_ARTIFACTS: /tmp/circleci-artifacts
CIRCLE_TEST_REPORTS: /tmp/circleci-test-results
steps:
- run:
command: cd $HOME && git clone --depth 1 -v https://github.com/Clever/ci-scripts.git && cd ci-scripts && git show --oneline -s
name: Clone ci-scripts
- checkout
- setup_remote_docker
- run:
command: mkdir -p $CIRCLE_ARTIFACTS $CIRCLE_TEST_REPORTS
name: Set up CircleCI artifacts directories
- run:
command: git config --global "url.ssh://git@github.com/Clever".insteadOf "https://github.com/Clever"
- run:
name: Add github.com to known hosts
command: mkdir -p ~/.ssh && touch ~/.ssh/known_hosts && echo 'github.com ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABgQCj7ndNxQowgcQnjshcLrqPEiiphnt+VTTvDP6mHBL9j1aNUkY4Ue1gvwnGLVlOhGeYrnZaMgRK6+PKCUXaDbC7qtbW8gIkhL7aGCsOr/C56SJMy/BCZfxd1nWzAOxSDPgVsmerOBYfNqltV9/hWCqBywINIR+5dIg6JTJ72pcEpEjcYgXkE2YEFXV1JHnsKgbLWNlhScqb2UmyRkQyytRLtL+38TGxkxCflmO+5Z8CSSNY7GidjMIZ7Q4zMjA2n1nGrlTDkzwDCsw+wqFPGQA179cnfGWOWRVruj16z6XyvxvjJwbz0wQZ75XK5tKSb7FNyeIEs4TT4jk+S4dhPeAUC5y+bDYirYgM4GC7uEnztnZyaVWQ7B381AK4Qdrwt51ZqExKbQpTUNn+EjqoTwvqNj4kqx5QUCI0ThS/YkOxJCXmPUWZbhjpCg56i+2aB6CmK2JGhn57K5mj0MNdBXA4/WnwH6XoPWJzK5Nyu2zB3nAZp+S5hpQs+p1vN1/wsjk=' >> ~/.ssh/known_hosts
- run: make install_deps
- run: make build
- run: make bench
- run: make test
- run: if [ "${CIRCLE_BRANCH}" == "master" ]; then $HOME/ci-scripts/circleci/github-release $GH_RELEASE_TOKEN; fi;

1
.github/CODEOWNERS vendored Normal file
View file

@ -0,0 +1 @@
* @Clever/eng-infra

20
.github/workflows/notify-ci-status.yml vendored Normal file
View file

@ -0,0 +1,20 @@
name: Notify CI status
on:
check_suite:
types: [completed]
status:
jobs:
call-workflow:
if: >-
(github.event.branches[0].name == github.event.repository.default_branch &&
(github.event.state == 'error' || github.event.state == 'failure')) ||
(github.event.check_suite.head_branch == github.event.repository.default_branch &&
github.event.check_suite.conclusion != 'success')
uses: Clever/ci-scripts/.github/workflows/reusable-notify-ci-status.yml@master
secrets:
CIRCLE_CI_INTEGRATIONS_URL: ${{ secrets.CIRCLE_CI_INTEGRATIONS_URL }}
CIRCLE_CI_INTEGRATIONS_USERNAME: ${{ secrets.CIRCLE_CI_INTEGRATIONS_USERNAME }}
CIRCLE_CI_INTEGRATIONS_PASSWORD: ${{ secrets.CIRCLE_CI_INTEGRATIONS_PASSWORD }}
SLACK_BOT_TOKEN: ${{ secrets.DAPPLE_BOT_TOKEN }}

View file

@ -5,7 +5,7 @@ SHELL := /bin/bash
PKG := github.com/Clever/amazon-kinesis-client-go
PKGS := $(shell go list ./... | grep -v /vendor )
.PHONY: download_jars run build
$(eval $(call golang-version-check,1.8))
$(eval $(call golang-version-check,1.13))
CONSUMER ?= consumer
TMP_DIR := ./tmp-jars
@ -37,7 +37,7 @@ download_jars:
mv $(TMP_DIR)/target/dependency/* $(JAR_DIR)/
# Download the STS jar file for supporting IAM Roles
ls $(JAR_DIR)/aws-java-sdk-core-*.jar | sed -e "s/.*-sdk-core-//g" | sed -e "s/\.jar//g" > /tmp/version.txt
curl -o $(JAR_DIR)/aws-java-sdk-sts-`cat /tmp/version.txt`.jar http://central.maven.org/maven2/com/amazonaws/aws-java-sdk-sts/`cat /tmp/version.txt`/aws-java-sdk-sts-`cat /tmp/version.txt`.jar
curl -o $(JAR_DIR)/aws-java-sdk-sts-`cat /tmp/version.txt`.jar https://repo1.maven.org/maven2/com/amazonaws/aws-java-sdk-sts/`cat /tmp/version.txt`/aws-java-sdk-sts-`cat /tmp/version.txt`.jar
rm -r $(TMP_DIR)
all: test build
@ -58,8 +58,8 @@ test: $(PKGS)
$(PKGS): golang-test-all-deps
$(call golang-test-all,$@)
$(GOPATH)/bin/glide:
@go get github.com/Masterminds/glide
install_deps: $(GOPATH)/bin/glide
@$(GOPATH)/bin/glide install
install_deps:
go mod vendor

1
VERSION Normal file
View file

@ -0,0 +1 @@
1.0.0

View file

@ -10,6 +10,8 @@ import (
"github.com/Clever/amazon-kinesis-client-go/kcl"
)
var lg = kv.New("amazon-kinesis-client-go")
type tagMsgPair struct {
tag string
msg []byte
@ -23,9 +25,9 @@ type batcherManagerConfig struct {
}
type batcherManager struct {
log kv.KayveeLogger
sender Sender
chkpntManager *checkpointManager
failedLogsFile kv.KayveeLogger
sender Sender
chkpntManager *checkpointManager
batchCount int
batchSize int
@ -38,12 +40,12 @@ type batcherManager struct {
}
func newBatcherManager(
sender Sender, chkpntManager *checkpointManager, cfg batcherManagerConfig, log kv.KayveeLogger,
sender Sender, chkpntManager *checkpointManager, cfg batcherManagerConfig, failedLogsFile kv.KayveeLogger,
) *batcherManager {
bm := &batcherManager{
log: log,
sender: sender,
chkpntManager: chkpntManager,
failedLogsFile: failedLogsFile,
sender: sender,
chkpntManager: chkpntManager,
batchCount: cfg.BatchCount,
batchSize: cfg.BatchSize,
@ -96,16 +98,16 @@ func (b *batcherManager) sendBatch(batcher *batcher, tag string) {
switch e := err.(type) {
case nil: // Do nothing
case PartialSendBatchError:
b.log.ErrorD("send-batch", kv.M{"msg": e.Error()})
lg.ErrorD("send-batch", kv.M{"msg": e.Error()})
for _, line := range e.FailedMessages {
b.log.ErrorD("failed-log", kv.M{"log": line})
b.failedLogsFile.ErrorD("failed-log", kv.M{"log": line, "msg": e.Error()})
}
stats.Counter("batch-log-failures", len(e.FailedMessages))
case CatastrophicSendBatchError:
b.log.CriticalD("send-batch", kv.M{"msg": e.Error()})
lg.CriticalD("send-batch", kv.M{"msg": e.Error()})
os.Exit(1)
default:
b.log.CriticalD("send-batch", kv.M{"msg": e.Error()})
lg.CriticalD("send-batch", kv.M{"msg": e.Error()})
os.Exit(1)
}
@ -184,8 +186,14 @@ func (b *batcherManager) startMessageHandler(
batcher.AddMessage(tmp.msg, tmp.pair)
} else if err != nil {
b.log.ErrorD("add-message", kv.M{
"err": err.Error(), "msg": string(tmp.msg), "tag": tmp.tag,
lg.ErrorD("add-message", kv.M{
"err": err.Error(),
"tag": tmp.tag,
})
b.failedLogsFile.ErrorD("add-message", kv.M{
"err": err.Error(),
"msg": string(tmp.msg),
"tag": tmp.tag,
})
}
stats.Counter("msg-batched", 1)

View file

@ -3,15 +3,11 @@ package batchconsumer
import (
"time"
kv "gopkg.in/Clever/kayvee-go.v6/logger"
"github.com/Clever/amazon-kinesis-client-go/batchconsumer/stats"
"github.com/Clever/amazon-kinesis-client-go/kcl"
)
type checkpointManager struct {
log kv.KayveeLogger
checkpointFreq time.Duration
checkpoint chan kcl.SequencePair
@ -19,12 +15,8 @@ type checkpointManager struct {
shutdown chan chan<- struct{}
}
func newCheckpointManager(
checkpointer kcl.Checkpointer, checkpointFreq time.Duration, log kv.KayveeLogger,
) *checkpointManager {
func newCheckpointManager(checkpointer kcl.Checkpointer, checkpointFreq time.Duration) *checkpointManager {
cm := &checkpointManager{
log: log,
checkpointFreq: checkpointFreq,
checkpoint: make(chan kcl.SequencePair),

View file

@ -13,8 +13,8 @@ import (
// Config used for BatchConsumer constructor. Any empty fields are populated with defaults.
type Config struct {
// LogFile where consumer errors and failed log lines are saved
LogFile string
// FailedLogsFile is where logs that failed to process are written.
FailedLogsFile string
// BatchInterval the upper bound on how often SendBatch is called with accumulated messages
BatchInterval time.Duration
@ -34,13 +34,13 @@ type Config struct {
// BatchConsumer is responsible for marshalling
type BatchConsumer struct {
kclProcess *kcl.KCLProcess
logfile *os.File
kclProcess *kcl.KCLProcess
failedLogsFile *os.File
}
func withDefaults(config Config) Config {
if config.LogFile == "" {
config.LogFile = "/tmp/kcl-" + time.Now().Format(time.RFC3339)
if config.FailedLogsFile == "" {
config.FailedLogsFile = "/tmp/kcl-" + time.Now().Format(time.RFC3339)
}
if config.BatchInterval == 0 {
@ -74,20 +74,19 @@ func NewBatchConsumerFromFiles(
) *BatchConsumer {
config = withDefaults(config)
file, err := os.OpenFile(config.LogFile, os.O_RDWR|os.O_CREATE|os.O_APPEND, 0666)
file, err := os.OpenFile(config.FailedLogsFile, os.O_RDWR|os.O_CREATE|os.O_APPEND, 0666)
if err != nil {
log.Fatalf("Unable to create log file: %s", err.Error())
}
failedLogsFile := logger.New("amazon-kinesis-client-go/batchconsumer")
failedLogsFile.SetOutput(file)
kvlog := logger.New("amazon-kinesis-client-go")
kvlog.SetOutput(file)
wrt := NewBatchedWriter(config, sender, kvlog)
wrt := NewBatchedWriter(config, sender, failedLogsFile)
kclProcess := kcl.New(input, output, errFile, wrt)
return &BatchConsumer{
kclProcess: kclProcess,
logfile: file,
kclProcess: kclProcess,
failedLogsFile: file,
}
}
@ -100,5 +99,5 @@ func NewBatchConsumer(config Config, sender Sender) *BatchConsumer {
// Start when called, the consumer begins ingesting messages. This function blocks.
func (b *BatchConsumer) Start() {
b.kclProcess.Run()
b.logfile.Close()
b.failedLogsFile.Close()
}

View file

@ -11,6 +11,8 @@ var ErrMessageIgnored = errors.New("Message intentionally skipped by sender")
// Sender an interface needed for batch consumer implementations
type Sender interface {
// Initialize called once before ProcessMessage and SendBatch
Initialize(shardID string)
// ProcessMessage receives a raw message and is expected to return an appropriately formatted
// message as well as a list of tags for that log line. A tag corresponds to a batch that
// it'll be put into. Typically tags are series names.

View file

@ -30,16 +30,17 @@ type datum struct {
var queue = make(chan datum, 1000)
func init() {
data := map[string]int{}
countData := map[string]int{}
gaugeData := map[string]int{}
tick := time.Tick(time.Minute)
go func() {
for {
select {
case d := <-queue:
if d.category == "counter" {
data[d.name] = data[d.name] + d.value
countData[d.name] = countData[d.name] + d.value
} else if d.category == "gauge" {
data[d.name] = d.value
gaugeData[d.name] = d.value
} else {
log.ErrorD("unknown-stat-category", logger.M{"category": d.category})
}
@ -48,10 +49,14 @@ func init() {
for _, k := range DefaultCounters {
tmp[k] = 0
}
for k, v := range data {
for k, v := range countData {
tmp[k] = v
}
for k, v := range gaugeData {
tmp[k] = v
}
log.InfoD("stats", tmp)
countData = map[string]int{}
}
}
}()

View file

@ -15,9 +15,9 @@ import (
)
type batchedWriter struct {
config Config
sender Sender
log kv.KayveeLogger
config Config
sender Sender
failedLogsFile kv.KayveeLogger
shardID string
@ -30,11 +30,11 @@ type batchedWriter struct {
lastProcessedSeq kcl.SequencePair
}
func NewBatchedWriter(config Config, sender Sender, log kv.KayveeLogger) *batchedWriter {
func NewBatchedWriter(config Config, sender Sender, failedLogsFile kv.KayveeLogger) *batchedWriter {
return &batchedWriter{
config: config,
sender: sender,
log: log,
config: config,
sender: sender,
failedLogsFile: failedLogsFile,
rateLimiter: rate.NewLimiter(rate.Limit(config.ReadRateLimit), config.ReadBurstLimit),
}
@ -49,25 +49,14 @@ func (b *batchedWriter) Initialize(shardID string, checkpointer kcl.Checkpointer
BatchInterval: b.config.BatchInterval,
}
b.chkpntManager = newCheckpointManager(checkpointer, b.config.CheckpointFreq, b.log)
b.batcherManager = newBatcherManager(b.sender, b.chkpntManager, bmConfig, b.log)
b.sender.Initialize(shardID)
b.chkpntManager = newCheckpointManager(checkpointer, b.config.CheckpointFreq)
b.batcherManager = newBatcherManager(b.sender, b.chkpntManager, bmConfig, b.failedLogsFile)
return nil
}
func (b *batchedWriter) splitMessageIfNecessary(record []byte) ([][]byte, error) {
// We handle two types of records:
// - records emitted from CWLogs Subscription
// - records emiited from KPL
if !splitter.IsGzipped(record) {
// Process a single message, from KPL
return [][]byte{record}, nil
}
// Process a batch of messages from a CWLogs Subscription
return splitter.GetMessagesFromGzippedInput(record)
}
func (b *batchedWriter) ProcessRecords(records []kcl.Record) error {
var pair kcl.SequencePair
prevPair := b.lastProcessedSeq
@ -91,7 +80,7 @@ func (b *batchedWriter) ProcessRecords(records []kcl.Record) error {
return err
}
messages, err := b.splitMessageIfNecessary(data)
messages, err := splitter.SplitMessageIfNecessary(data)
if err != nil {
return err
}
@ -103,20 +92,20 @@ func (b *batchedWriter) ProcessRecords(records []kcl.Record) error {
continue // Skip message
} else if err != nil {
stats.Counter("unknown-error", 1)
b.log.ErrorD("process-message", kv.M{"msg": err.Error(), "rawmsg": string(rawmsg)})
lg.ErrorD("process-message", kv.M{"msg": err.Error(), "rawmsg": string(rawmsg)})
continue // Don't stop processing messages because of one bad message
}
if len(tags) == 0 {
stats.Counter("no-tags", 1)
b.log.ErrorD("no-tags", kv.M{"rawmsg": string(rawmsg)})
lg.ErrorD("no-tags", kv.M{"rawmsg": string(rawmsg)})
return fmt.Errorf("No tags provided by consumer for log: %s", string(rawmsg))
}
for _, tag := range tags {
if tag == "" {
stats.Counter("blank-tag", 1)
b.log.ErrorD("blank-tag", kv.M{"rawmsg": string(rawmsg)})
lg.ErrorD("blank-tag", kv.M{"rawmsg": string(rawmsg)})
return fmt.Errorf("Blank tag provided by consumer for log: %s", string(rawmsg))
}
@ -144,9 +133,9 @@ func (b *batchedWriter) ProcessRecords(records []kcl.Record) error {
func (b *batchedWriter) Shutdown(reason string) error {
if reason == "TERMINATE" {
b.log.InfoD("terminate-signal", kv.M{"shard-id": b.shardID})
lg.InfoD("terminate-signal", kv.M{"shard-id": b.shardID})
} else {
b.log.ErrorD("shutdown-failover", kv.M{"shard-id": b.shardID, "reason": reason})
lg.ErrorD("shutdown-failover", kv.M{"shard-id": b.shardID, "reason": reason})
}
done := b.batcherManager.Shutdown()

View file

@ -14,6 +14,8 @@ import (
type ignoringSender struct{}
func (i ignoringSender) Initialize(shardID string) {}
func (i ignoringSender) ProcessMessage(rawmsg []byte) (msg []byte, tags []string, err error) {
return nil, nil, ErrMessageIgnored
}
@ -27,6 +29,7 @@ type tagBatch struct {
batch [][]byte
}
type msgAsTagSender struct {
shardID string
batches map[string][][][]byte
saveBatch chan tagBatch
shutdown chan struct{}
@ -61,6 +64,10 @@ func (i *msgAsTagSender) startBatchSaver(saveBatch <-chan tagBatch, shutdown <-c
}()
}
func (i *msgAsTagSender) Initialize(shardID string) {
i.shardID = shardID
}
func (i *msgAsTagSender) ProcessMessage(rawmsg []byte) (msg []byte, tags []string, err error) {
if "ignore" == string(rawmsg) {
return nil, nil, ErrMessageIgnored
@ -135,14 +142,14 @@ func encode(str string) string {
func TestProcessRecordsIgnoredMessages(t *testing.T) {
assert := assert.New(t)
mocklog := logger.New("testing")
mockFailedLogsFile := logger.New("testing")
mockconfig := withDefaults(Config{
BatchInterval: 10 * time.Millisecond,
CheckpointFreq: 20 * time.Millisecond,
})
mockcheckpointer := NewMockCheckpointer(5 * time.Second)
wrt := NewBatchedWriter(mockconfig, ignoringSender{}, mocklog)
wrt := NewBatchedWriter(mockconfig, ignoringSender{}, mockFailedLogsFile)
wrt.Initialize("test-shard", mockcheckpointer)
err := wrt.ProcessRecords([]kcl.Record{
@ -165,7 +172,7 @@ func TestProcessRecordsIgnoredMessages(t *testing.T) {
func TestProcessRecordsSingleBatchBasic(t *testing.T) {
assert := assert.New(t)
mocklog := logger.New("testing")
mockFailedLogsFile := logger.New("testing")
mockconfig := withDefaults(Config{
BatchCount: 2,
CheckpointFreq: 1, // Don't throttle checks points
@ -173,9 +180,11 @@ func TestProcessRecordsSingleBatchBasic(t *testing.T) {
mockcheckpointer := NewMockCheckpointer(5 * time.Second)
mocksender := NewMsgAsTagSender()
wrt := NewBatchedWriter(mockconfig, mocksender, mocklog)
wrt := NewBatchedWriter(mockconfig, mocksender, mockFailedLogsFile)
wrt.Initialize("test-shard", mockcheckpointer)
assert.Equal("test-shard", mocksender.shardID)
err := wrt.ProcessRecords([]kcl.Record{
kcl.Record{SequenceNumber: "1", Data: encode("tag1")},
kcl.Record{SequenceNumber: "2", Data: encode("tag1")},
@ -212,7 +221,7 @@ func TestProcessRecordsSingleBatchBasic(t *testing.T) {
func TestProcessRecordsMutliBatchBasic(t *testing.T) {
assert := assert.New(t)
mocklog := logger.New("testing")
mockFailedLogsFile := logger.New("testing")
mockconfig := withDefaults(Config{
BatchInterval: 100 * time.Millisecond,
CheckpointFreq: 200 * time.Millisecond,
@ -220,7 +229,7 @@ func TestProcessRecordsMutliBatchBasic(t *testing.T) {
mockcheckpointer := NewMockCheckpointer(5 * time.Second)
mocksender := NewMsgAsTagSender()
wrt := NewBatchedWriter(mockconfig, mocksender, mocklog)
wrt := NewBatchedWriter(mockconfig, mocksender, mockFailedLogsFile)
wrt.Initialize("test-shard", mockcheckpointer)
err := wrt.ProcessRecords([]kcl.Record{
@ -270,7 +279,7 @@ func TestProcessRecordsMutliBatchBasic(t *testing.T) {
func TestProcessRecordsMutliBatchWithIgnores(t *testing.T) {
assert := assert.New(t)
mocklog := logger.New("testing")
mockFailedLogsFile := logger.New("testing")
mockconfig := withDefaults(Config{
BatchInterval: 100 * time.Millisecond,
CheckpointFreq: 200 * time.Millisecond,
@ -278,7 +287,7 @@ func TestProcessRecordsMutliBatchWithIgnores(t *testing.T) {
mockcheckpointer := NewMockCheckpointer(5 * time.Second)
mocksender := NewMsgAsTagSender()
wrt := NewBatchedWriter(mockconfig, mocksender, mocklog)
wrt := NewBatchedWriter(mockconfig, mocksender, mockFailedLogsFile)
wrt.Initialize("test-shard", mockcheckpointer)
err := wrt.ProcessRecords([]kcl.Record{
@ -343,10 +352,10 @@ func TestProcessRecordsMutliBatchWithIgnores(t *testing.T) {
assert.Equal("tag3", string(mocksender.batches["tag3"][0][2]))
}
func TestStaggeredCheckpionting(t *testing.T) {
func TestStaggeredCheckpointing(t *testing.T) {
assert := assert.New(t)
mocklog := logger.New("testing")
mockFailedLogsFile := logger.New("testing")
mockconfig := withDefaults(Config{
BatchCount: 2,
BatchInterval: 100 * time.Millisecond,
@ -355,7 +364,7 @@ func TestStaggeredCheckpionting(t *testing.T) {
mockcheckpointer := NewMockCheckpointer(5 * time.Second)
mocksender := NewMsgAsTagSender()
wrt := NewBatchedWriter(mockconfig, mocksender, mocklog)
wrt := NewBatchedWriter(mockconfig, mocksender, mockFailedLogsFile)
wrt.Initialize("test-shard", mockcheckpointer)
err := wrt.ProcessRecords([]kcl.Record{

BIN
bin/dep Executable file

Binary file not shown.

View file

@ -1,19 +0,0 @@
machine:
post:
- cd $HOME && git clone --depth 1 -v git@github.com:clever/ci-scripts.git && cd ci-scripts && git show --oneline -s
- $HOME/ci-scripts/circleci/golang-install 1.8
services:
- docker
checkout:
post:
- $HOME/ci-scripts/circleci/golang-move-project
compile:
override:
- make install_deps
- make build
- make bench
test:
override:
- make test
general:
build_dir: ../.go_workspace/src/github.com/$CIRCLE_PROJECT_USERNAME/$CIRCLE_PROJECT_REPONAME

View file

@ -2,8 +2,6 @@ package main
import (
"fmt"
"log"
"os"
"time"
"gopkg.in/Clever/kayvee-go.v6/logger"
@ -11,36 +9,26 @@ import (
kbc "github.com/Clever/amazon-kinesis-client-go/batchconsumer"
)
func createDummyOutput() (logger.KayveeLogger, *os.File) {
file, err := os.OpenFile("/tmp/example-kcl-output", os.O_RDWR|os.O_CREATE|os.O_APPEND, 0666)
if err != nil {
log.Fatalf("Unable to create log file: %s", err.Error())
}
kvlog := logger.New("amazon-kinesis-client-go")
kvlog.SetOutput(file)
return kvlog, file
}
func main() {
config := kbc.Config{
BatchInterval: 10 * time.Second,
BatchCount: 500,
BatchSize: 4 * 1024 * 1024, // 4Mb
LogFile: "/tmp/example-kcl-consumer",
BatchInterval: 10 * time.Second,
BatchCount: 500,
BatchSize: 4 * 1024 * 1024, // 4Mb
FailedLogsFile: "/tmp/example-kcl-consumer",
}
output, file := createDummyOutput()
defer file.Close()
sender := &exampleSender{output: output}
sender := &exampleSender{output: logger.New("fake-output")}
consumer := kbc.NewBatchConsumer(config, sender)
consumer.Start()
}
type exampleSender struct {
output logger.KayveeLogger
shardID string
output logger.KayveeLogger
}
func (e *exampleSender) Initialize(shardID string) {
e.shardID = shardID
}
func (e *exampleSender) ProcessMessage(rawmsg []byte) ([]byte, []string, error) {

View file

@ -29,7 +29,7 @@ func (srp *sampleRecordProcessor) Initialize(shardID string, checkpointer kcl.Ch
}
func (srp *sampleRecordProcessor) shouldUpdateSequence(pair kcl.SequencePair) bool {
return srp.largestPair.IsLessThan(pair)
return srp.largestPair.IsNil() || srp.largestPair.IsLessThan(pair)
}
func (srp *sampleRecordProcessor) ProcessRecords(records []kcl.Record) error {
@ -55,6 +55,9 @@ func (srp *sampleRecordProcessor) Shutdown(reason string) error {
if reason == "TERMINATE" {
fmt.Fprintf(os.Stderr, "Was told to terminate, will attempt to checkpoint.\n")
srp.checkpointer.Shutdown()
} else if reason == "SHUTDOWN_REQUESTED" {
fmt.Fprintf(os.Stderr, "Got shutdown requested, attempt to checkpoint.\n")
srp.checkpointer.Shutdown()
} else {
fmt.Fprintf(os.Stderr, "Shutting down due to failover. Will not checkpoint.\n")
}
@ -62,11 +65,6 @@ func (srp *sampleRecordProcessor) Shutdown(reason string) error {
}
func main() {
f, err := os.Create("/tmp/kcl_stderr")
if err != nil {
panic(err)
}
defer f.Close()
kclProcess := kcl.New(os.Stdin, os.Stdout, os.Stderr, newSampleRecordProcessor())
kclProcess.Run()
}

View file

@ -2,6 +2,7 @@ package decode
import (
"encoding/json"
"errors"
"fmt"
"regexp"
"strings"
@ -16,6 +17,18 @@ var reservedFields = []string{
"prefix",
"postfix",
"decoder_msg_type",
// used by all logs
"timestamp",
"hostname",
"rawlog",
// Used by Kayvee
"prefix",
"postfix",
// Set to the deepest step of parsing that succeeded
"decoder_msg_type",
}
func stringInSlice(s string, slice []string) bool {
@ -58,6 +71,9 @@ func FieldsFromSyslog(line string) (map[string]interface{}, error) {
out[newKey] = v
}
}
out["decoder_msg_type"] = "syslog"
return out, nil
}
@ -102,6 +118,20 @@ func FieldsFromKayvee(line string) (map[string]interface{}, error) {
return m, nil
}
var userPattern = `#\sUser@Host:\s(?P<user>[a-zA-Z]+\[[a-zA-Z]+\])\s@\s.*Id:\s+(?P<id>[0-9]+)`
var userPatternRegex = regexp.MustCompile(userPattern)
func FieldsFromRDSSlowquery(rawlog string) map[string]interface{} {
out := map[string]interface{}{}
match := userPatternRegex.FindStringSubmatch(rawlog)
if len(match) == 3 {
out["user"] = match[1]
out["user_id"] = match[2]
}
return out
}
// MetricsRoute represents a metrics kv log route
type MetricsRoute struct {
Series string
@ -174,6 +204,17 @@ func getStringArray(json map[string]interface{}, key string) []string {
// LogRoutes a type alias to make it easier to add route specific filter functions
type LogRoutes []map[string]interface{}
// RuleNames returns a list of the names of the rules
func (l LogRoutes) RuleNames() []string {
names := []string{}
for _, route := range l {
name := getStringValue(route, "rule")
names = append(names, name)
}
return names
}
// MetricsRoutes filters the LogRoutes and returns a list of MetricsRoutes structs
func (l LogRoutes) MetricsRoutes() []MetricsRoute {
routes := []MetricsRoute{}
@ -354,18 +395,22 @@ func ExtractKVMeta(kvlog map[string]interface{}) KVMeta {
}
// ParseAndEnhance extracts fields from a log line, and does some post-processing to rename/add fields
func ParseAndEnhance(line string, env string, stringifyNested bool, renameESReservedFields bool, minimumTimestamp time.Time) (map[string]interface{}, error) {
out := map[string]interface{}{}
func ParseAndEnhance(line string, env string) (map[string]interface{}, error) {
syslog, syslogErr := FieldsFromSyslog(line)
if syslogErr == nil {
return decodeSyslog(syslog, env)
}
syslogFields, err := FieldsFromSyslog(line)
if err != nil {
return map[string]interface{}{}, err
fluentLog, fluentErr := FieldsFromFluentbitLog(line)
if fluentErr == nil {
return decodeFluent(fluentLog, env)
}
for k, v := range syslogFields {
out[k] = v
}
rawlog := syslogFields["rawlog"].(string)
programname := syslogFields["programname"].(string)
return nil, fmt.Errorf("unable to parse log line with errors `%v` and `%v`", syslogErr, fluentErr)
}
func decodeSyslog(syslog map[string]interface{}, env string) (map[string]interface{}, error) {
rawlog := syslog["rawlog"].(string)
// Try pulling Kayvee fields out of message
kvFields, err := FieldsFromKayvee(rawlog)
@ -375,94 +420,39 @@ func ParseAndEnhance(line string, env string, stringifyNested bool, renameESRese
}
} else {
for k, v := range kvFields {
out[k] = v
syslog[k] = v
}
}
// Try pulling RDS slowquery logs fields out of message
if syslog["hostname"] == "aws-rds" {
slowQueryFields := FieldsFromRDSSlowquery(rawlog)
for k, v := range slowQueryFields {
syslog[k] = v
}
}
// Inject additional fields that are useful in log-searching and other business logic
out["env"] = env
syslog["env"] = env
// Sometimes its useful to force `container_{env,app,task}`. A specific use-case is writing Docker events.
// A separate container monitors for start/stop events, but we set the container values in such a way that
// the logs for these events will appear in context for the app that the user is looking at instead of the
// docker-events app.
forceEnv := ""
forceApp := ""
forceTask := ""
if cEnv, ok := out["container_env"]; ok {
forceEnv = cEnv.(string)
}
if cApp, ok := out["container_app"]; ok {
forceApp = cApp.(string)
}
if cTask, ok := out["container_task"]; ok {
forceTask = cTask.(string)
}
meta, err := getContainerMeta(programname, forceEnv, forceApp, forceTask)
if err == nil {
for k, v := range meta {
out[k] = v
}
}
// this can error, which indicates inability to extract container meta. That's fine, we can ignore that.
addContainterMetaToSyslog(syslog)
// ES dynamic mappings get finnicky once you start sending nested objects.
// E.g., if one app sends a field for the first time as an object, then any log
// sent by another app containing that field /not/ as an object will fail.
// One solution is to decode nested objects as strings.
if stringifyNested {
for k, v := range out {
_, ismap := v.(map[string]interface{})
_, isarr := v.([]interface{})
if ismap || isarr {
bs, _ := json.Marshal(v)
out[k] = string(bs)
}
}
}
// ES doesn't like fields that start with underscores.
if renameESReservedFields {
for oldKey, renamedKey := range esFieldRenames {
if val, ok := out[oldKey]; ok {
out[renamedKey] = val
delete(out, oldKey)
}
}
}
msgTime, ok := out["timestamp"].(time.Time)
if ok && !msgTime.After(minimumTimestamp) {
return map[string]interface{}{}, fmt.Errorf("message's timestamp < minimumTimestamp")
}
return out, nil
}
var esFieldRenames = map[string]string{
"_index": "kv__index",
"_uid": "kv__uid",
"_type": "kv__type",
"_id": "kv__id",
"_source": "kv__source",
"_size": "kv__size",
"_all": "kv__all",
"_field_names": "kv__field_names",
"_timestamp": "kv__timestamp",
"_ttl": "kv__ttl",
"_parent": "kv__parent",
"_routing": "kv__routing",
"_meta": "kv__meta",
return syslog, nil
}
const containerMeta = `([a-z0-9-]+)--([a-z0-9-]+)\/` + // env--app
`arn%3Aaws%3Aecs%3Aus-(west|east)-[1-2]%3A[0-9]{12}%3Atask%2F` + // ARN cruft
`([0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12})` // task-id
`([0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}|[a-z0-9]{32}|jr_[a-z0-9-]+)` // task-id (ECS, both EC2 and Fargate), Glue Job ID
var containerMetaRegex = regexp.MustCompile(containerMeta)
func getContainerMeta(programname, forceEnv, forceApp, forceTask string) (map[string]string, error) {
if programname == "" {
return map[string]string{}, fmt.Errorf("no programname")
var errBadProgramname = errors.New("invalid or missing programname")
func addContainterMetaToSyslog(syslog map[string]interface{}) (map[string]interface{}, error) {
programname, ok := syslog["programname"].(string)
if !ok || programname == "" {
return syslog, errBadProgramname
}
env := ""
@ -473,25 +463,116 @@ func getContainerMeta(programname, forceEnv, forceApp, forceTask string) (map[st
env = matches[0][1]
app = matches[0][2]
task = matches[0][4]
} else {
return syslog, errBadProgramname
}
if forceEnv != "" {
env = forceEnv
// Sometimes its useful to force `container_{env,app,task}`. A specific use-case is writing Docker events.
// A separate container monitors for start/stop events, but we set the container values in such a way that
// the logs for these events will appear in context for the app that the user is looking at instead of the
// docker-events app.
if existingEnv, ok := syslog["container_env"].(string); !ok || existingEnv == "" {
syslog["container_env"] = env
}
if forceApp != "" {
app = forceApp
if existingApp, ok := syslog["container_app"].(string); !ok || existingApp == "" {
syslog["container_app"] = app
}
if forceTask != "" {
task = forceTask
if existingTask, ok := syslog["container_task"].(string); !ok || existingTask == "" {
syslog["container_task"] = task
}
if env == "" || app == "" || task == "" {
return map[string]string{}, fmt.Errorf("unable to get one or more of env/app/task")
}
return map[string]string{
"container_env": env,
"container_app": app,
"container_task": task,
}, nil
return syslog, nil
}
const (
// These are the fields in the incoming fluentbit JSON that we expect the timestamp and log
// They are referenced below by the FluentLog type; those are the ones that matter.
fluentbitTimestampField = "fluent_ts"
fluentbitLogField = "log"
// This is what we get from the fluentbig config: `time_key_format: "%Y-%m-%dT%H:%M:%S.%L%z"`
fluentTimeFormat = "2006-01-02T15:04:05.999-0700"
)
// FluentLog represents is the set of fields extracted from an incoming fluentbit log
// The struct tags must line up with the JSON schema in the fluentbit configuration, see the comment for FieldsFromFluentBitlog
type FluentLog struct {
Log *string `json:"log"`
Timestamp *string `json:"fluent_ts"`
TaskArn string `json:"ecs_task_arn"`
TaskDefinition string `json:"ecs_task_definition"`
}
// FieldsFromFluentbitLog parses JSON object with fields indicating that it's coming from FluentBit
// Its return value shares a common interface with the Syslog output - with the same four key fields
// Unlike FieldsFromSyslog, it accepts its argument as bytes, since it will be used as bytes immediately and bytes is what comes off the firehose
// In theory, the format we recieve is highly customizable, so we'll be making the following assumptions:
// - All logs are coming from aws-fargate with the ecs-metadata fields (ecs_cluster, ecs_task_arn, ecs_task_definition) enabled
// - The timestamp is in the field given by the FluentBitTimestampField constant in this package
// - The timestamp is of the format of the constant fluentTimestampFormat
// - The log is in the field given by the FluentBitlogField constant in this package
// - The ecs_task_definition is of the from {environment}--{application}--.*
func FieldsFromFluentbitLog(line string) (*FluentLog, error) {
fluentFields := FluentLog{}
if err := json.Unmarshal([]byte(line), &fluentFields); err != nil {
return nil, BadLogFormatError{Format: "fluentbit", DecodingError: err}
}
return &fluentFields, nil
}
func decodeFluent(fluentLog *FluentLog, env string) (map[string]interface{}, error) {
out := map[string]interface{}{}
if fluentLog.Timestamp == nil || *fluentLog.Timestamp == "" {
return nil, BadLogFormatError{Format: "fluentbit", DecodingError: fmt.Errorf("no timestamp found in input field %s", fluentbitTimestampField)}
}
if timeValue, err := time.Parse(fluentTimeFormat, *fluentLog.Timestamp); err == nil {
out["timestamp"] = timeValue
} else {
return nil, BadLogFormatError{Format: "fluentbit", DecodingError: fmt.Errorf("timestamp has bad format: %s", *fluentLog.Timestamp)}
}
if fluentLog.Log == nil {
return nil, BadLogFormatError{Format: "fluentbit", DecodingError: fmt.Errorf("no log found in input field %s", fluentbitLogField)}
}
log := *fluentLog.Log
out["rawlog"] = log
out["decoder_msg_type"] = "fluentbit"
out["hostname"] = "aws-fargate"
// best effort to add container_env|app|task
if parts := strings.SplitN(fluentLog.TaskDefinition, "--", 3); len(parts) == 3 {
out["container_env"] = parts[0]
out["container_app"] = parts[1]
}
if idx := strings.LastIndex(fluentLog.TaskArn, "/"); idx != -1 {
out["container_task"] = fluentLog.TaskArn[idx+1:]
}
kvFields, err := FieldsFromKayvee(log)
if err == nil {
for k, v := range kvFields {
out[k] = v
}
}
// Inject additional fields that are useful in log-searching and other business logic; mimicking syslog behavior
out["env"] = env
return out, nil
}
type BadLogFormatError struct {
Format string
DecodingError error
}
func (b BadLogFormatError) Error() string {
return fmt.Sprintf("trying to decode log as format %s: %v", b.Format, b.DecodingError)
}
func (b BadLogFormatError) Unwrap() error {
return b.DecodingError
}

View file

@ -1,7 +1,9 @@
package decode
import (
"errors"
"fmt"
"sort"
"testing"
"time"
@ -64,6 +66,19 @@ func TestKayveeDecoding(t *testing.T) {
},
ExpectedError: nil,
},
Spec{
Title: "Reserved fields are respected",
Input: `prefix {"a":"b","prefix":"no-override","postfix":"no-override",` +
`"decoder_msg_type":"no-override","timestamp":"no-override",` +
`"hostname":"no-override","rawlog":"no-override"} postfix`,
ExpectedOutput: map[string]interface{}{
"prefix": "prefix ",
"postfix": " postfix",
"a": "b",
"decoder_msg_type": "Kayvee",
},
ExpectedError: nil,
},
Spec{
Title: "Returns NonKayveeError if not JSON in body",
Input: `prefix { postfix`,
@ -126,10 +141,11 @@ func TestSyslogDecoding(t *testing.T) {
Title: "Parses Rsyslog_TraditionalFileFormat with simple log body",
Input: `Oct 25 10:20:37 some-host docker/fa3a5e338a47[1294]: log body`,
ExpectedOutput: map[string]interface{}{
"timestamp": logTime,
"hostname": "some-host",
"programname": "docker/fa3a5e338a47",
"rawlog": "log body",
"timestamp": logTime,
"hostname": "some-host",
"programname": "docker/fa3a5e338a47",
"rawlog": "log body",
"decoder_msg_type": "syslog",
},
ExpectedError: nil,
},
@ -137,10 +153,11 @@ func TestSyslogDecoding(t *testing.T) {
Title: "Parses Rsyslog_TraditionalFileFormat with haproxy access log body",
Input: `Apr 5 21:45:54 influx-service docker/0000aa112233[1234]: [httpd] 2017/04/05 21:45:54 172.17.42.1 - heka [05/Apr/2017:21:45:54 +0000] POST /write?db=foo&precision=ms HTTP/1.1 204 0 - Go 1.1 package http 123456-1234-1234-b11b-000000000000 13.688672ms`,
ExpectedOutput: map[string]interface{}{
"timestamp": logTime2,
"hostname": "influx-service",
"programname": "docker/0000aa112233",
"rawlog": "[httpd] 2017/04/05 21:45:54 172.17.42.1 - heka [05/Apr/2017:21:45:54 +0000] POST /write?db=foo&precision=ms HTTP/1.1 204 0 - Go 1.1 package http 123456-1234-1234-b11b-000000000000 13.688672ms",
"timestamp": logTime2,
"hostname": "influx-service",
"programname": "docker/0000aa112233",
"rawlog": "[httpd] 2017/04/05 21:45:54 172.17.42.1 - heka [05/Apr/2017:21:45:54 +0000] POST /write?db=foo&precision=ms HTTP/1.1 204 0 - Go 1.1 package http 123456-1234-1234-b11b-000000000000 13.688672ms",
"decoder_msg_type": "syslog",
},
ExpectedError: nil,
},
@ -148,10 +165,11 @@ func TestSyslogDecoding(t *testing.T) {
Title: "Parses Rsyslog_TraditionalFileFormat",
Input: `Apr 5 21:45:54 mongodb-some-machine whackanop: 2017/04/05 21:46:11 found 0 ops`,
ExpectedOutput: map[string]interface{}{
"timestamp": logTime2,
"hostname": "mongodb-some-machine",
"programname": "whackanop",
"rawlog": "2017/04/05 21:46:11 found 0 ops",
"timestamp": logTime2,
"hostname": "mongodb-some-machine",
"programname": "whackanop",
"rawlog": "2017/04/05 21:46:11 found 0 ops",
"decoder_msg_type": "syslog",
},
ExpectedError: nil,
},
@ -159,10 +177,35 @@ func TestSyslogDecoding(t *testing.T) {
Title: "Parses Rsyslog_ FileFormat with Kayvee payload",
Input: `2017-04-05T21:57:46.794862+00:00 ip-10-0-0-0 env--app/arn%3Aaws%3Aecs%3Aus-west-1%3A999988887777%3Atask%2Fabcd1234-1a3b-1a3b-1234-d76552f4b7ef[3291]: 2017/04/05 21:57:46 some_file.go:10: {"title":"request_finished"}`,
ExpectedOutput: map[string]interface{}{
"timestamp": logTime3,
"hostname": "ip-10-0-0-0",
"programname": `env--app/arn%3Aaws%3Aecs%3Aus-west-1%3A999988887777%3Atask%2Fabcd1234-1a3b-1a3b-1234-d76552f4b7ef`,
"rawlog": `2017/04/05 21:57:46 some_file.go:10: {"title":"request_finished"}`,
"timestamp": logTime3,
"hostname": "ip-10-0-0-0",
"programname": `env--app/arn%3Aaws%3Aecs%3Aus-west-1%3A999988887777%3Atask%2Fabcd1234-1a3b-1a3b-1234-d76552f4b7ef`,
"rawlog": `2017/04/05 21:57:46 some_file.go:10: {"title":"request_finished"}`,
"decoder_msg_type": "syslog",
},
ExpectedError: nil,
},
Spec{
Title: "Parses Rsyslog_ FileFormat without PID",
Input: `2017-04-05T21:57:46.794862+00:00 ip-10-0-0-0 env--app/arn%3Aaws%3Aecs%3Aus-west-1%3A999988887777%3Atask%2Fabcd1234-1a3b-1a3b-1234-d76552f4b7ef: 2017/04/05 21:57:46 some_file.go:10: {"title":"request_finished"}`,
ExpectedOutput: map[string]interface{}{
"timestamp": logTime3,
"hostname": "ip-10-0-0-0",
"programname": `env--app/arn%3Aaws%3Aecs%3Aus-west-1%3A999988887777%3Atask%2Fabcd1234-1a3b-1a3b-1234-d76552f4b7ef`,
"rawlog": `2017/04/05 21:57:46 some_file.go:10: {"title":"request_finished"}`,
"decoder_msg_type": "syslog",
},
ExpectedError: nil,
},
Spec{
Title: "Parses Rsyslog_ FileFormat with simple log body for slowquery",
Input: `2017-04-05T21:57:46.794862+00:00 aws-rds production-aurora-test-db: Slow query: select * from table.`,
ExpectedOutput: map[string]interface{}{
"timestamp": logTime3,
"hostname": "aws-rds",
"programname": "production-aurora-test-db",
"rawlog": "Slow query: select * from table.",
"decoder_msg_type": "syslog",
},
ExpectedError: nil,
},
@ -179,25 +222,18 @@ func TestSyslogDecoding(t *testing.T) {
fields, err := FieldsFromSyslog(spec.Input)
if spec.ExpectedError != nil {
assert.Error(err)
assert.IsType(spec.ExpectedError, err)
} else {
assert.NoError(err)
assert.True(errors.As(err, &spec.ExpectedError))
return
}
assert.NoError(err)
assert.Equal(spec.ExpectedOutput, fields)
})
}
}
type ParseAndEnhanceInput struct {
Line string
StringifyNested bool
RenameESReservedFields bool
MinimumTimestamp time.Time
}
type ParseAndEnhanceSpec struct {
Title string
Input ParseAndEnhanceInput
Line string
ExpectedOutput map[string]interface{}
ExpectedError error
}
@ -216,10 +252,15 @@ func TestParseAndEnhance(t *testing.T) {
}
logTime3 = logTime3.UTC()
logTime4, err := time.Parse(fluentTimeFormat, "2020-08-13T21:10:57.000+0000")
if err != nil {
t.Fatal(err)
}
specs := []ParseAndEnhanceSpec{
ParseAndEnhanceSpec{
Title: "Parses a Kayvee log line from an ECS app",
Input: ParseAndEnhanceInput{Line: `2017-04-05T21:57:46.794862+00:00 ip-10-0-0-0 env--app/arn%3Aaws%3Aecs%3Aus-west-1%3A999988887777%3Atask%2Fabcd1234-1a3b-1a3b-1234-d76552f4b7ef[3291]: 2017/04/05 21:57:46 some_file.go:10: {"title":"request_finished"}`},
Line: `2017-04-05T21:57:46.794862+00:00 ip-10-0-0-0 env--app/arn%3Aaws%3Aecs%3Aus-west-1%3A999988887777%3Atask%2Fabcd1234-1a3b-1a3b-1234-d76552f4b7ef[3291]: 2017/04/05 21:57:46 some_file.go:10: {"title":"request_finished"}`,
ExpectedOutput: map[string]interface{}{
"timestamp": logTime3,
"hostname": "ip-10-0-0-0",
@ -238,7 +279,7 @@ func TestParseAndEnhance(t *testing.T) {
},
ParseAndEnhanceSpec{
Title: "Parses a Kayvee log line from an ECS app, with override to container_app",
Input: ParseAndEnhanceInput{Line: `2017-04-05T21:57:46.794862+00:00 ip-10-0-0-0 env--app/arn%3Aaws%3Aecs%3Aus-west-1%3A999988887777%3Atask%2Fabcd1234-1a3b-1a3b-1234-d76552f4b7ef[3291]: 2017/04/05 21:57:46 some_file.go:10: {"title":"request_finished","container_app":"force-app"}`},
Line: `2017-04-05T21:57:46.794862+00:00 ip-10-0-0-0 env--app/arn%3Aaws%3Aecs%3Aus-west-1%3A999988887777%3Atask%2Fabcd1234-1a3b-1a3b-1234-d76552f4b7ef[3291]: 2017/04/05 21:57:46 some_file.go:10: {"title":"request_finished","container_app":"force-app"}`,
ExpectedOutput: map[string]interface{}{
"timestamp": logTime3,
"hostname": "ip-10-0-0-0",
@ -257,28 +298,29 @@ func TestParseAndEnhance(t *testing.T) {
},
ParseAndEnhanceSpec{
Title: "Parses a non-Kayvee log line",
Input: ParseAndEnhanceInput{Line: `2017-04-05T21:57:46.794862+00:00 ip-10-0-0-0 env--app/arn%3Aaws%3Aecs%3Aus-west-1%3A999988887777%3Atask%2Fabcd1234-1a3b-1a3b-1234-d76552f4b7ef[3291]: some log`},
Line: `2017-04-05T21:57:46.794862+00:00 ip-10-0-0-0 env--app/arn%3Aaws%3Aecs%3Aus-west-1%3A999988887777%3Atask%2Fabcd1234-1a3b-1a3b-1234-d76552f4b7ef[3291]: some log`,
ExpectedOutput: map[string]interface{}{
"timestamp": logTime3,
"hostname": "ip-10-0-0-0",
"programname": `env--app/arn%3Aaws%3Aecs%3Aus-west-1%3A999988887777%3Atask%2Fabcd1234-1a3b-1a3b-1234-d76552f4b7ef`,
"rawlog": `some log`,
"env": "deploy-env",
"container_env": "env",
"container_app": "app",
"container_task": "abcd1234-1a3b-1a3b-1234-d76552f4b7ef",
"timestamp": logTime3,
"hostname": "ip-10-0-0-0",
"programname": `env--app/arn%3Aaws%3Aecs%3Aus-west-1%3A999988887777%3Atask%2Fabcd1234-1a3b-1a3b-1234-d76552f4b7ef`,
"rawlog": `some log`,
"env": "deploy-env",
"decoder_msg_type": "syslog",
"container_env": "env",
"container_app": "app",
"container_task": "abcd1234-1a3b-1a3b-1234-d76552f4b7ef",
},
ExpectedError: nil,
},
ParseAndEnhanceSpec{
Title: "Fails to parse non-RSyslog log line",
Input: ParseAndEnhanceInput{Line: `not rsyslog`},
Title: "Fails to parse non-RSyslog, non-Fluent log line",
Line: `not rsyslog`,
ExpectedOutput: map[string]interface{}{},
ExpectedError: &syslogparser.ParserError{},
ExpectedError: fmt.Errorf(""),
},
ParseAndEnhanceSpec{
Title: "Parses JSON values",
Input: ParseAndEnhanceInput{Line: `2017-04-05T21:57:46.794862+00:00 ip-10-0-0-0 env--app/arn%3Aaws%3Aecs%3Aus-west-1%3A999988887777%3Atask%2Fabcd1234-1a3b-1a3b-1234-d76552f4b7ef[3291]: 2017/04/05 21:57:46 some_file.go:10: {"title":"request_finished", "nested": {"a":"b"}}`},
Line: `2017-04-05T21:57:46.794862+00:00 ip-10-0-0-0 env--app/arn%3Aaws%3Aecs%3Aus-west-1%3A999988887777%3Atask%2Fabcd1234-1a3b-1a3b-1234-d76552f4b7ef[3291]: 2017/04/05 21:57:46 some_file.go:10: {"title":"request_finished", "nested": {"a":"b"}}`,
ExpectedOutput: map[string]interface{}{
"timestamp": logTime3,
"hostname": "ip-10-0-0-0",
@ -296,222 +338,323 @@ func TestParseAndEnhance(t *testing.T) {
},
ExpectedError: nil,
},
ParseAndEnhanceSpec{
Title: "Has the option to stringify object values",
Input: ParseAndEnhanceInput{
Line: `2017-04-05T21:57:46.794862+00:00 ip-10-0-0-0 env--app/arn%3Aaws%3Aecs%3Aus-west-1%3A999988887777%3Atask%2Fabcd1234-1a3b-1a3b-1234-d76552f4b7ef[3291]: 2017/04/05 21:57:46 some_file.go:10: {"title":"request_finished", "nested": {"a":"b"}}`,
StringifyNested: true,
},
ExpectedOutput: map[string]interface{}{
"timestamp": logTime3,
"hostname": "ip-10-0-0-0",
"programname": `env--app/arn%3Aaws%3Aecs%3Aus-west-1%3A999988887777%3Atask%2Fabcd1234-1a3b-1a3b-1234-d76552f4b7ef`,
"rawlog": `2017/04/05 21:57:46 some_file.go:10: {"title":"request_finished", "nested": {"a":"b"}}`,
"title": "request_finished",
"decoder_msg_type": "Kayvee",
"prefix": "2017/04/05 21:57:46 some_file.go:10: ",
"postfix": "",
"env": "deploy-env",
"container_env": "env",
"container_app": "app",
"container_task": "abcd1234-1a3b-1a3b-1234-d76552f4b7ef",
"nested": `{"a":"b"}`,
},
ExpectedError: nil,
},
ParseAndEnhanceSpec{
Title: "Has the option to stringify array values",
Input: ParseAndEnhanceInput{
Line: `2017-04-05T21:57:46.794862+00:00 ip-10-0-0-0 env--app/arn%3Aaws%3Aecs%3Aus-west-1%3A999988887777%3Atask%2Fabcd1234-1a3b-1a3b-1234-d76552f4b7ef[3291]: 2017/04/05 21:57:46 some_file.go:10: {"title":"request_finished", "nested": [{"a":"b"}]}`,
StringifyNested: true,
},
ExpectedOutput: map[string]interface{}{
"timestamp": logTime3,
"hostname": "ip-10-0-0-0",
"programname": `env--app/arn%3Aaws%3Aecs%3Aus-west-1%3A999988887777%3Atask%2Fabcd1234-1a3b-1a3b-1234-d76552f4b7ef`,
"rawlog": `2017/04/05 21:57:46 some_file.go:10: {"title":"request_finished", "nested": [{"a":"b"}]}`,
"title": "request_finished",
"decoder_msg_type": "Kayvee",
"prefix": "2017/04/05 21:57:46 some_file.go:10: ",
"postfix": "",
"env": "deploy-env",
"container_env": "env",
"container_app": "app",
"container_task": "abcd1234-1a3b-1a3b-1234-d76552f4b7ef",
"nested": `[{"a":"b"}]`,
},
ExpectedError: nil,
},
ParseAndEnhanceSpec{
Title: "Has the option to rename reserved ES fields",
Input: ParseAndEnhanceInput{
Line: `2017-04-05T21:57:46.794862+00:00 ip-10-0-0-0 env--app/arn%3Aaws%3Aecs%3Aus-west-1%3A999988887777%3Atask%2Fabcd1234-1a3b-1a3b-1234-d76552f4b7ef[3291]: 2017/04/05 21:57:46 some_file.go:10: {"title":"request_finished", "_source": "a"}`,
RenameESReservedFields: true,
},
ExpectedOutput: map[string]interface{}{
"timestamp": logTime3,
"hostname": "ip-10-0-0-0",
"programname": `env--app/arn%3Aaws%3Aecs%3Aus-west-1%3A999988887777%3Atask%2Fabcd1234-1a3b-1a3b-1234-d76552f4b7ef`,
"rawlog": `2017/04/05 21:57:46 some_file.go:10: {"title":"request_finished", "_source": "a"}`,
"title": "request_finished",
"decoder_msg_type": "Kayvee",
"prefix": "2017/04/05 21:57:46 some_file.go:10: ",
"postfix": "",
"env": "deploy-env",
"container_env": "env",
"container_app": "app",
"container_task": "abcd1234-1a3b-1a3b-1234-d76552f4b7ef",
"kv__source": "a",
},
ExpectedError: nil,
},
ParseAndEnhanceSpec{
Title: "Errors if logTime < MinimumTimestamp",
Input: ParseAndEnhanceInput{
Line: `2017-04-05T21:57:46.794862+00:00 ip-10-0-0-0 env--app/arn%3Aaws%3Aecs%3Aus-west-1%3A999988887777%3Atask%2Fabcd1234-1a3b-1a3b-1234-d76552f4b7ef[3291]: 2017/04/05 21:57:46 some_file.go:10: {"title":"request_finished", "_source": "a"}`,
RenameESReservedFields: true,
MinimumTimestamp: time.Now().Add(100 * time.Hour * 24 * 365), // good thru year 2117
},
ExpectedOutput: map[string]interface{}{},
ExpectedError: fmt.Errorf(""),
},
ParseAndEnhanceSpec{
Title: "Accepts logs if logTime > MinimumTimestamp",
Input: ParseAndEnhanceInput{
Line: `2017-04-05T21:57:46.794862+00:00 ip-10-0-0-0 env--app/arn%3Aaws%3Aecs%3Aus-west-1%3A999988887777%3Atask%2Fabcd1234-1a3b-1a3b-1234-d76552f4b7ef[3291]: 2017/04/05 21:57:46 some_file.go:10: {"title":"request_finished", "_source": "a"}`,
RenameESReservedFields: true,
MinimumTimestamp: time.Now().Add(-100 * time.Hour * 24 * 365), // good thru year 2117
},
ExpectedOutput: map[string]interface{}{
"timestamp": logTime3,
"hostname": "ip-10-0-0-0",
"programname": `env--app/arn%3Aaws%3Aecs%3Aus-west-1%3A999988887777%3Atask%2Fabcd1234-1a3b-1a3b-1234-d76552f4b7ef`,
"rawlog": `2017/04/05 21:57:46 some_file.go:10: {"title":"request_finished", "_source": "a"}`,
"title": "request_finished",
"decoder_msg_type": "Kayvee",
"prefix": "2017/04/05 21:57:46 some_file.go:10: ",
"postfix": "",
"env": "deploy-env",
"container_env": "env",
"container_app": "app",
"container_task": "abcd1234-1a3b-1a3b-1234-d76552f4b7ef",
"kv__source": "a",
},
ExpectedError: nil,
},
ParseAndEnhanceSpec{
Title: "Accepts logs if logTime > MinimumTimestamp",
Input: ParseAndEnhanceInput{
Line: `2017-04-05T21:57:46.794862+00:00 ip-10-0-0-0 env--app/arn%3Aaws%3Aecs%3Aus-west-1%3A999988887777%3Atask%2Fabcd1234-1a3b-1a3b-1234-d76552f4b7ef[3291]: 2017/04/05 21:57:46 some_file.go:10: {"title":"request_finished", "_source": "a"}`,
RenameESReservedFields: true,
MinimumTimestamp: time.Now().Add(-100 * time.Hour * 24 * 365), // good thru year 2117
},
ExpectedOutput: map[string]interface{}{
"timestamp": logTime3,
"hostname": "ip-10-0-0-0",
"programname": `env--app/arn%3Aaws%3Aecs%3Aus-west-1%3A999988887777%3Atask%2Fabcd1234-1a3b-1a3b-1234-d76552f4b7ef`,
"rawlog": `2017/04/05 21:57:46 some_file.go:10: {"title":"request_finished", "_source": "a"}`,
"title": "request_finished",
"decoder_msg_type": "Kayvee",
"prefix": "2017/04/05 21:57:46 some_file.go:10: ",
"postfix": "",
"env": "deploy-env",
"container_env": "env",
"container_app": "app",
"container_task": "abcd1234-1a3b-1a3b-1234-d76552f4b7ef",
"kv__source": "a",
},
ExpectedError: nil,
},
ParseAndEnhanceSpec{
Title: "Log with timestamp time.RFC3339 format",
Input: ParseAndEnhanceInput{
Line: `2017-04-05T21:57:46+00:00 mongo-docker-pipeline-r10-4 diamond[24099] ` +
`Signal Received: 15`,
RenameESReservedFields: true,
MinimumTimestamp: time.Now().Add(-100 * time.Hour * 24 * 365), // year 2117
},
Line: `2017-04-05T21:57:46+00:00 mongo-docker-pipeline-r10-4 diamond[24099] Signal Received: 15`,
ExpectedOutput: map[string]interface{}{
"env": "deploy-env",
"hostname": "mongo-docker-pipeline-r10-4",
"programname": "diamond",
"rawlog": "Signal Received: 15",
"timestamp": logTime2,
"env": "deploy-env",
"hostname": "mongo-docker-pipeline-r10-4",
"programname": "diamond",
"decoder_msg_type": "syslog",
"rawlog": "Signal Received: 15",
"timestamp": logTime2,
},
},
ParseAndEnhanceSpec{
Title: "RDS Slowquery Log rdsadmin",
Line: `2017-04-05T21:57:46+00:00 aws-rds production-aurora-test-db: Slow query: # Time: 190921 16:02:59
# User@Host: rdsadmin[rdsadmin] @ localhost [] Id: 1
# Query_time: 22.741550 Lock_time: 0.000000 Rows_sent: 0 Rows_examined: 0SET timestamp=1569081779;call action start_seamless_scaling('AQEAAB1P/PAIqvTHEQFJAEkojZUoH176FGJttZ62JF5QmRehaf0S0VFTa+5MPJdYQ9k0/sekBlnMi8U=', 300000, 2);
SET timestamp=1569862702;`,
ExpectedOutput: map[string]interface{}{
"env": "deploy-env",
"hostname": "aws-rds",
"programname": "production-aurora-test-db",
"decoder_msg_type": "syslog",
"rawlog": "Slow query: # Time: 190921 16:02:59\n# User@Host: rdsadmin[rdsadmin] @ localhost [] Id: 1\n# Query_time: 22.741550 Lock_time: 0.000000 Rows_sent: 0 Rows_examined: 0SET timestamp=1569081779;call action start_seamless_scaling('AQEAAB1P/PAIqvTHEQFJAEkojZUoH176FGJttZ62JF5QmRehaf0S0VFTa+5MPJdYQ9k0/sekBlnMi8U=', 300000, 2);\nSET timestamp=1569862702;",
"timestamp": logTime2,
"user": "rdsadmin[rdsadmin]",
"user_id": "1",
},
},
ParseAndEnhanceSpec{
Title: "RDS Slowquery Log clever user",
Line: `2017-04-05T21:57:46+00:00 aws-rds production-aurora-test-db: Slow query: # Time: 190921 16:02:59
# User@Host: clever[clever] @ [10.1.11.112] Id: 868
# Query_time: 2.000270 Lock_time: 0.000000 Rows_sent: 1 Rows_examined: 0
SET timestamp=1571090308;
select sleep(2);`,
ExpectedOutput: map[string]interface{}{
"env": "deploy-env",
"hostname": "aws-rds",
"programname": "production-aurora-test-db",
"decoder_msg_type": "syslog",
"rawlog": "Slow query: # Time: 190921 16:02:59\n# User@Host: clever[clever] @ [10.1.11.112] Id: 868\n# Query_time: 2.000270 Lock_time: 0.000000 Rows_sent: 1 Rows_examined: 0\nSET timestamp=1571090308;\nselect sleep(2);",
"timestamp": logTime2,
"user": "clever[clever]",
"user_id": "868",
},
},
{
Title: "Valid fluent log",
Line: `{
"container_id": "b17752cdd47a480712eca6c9774d782f68b50a876446faec5e26c9e8846bd29e",
"container_name": "/ecs-env--app--d--2-2-env--app-e295a2dcf6f9ac849101",
"ecs_cluster": "arn:aws:ecs:us-east-1:589690932525:cluster/pod-2870425d-e4b1-4869-b2b7-a6d9fade9be1",
"ecs_task_arn": "arn:aws:ecs:us-east-1:589690932525:task/pod-2870425d-e4b1-4869-b2b7-a6d9fade9be1/7c1ccb5cb5c44582808b9e516b479eb6",
"ecs_task_definition": "env--app--d--2:2",
"fluent_ts": "2020-08-13T21:10:57.000+0000",
"log": "2020/08/13 21:10:57 {\"title\": \"request-finished\"}",
"source": "stderr"
}`,
ExpectedOutput: map[string]interface{}{
"timestamp": logTime4,
"rawlog": "2020/08/13 21:10:57 {\"title\": \"request-finished\"}",
"hostname": "aws-fargate",
"decoder_msg_type": "Kayvee",
"title": "request-finished",
"container_app": "app",
"container_env": "env",
"container_task": "7c1ccb5cb5c44582808b9e516b479eb6",
"prefix": "2020/08/13 21:10:57 ",
"postfix": "",
"env": "deploy-env",
},
ExpectedError: nil,
},
{
Title: "fluent log missing ecs_task_definition still parses and has container_task",
Line: `{
"container_id": "b17752cdd47a480712eca6c9774d782f68b50a876446faec5e26c9e8846bd29e",
"container_name": "/ecs-env--app--d--2-2-env--app-e295a2dcf6f9ac849101",
"ecs_cluster": "arn:aws:ecs:us-east-1:589690932525:cluster/pod-2870425d-e4b1-4869-b2b7-a6d9fade9be1",
"ecs_task_arn": "arn:aws:ecs:us-east-1:589690932525:task/pod-2870425d-e4b1-4869-b2b7-a6d9fade9be1/7c1ccb5cb5c44582808b9e516b479eb6",
"fluent_ts": "2020-08-13T21:10:57.000+0000",
"log": "2020/08/13 21:10:57 {\"title\": \"request-finished\"}",
"source": "stderr"
}`,
ExpectedOutput: map[string]interface{}{
"timestamp": logTime4,
"rawlog": "2020/08/13 21:10:57 {\"title\": \"request-finished\"}",
"hostname": "aws-fargate",
"decoder_msg_type": "Kayvee",
"title": "request-finished",
"container_task": "7c1ccb5cb5c44582808b9e516b479eb6",
"prefix": "2020/08/13 21:10:57 ",
"postfix": "",
"env": "deploy-env",
},
},
{
Title: "fluent log with bad ecs_task_definition format still parses and has container_task",
Line: `{
"container_id": "b17752cdd47a480712eca6c9774d782f68b50a876446faec5e26c9e8846bd29e",
"container_name": "/ecs-env--app--d--2-2-env--app-e295a2dcf6f9ac849101",
"ecs_cluster": "arn:aws:ecs:us-east-1:589690932525:cluster/pod-2870425d-e4b1-4869-b2b7-a6d9fade9be1",
"ecs_task_arn": "arn:aws:ecs:us-east-1:589690932525:task/pod-2870425d-e4b1-4869-b2b7-a6d9fade9be1/7c1ccb5cb5c44582808b9e516b479eb6",
"ecs_task_definition": "env--app",
"fluent_ts": "2020-08-13T21:10:57.000+0000",
"log": "2020/08/13 21:10:57 {\"title\": \"request-finished\"}",
"source": "stderr"
}`,
ExpectedOutput: map[string]interface{}{
"timestamp": logTime4,
"rawlog": "2020/08/13 21:10:57 {\"title\": \"request-finished\"}",
"hostname": "aws-fargate",
"decoder_msg_type": "Kayvee",
"title": "request-finished",
"container_task": "7c1ccb5cb5c44582808b9e516b479eb6",
"prefix": "2020/08/13 21:10:57 ",
"postfix": "",
"env": "deploy-env",
},
},
{
Title: "fluent log missing ecs_task_arn still parses and has containter_env and app",
Line: `{
"container_id": "b17752cdd47a480712eca6c9774d782f68b50a876446faec5e26c9e8846bd29e",
"container_name": "/ecs-env--app--d--2-2-env--app-e295a2dcf6f9ac849101",
"ecs_cluster": "arn:aws:ecs:us-east-1:589690932525:cluster/pod-2870425d-e4b1-4869-b2b7-a6d9fade9be1",
"ecs_task_definition": "env--app--d--2:2",
"fluent_ts": "2020-08-13T21:10:57.000+0000",
"log": "2020/08/13 21:10:57 {\"title\": \"request-finished\"}",
"source": "stderr"
}`,
ExpectedOutput: map[string]interface{}{
"timestamp": logTime4, // time.Date(2020, 8, 13, 21, 10, 57, 0, time.UTC),
"rawlog": "2020/08/13 21:10:57 {\"title\": \"request-finished\"}",
"hostname": "aws-fargate",
"decoder_msg_type": "Kayvee",
"title": "request-finished",
"container_env": "env",
"container_app": "app",
"prefix": "2020/08/13 21:10:57 ",
"postfix": "",
"env": "deploy-env",
},
},
{
Title: "fluent log missing timestamp is an error",
Line: `{
"container_id": "b17752cdd47a480712eca6c9774d782f68b50a876446faec5e26c9e8846bd29e",
"container_name": "/ecs-env--app--d--2-2-env--app-e295a2dcf6f9ac849101",
"ecs_cluster": "arn:aws:ecs:us-east-1:589690932525:cluster/pod-2870425d-e4b1-4869-b2b7-a6d9fade9be1",
"ecs_task_arn": "arn:aws:ecs:us-east-1:589690932525:task/pod-2870425d-e4b1-4869-b2b7-a6d9fade9be1/7c1ccb5cb5c44582808b9e516b479eb6",
"ecs_task_definition": "env--app--d--2:2",
"ts": "2020-08-13T21:10:57.000+0000",
"log": "2020/08/13 21:10:57 {\"deploy_env\":\"env\",\"level\":\"info\",\"pod-account\":\"589690932525\",\"pod-id\":\"2870425d-e4b1-4869-b2b7-a6d9fade9be1\",\"pod-region\":\"us-east-1\",\"pod-shortname\":\"us-east-1-dev-canary-2870425d\",\"source\":\"app-service\",\"title\":\"NumFDs\",\"type\":\"gauge\",\"value\":33,\"via\":\"process-metrics\"}",
"source": "stderr"
}`,
ExpectedOutput: nil,
ExpectedError: BadLogFormatError{},
},
{
Title: "fluent log with bad timestamp format is an error",
Line: `{
"container_id": "b17752cdd47a480712eca6c9774d782f68b50a876446faec5e26c9e8846bd29e",
"container_name": "/ecs-env--app--d--2-2-env--app-e295a2dcf6f9ac849101",
"ecs_cluster": "arn:aws:ecs:us-east-1:589690932525:cluster/pod-2870425d-e4b1-4869-b2b7-a6d9fade9be1",
"ecs_task_arn": "arn:aws:ecs:us-east-1:589690932525:task/pod-2870425d-e4b1-4869-b2b7-a6d9fade9be1/7c1ccb5cb5c44582808b9e516b479eb6",
"ecs_task_definition": "env--app--d--2:2",
"fluent_ts": "2020-08-13T21:10:57.000Z",
"log": "2020/08/13 21:10:57 {\"deploy_env\":\"env\",\"level\":\"info\",\"pod-account\":\"589690932525\",\"pod-id\":\"2870425d-e4b1-4869-b2b7-a6d9fade9be1\",\"pod-region\":\"us-east-1\",\"pod-shortname\":\"us-east-1-dev-canary-2870425d\",\"source\":\"app-service\",\"title\":\"NumFDs\",\"type\":\"gauge\",\"value\":33,\"via\":\"process-metrics\"}",
"source": "stderr"
}`,
ExpectedOutput: nil,
ExpectedError: BadLogFormatError{},
},
{
Title: "Valid fluent log with overrides for container_env, app, and task",
Line: `{
"container_id": "b17752cdd47a480712eca6c9774d782f68b50a876446faec5e26c9e8846bd29e",
"container_name": "/ecs-env--app--d--2-2-env--app-e295a2dcf6f9ac849101",
"ecs_cluster": "arn:aws:ecs:us-east-1:589690932525:cluster/pod-2870425d-e4b1-4869-b2b7-a6d9fade9be1",
"ecs_task_arn": "arn:aws:ecs:us-east-1:589690932525:task/pod-2870425d-e4b1-4869-b2b7-a6d9fade9be1/7c1ccb5cb5c44582808b9e516b479eb6",
"ecs_task_definition": "env--app--d--2:2",
"fluent_ts": "2020-08-13T21:10:57.000+0000",
"log": "2020/08/13 21:10:57 {\"title\": \"request-finished\", \"container_env\": \"env2\", \"container_task\": \"task\", \"container_app\": \"app2\"}",
"source": "stderr"
}`,
ExpectedOutput: map[string]interface{}{
"timestamp": logTime4,
"rawlog": "2020/08/13 21:10:57 {\"title\": \"request-finished\", \"container_env\": \"env2\", \"container_task\": \"task\", \"container_app\": \"app2\"}",
"hostname": "aws-fargate",
"decoder_msg_type": "Kayvee",
"title": "request-finished",
"container_app": "app2",
"container_env": "env2",
"container_task": "task",
"prefix": "2020/08/13 21:10:57 ",
"postfix": "",
"env": "deploy-env",
},
ExpectedError: nil,
},
}
for _, spec := range specs {
t.Run(fmt.Sprintf(spec.Title), func(t *testing.T) {
assert := assert.New(t)
fields, err := ParseAndEnhance(spec.Input.Line, "deploy-env", spec.Input.StringifyNested, spec.Input.RenameESReservedFields, spec.Input.MinimumTimestamp)
fields, err := ParseAndEnhance(spec.Line, "deploy-env")
if spec.ExpectedError != nil {
assert.Error(err)
assert.IsType(spec.ExpectedError, err)
} else {
assert.NoError(err)
assert.True(errors.As(err, &spec.ExpectedError))
return
}
assert.NoError(err)
assert.Equal(spec.ExpectedOutput, fields)
})
}
}
func TestGetContainerMeta(t *testing.T) {
assert := assert.New(t)
t.Log("Must have a programname to get container meta")
programname := ""
_, err := getContainerMeta(programname, "", "", "")
assert.Error(err)
type containerMetaSpec struct {
description string
input map[string]interface{}
wantErr bool
wantContainerEnv string
wantContainerApp string
wantContainerTask string
}
t.Log("Can parse a programname")
programname = `env1--app2/arn%3Aaws%3Aecs%3Aus-west-1%3A589690932525%3Atask%2Fabcd1234-1a3b-1a3b-1234-d76552f4b7ef`
meta, err := getContainerMeta(programname, "", "", "")
assert.NoError(err)
assert.Equal(map[string]string{
"container_env": "env1",
"container_app": "app2",
"container_task": "abcd1234-1a3b-1a3b-1234-d76552f4b7ef",
}, meta)
tests := []containerMetaSpec{
{
description: "Must have a programname to get container meta",
input: map[string]interface{}{
"programname": "",
},
wantErr: true,
},
{
description: "Can parse a programname",
input: map[string]interface{}{
"programname": `env1--app2/arn%3Aaws%3Aecs%3Aus-west-1%3A589690932525%3Atask%2Fabcd1234-1a3b-1a3b-1234-d76552f4b7ef`,
},
wantContainerEnv: "env1",
wantContainerApp: "app2",
wantContainerTask: "abcd1234-1a3b-1a3b-1234-d76552f4b7ef",
},
{
description: "Can override just 'env'",
input: map[string]interface{}{
"programname": `env1--app2/arn%3Aaws%3Aecs%3Aus-west-1%3A589690932525%3Atask%2Fabcd1234-1a3b-1a3b-1234-d76552f4b7ef`,
"container_env": "force-env",
},
wantContainerEnv: "force-env",
wantContainerApp: "app2",
wantContainerTask: "abcd1234-1a3b-1a3b-1234-d76552f4b7ef",
},
{
description: "Can override just 'app'",
input: map[string]interface{}{
"programname": `env1--app2/arn%3Aaws%3Aecs%3Aus-west-1%3A589690932525%3Atask%2Fabcd1234-1a3b-1a3b-1234-d76552f4b7ef`,
"container_app": "force-app",
},
wantContainerEnv: "env1",
wantContainerApp: "force-app",
wantContainerTask: "abcd1234-1a3b-1a3b-1234-d76552f4b7ef",
},
{
description: "Can override just 'task'",
input: map[string]interface{}{
"programname": `env1--app2/arn%3Aaws%3Aecs%3Aus-west-1%3A589690932525%3Atask%2Fabcd1234-1a3b-1a3b-1234-d76552f4b7ef`,
"container_task": "force-task",
},
wantContainerEnv: "env1",
wantContainerApp: "app2",
wantContainerTask: "force-task",
},
{
description: "Can override all fields",
input: map[string]interface{}{
"programname": `env1--app2/arn%3Aaws%3Aecs%3Aus-west-1%3A589690932525%3Atask%2Fabcd1234-1a3b-1a3b-1234-d76552f4b7ef`,
"container_task": "force-task",
"container_app": "force-app",
"container_env": "force-env",
},
wantContainerEnv: "force-env",
wantContainerApp: "force-app",
wantContainerTask: "force-task",
},
}
t.Log("Can override just 'env'")
overrideEnv := "force-env"
meta, err = getContainerMeta(programname, overrideEnv, "", "")
assert.NoError(err)
assert.Equal(map[string]string{
"container_env": overrideEnv,
"container_app": "app2",
"container_task": "abcd1234-1a3b-1a3b-1234-d76552f4b7ef",
}, meta)
for _, tcase := range tests {
assert := assert.New(t)
t.Log("Can override just 'app'")
overrideApp := "force-app"
meta, err = getContainerMeta(programname, "", overrideApp, "")
assert.NoError(err)
assert.Equal(map[string]string{
"container_env": "env1",
"container_app": overrideApp,
"container_task": "abcd1234-1a3b-1a3b-1234-d76552f4b7ef",
}, meta)
syslog := map[string]interface{}{}
for k, v := range tcase.input {
syslog[k] = v
}
t.Log("Can override just 'task'")
overrideTask := "force-task"
meta, err = getContainerMeta(programname, "", "", overrideTask)
assert.NoError(err)
assert.Equal(map[string]string{
"container_env": "env1",
"container_app": "app2",
"container_task": overrideTask,
}, meta)
newSyslog, err := addContainterMetaToSyslog(syslog)
if tcase.wantErr {
assert.Error(err)
continue
}
for k, v := range newSyslog {
switch k {
case "container_env":
assert.Equal(v, tcase.wantContainerEnv)
case "container_app":
assert.Equal(v, tcase.wantContainerApp)
case "container_task":
assert.Equal(v, tcase.wantContainerTask)
default:
assert.Equal(v, tcase.input[k])
}
}
}
t.Log("Can override all fields")
programname = `env--app/arn%3Aaws%3Aecs%3Aus-west-1%3A999988887777%3Atask%2Fabcd1234-1a3b-1a3b-1234-d76552f4b7ef`
meta, err = getContainerMeta(programname, overrideEnv, overrideApp, overrideTask)
assert.NoError(err)
assert.Equal(map[string]string{
"container_env": overrideEnv,
"container_app": overrideApp,
"container_task": overrideTask,
}, meta)
}
func TestExtractKVMeta(t *testing.T) {
@ -523,6 +666,7 @@ func TestExtractKVMeta(t *testing.T) {
Team string
Language string
Version string
Names []string
ExpectedMetricsRoutes []MetricsRoute
ExpectedAnalyticsRoutes []AnalyticsRoute
ExpectedNotificationRoutes []NotificationRoute
@ -530,10 +674,12 @@ func TestExtractKVMeta(t *testing.T) {
}{
{
Description: "log line with no routes",
Names: []string{},
Log: map[string]interface{}{"hi": "hello!"},
},
{
Description: "empty _kvmeta",
Names: []string{},
Log: map[string]interface{}{
"hi": "hello!",
"_kvmeta": map[string]interface{}{},
@ -544,6 +690,7 @@ func TestExtractKVMeta(t *testing.T) {
Team: "green",
Version: "three",
Language: "tree",
Names: []string{},
Log: map[string]interface{}{
"hi": "hello!",
"_kvmeta": map[string]interface{}{
@ -558,6 +705,7 @@ func TestExtractKVMeta(t *testing.T) {
Team: "green",
Version: "three",
Language: "tree",
Names: []string{"cool", "cool2"},
ExpectedMetricsRoutes: []MetricsRoute{
{
Series: "1,1,2,3,5,8,13",
@ -601,6 +749,7 @@ func TestExtractKVMeta(t *testing.T) {
Team: "green",
Version: "christmas",
Language: "tree",
Names: []string{"what's-this?", "there's-app-invites-everywhere"},
ExpectedAnalyticsRoutes: []AnalyticsRoute{
{
Series: "what-is-this",
@ -637,6 +786,7 @@ func TestExtractKVMeta(t *testing.T) {
Team: "slack",
Version: "evergreen",
Language: "markdown-ish",
Names: []string{"did-you-know", "what's-the-catch"},
ExpectedNotificationRoutes: []NotificationRoute{
{
RuleName: "did-you-know",
@ -683,12 +833,13 @@ func TestExtractKVMeta(t *testing.T) {
Team: "a-team",
Version: "old",
Language: "jive",
Names: []string{"last-call", "watch-out-now"},
ExpectedAlertRoutes: []AlertRoute{
{
RuleName: "last-call",
Series: "doing-it-til-we-fall",
Dimensions: []string{"who", "where"},
StatType: "guage",
StatType: "gauge",
ValueField: "status",
},
{
@ -711,7 +862,7 @@ func TestExtractKVMeta(t *testing.T) {
"rule": "last-call",
"series": "doing-it-til-we-fall",
"dimensions": []interface{}{"who", "where"},
"stat_type": "guage",
"stat_type": "gauge",
"value_field": "status",
},
map[string]interface{}{
@ -729,6 +880,9 @@ func TestExtractKVMeta(t *testing.T) {
Team: "diversity",
Version: "kv-routes",
Language: "understanding",
Names: []string{
"all-combos", "there's-app-invites-everywhere", "what's-the-catch", "last-call",
},
ExpectedMetricsRoutes: []MetricsRoute{
{
RuleName: "all-combos",
@ -757,7 +911,7 @@ func TestExtractKVMeta(t *testing.T) {
RuleName: "last-call",
Series: "doing-it-til-we-fall",
Dimensions: []string{"who", "where"},
StatType: "guage",
StatType: "gauge",
ValueField: "status",
},
},
@ -790,7 +944,7 @@ func TestExtractKVMeta(t *testing.T) {
"rule": "last-call",
"series": "doing-it-til-we-fall",
"dimensions": []interface{}{"who", "where"},
"stat_type": "guage",
"stat_type": "gauge",
"value_field": "status",
},
},
@ -807,6 +961,10 @@ func TestExtractKVMeta(t *testing.T) {
assert.Equal(test.Language, kvmeta.Language)
assert.Equal(test.Version, kvmeta.Version)
assert.EqualValues(
sort.StringSlice(test.Names), sort.StringSlice(kvmeta.Routes.RuleNames()),
)
assert.Equal(len(test.ExpectedMetricsRoutes), len(kvmeta.Routes.MetricsRoutes()))
for idx, route := range kvmeta.Routes.MetricsRoutes() {
expected := test.ExpectedMetricsRoutes[idx]
@ -853,7 +1011,7 @@ func BenchmarkFieldsFromSyslog(b *testing.B) {
func BenchmarkParseAndEnhance(b *testing.B) {
for n := 0; n < b.N; n++ {
_, err := ParseAndEnhance(benchmarkLine, "env", false, false, time.Time{})
_, err := ParseAndEnhance(benchmarkLine, "env")
if err != nil {
b.FailNow()
}

53
glide.lock generated
View file

@ -1,53 +0,0 @@
hash: 6cc2c955c4d70fea747e0056aa63e8fe6a0f8d8c9baca20cf3a5a34d0c55b1fb
updated: 2017-08-16T20:57:02.3853046Z
imports:
- name: github.com/aws/aws-sdk-go
version: 819b71cf8430e434c1eee7e7e8b0f2b8870be899
subpackages:
- aws
- aws/session
- service/firehose
- name: github.com/Clever/kinesis-to-firehose
version: dca69c87e1662c7b1f55581399544d05fdcb09ab
subpackages:
- writer
- name: github.com/Clever/syslogparser
version: fb28ad3e4340c046323b7beba685a72fd12ecbe8
subpackages:
- rfc3164
- name: github.com/jeromer/syslogparser
version: 0e4ae46ea3f08de351074b643d649d5d00661a3c
- name: github.com/xeipuuv/gojsonpointer
version: e0fe6f68307607d540ed8eac07a342c33fa1b54a
- name: github.com/xeipuuv/gojsonreference
version: e02fc20de94c78484cd5ffb007f8af96be030a45
- name: github.com/xeipuuv/gojsonschema
version: e18f0065e8c148fcf567ac43a3f8f5b66ac0720b
- name: golang.org/x/net
version: a6577fac2d73be281a500b310739095313165611
subpackages:
- context
- name: golang.org/x/time
version: f51c12702a4d776e4c1fa9b0fabab841babae631
subpackages:
- rate
- name: gopkg.in/Clever/kayvee-go.v6
version: 096364e316a52652d3493be702d8105d8d01db84
subpackages:
- logger
- router
- name: gopkg.in/yaml.v2
version: a5b47d31c556af34a302ce5d659e6fea44d90de0
testImports:
- name: github.com/davecgh/go-spew
version: 6d212800a42e8ab5c146b8ace3490ee17e5225f9
subpackages:
- spew
- name: github.com/pmezard/go-difflib
version: d8ed2627bdf02c080bf22230dbb337003b7aba2d
subpackages:
- difflib
- name: github.com/stretchr/testify
version: 69483b4bd14f5845b5a1e55bca19e954e827f1d0
subpackages:
- assert

View file

@ -1,23 +0,0 @@
package: github.com/Clever/amazon-kinesis-client-go
import:
- package: github.com/Clever/kinesis-to-firehose
subpackages:
- writer
- package: github.com/Clever/syslogparser
version: master
subpackages:
- rfc3164
- package: github.com/aws/aws-sdk-go
subpackages:
- aws
- aws/session
- service/firehose
- package: golang.org/x/time
subpackages:
- rate
- package: gopkg.in/Clever/kayvee-go.v6
version: ^6.0.0
testImport:
- package: github.com/stretchr/testify
subpackages:
- assert

22
go.mod Normal file
View file

@ -0,0 +1,22 @@
module github.com/amazon-kinesis-client-go
go 1.13
require (
github.com/Clever/amazon-kinesis-client-go v1.0.0
github.com/Clever/syslogparser v0.0.0-20170816194131-fb28ad3e4340
github.com/a8m/kinesis-producer v0.2.0
github.com/aws/aws-sdk-go v1.35.28 // indirect
github.com/davecgh/go-spew v1.1.1 // indirect
github.com/golang/protobuf v1.5.2
github.com/jeromer/syslogparser v0.0.0-20190429161531-5fbaaf06d9e7 // indirect
github.com/jpillora/backoff v0.0.0-20170918002102-8eab2debe79d // indirect
github.com/stretchr/testify v1.7.0
github.com/xeipuuv/gojsonpointer v0.0.0-20190905194746-02993c407bfb // indirect
github.com/xeipuuv/gojsonschema v1.2.1-0.20200424115421-065759f9c3d7 // indirect
golang.org/x/time v0.0.0-20200630173020-3af7569d3a1e
gopkg.in/Clever/kayvee-go.v6 v6.24.1
gopkg.in/yaml.v2 v2.3.1-0.20200602174213-b893565b90ca // indirect
gopkg.in/yaml.v3 v3.0.0-20200615113413-eeeca48fe776 // indirect
launchpad.net/gocheck v0.0.0-20140225173054-000000000087 // indirect
)

63
go.sum Normal file
View file

@ -0,0 +1,63 @@
github.com/Clever/amazon-kinesis-client-go v1.0.0 h1:rG+hkHCSe+xMGk3asESg18tiEj8X6To8vj0TRa1pKvQ=
github.com/Clever/amazon-kinesis-client-go v1.0.0/go.mod h1:jLfi8QusUjdsYazmEH0DlSO4gE1WRzVE7cs4c/fOFdI=
github.com/Clever/syslogparser v0.0.0-20170816194131-fb28ad3e4340 h1:wr8lTqPJZQpZkZgllQtrD96SecXkAu5MxzY3yJgsuCg=
github.com/Clever/syslogparser v0.0.0-20170816194131-fb28ad3e4340/go.mod h1:e7Yy7RTiIMU9pZ+dcSviX3cpod8e0CEeTUPTgBbKlRE=
github.com/a8m/kinesis-producer v0.2.0 h1:Bd5Oi4dczbTLPIZwVbm02464LIFgBqmViFj//b098xc=
github.com/a8m/kinesis-producer v0.2.0/go.mod h1:CxoFe0Y49udKMnQPkC5S5VmZZy6a+Bef9otuoH96Pv0=
github.com/aws/aws-sdk-go v1.35.28 h1:S2LuRnfC8X05zgZLC8gy/Sb82TGv2Cpytzbzz7tkeHc=
github.com/aws/aws-sdk-go v1.35.28/go.mod h1:tlPOdRjfxPBpNIwqDj61rmsnA85v9jc0Ps9+muhnW+k=
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/golang/protobuf v1.5.0/go.mod h1:FsONVRAS9T7sI+LIUmWTfcYkHO4aIWwzhcaSAoJOfIk=
github.com/golang/protobuf v1.5.2 h1:ROPKBNFfQgOUMifHyP+KYbvpjbdoFNs+aK7DXlji0Tw=
github.com/golang/protobuf v1.5.2/go.mod h1:XVQd3VNwM+JqD3oG2Ue2ip4fOMUkwXdXDdiuN0vRsmY=
github.com/google/go-cmp v0.5.5 h1:Khx7svrCpmxxtHBq5j2mp/xVjsi8hQMfNLvJFAlrGgU=
github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
github.com/jeromer/syslogparser v0.0.0-20190429161531-5fbaaf06d9e7 h1:FRDp/rRU2sThfssdHIUATCKJOms8R6A9rrhdC1sLqFI=
github.com/jeromer/syslogparser v0.0.0-20190429161531-5fbaaf06d9e7/go.mod h1:mQyv/QAgjs9+PTi/iXveno+U86nKGsltjqf3ilYx4Bg=
github.com/jmespath/go-jmespath v0.4.0 h1:BEgLn5cpjn8UN1mAw4NjwDrS35OdebyEtFe+9YPoQUg=
github.com/jmespath/go-jmespath v0.4.0/go.mod h1:T8mJZnbsbmF+m6zOOFylbeCJqk5+pHWvzYPziyZiYoo=
github.com/jmespath/go-jmespath/internal/testify v1.5.1 h1:shLQSRRSCCPj3f2gpwzGwWFoC7ycTf1rcQZHOlsJ6N8=
github.com/jmespath/go-jmespath/internal/testify v1.5.1/go.mod h1:L3OGu8Wl2/fWfCI6z80xFu9LTZmf1ZRjMHUOPmWr69U=
github.com/jpillora/backoff v0.0.0-20170918002102-8eab2debe79d h1:ix3WmphUvN0GDd0DO9MH0v6/5xTv+Xm1bPN+1UJn58k=
github.com/jpillora/backoff v0.0.0-20170918002102-8eab2debe79d/go.mod h1:2iMrUgbbvHEiQClaW2NsSzMyGHqN+rDFqY705q49KG0=
github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
github.com/stretchr/testify v1.7.0 h1:nwc3DEeHmmLAfoZucVR881uASk0Mfjw8xYJ99tb5CcY=
github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
github.com/xeipuuv/gojsonpointer v0.0.0-20180127040702-4e3ac2762d5f/go.mod h1:N2zxlSyiKSe5eX1tZViRH5QA0qijqEDrYZiPEAiq3wU=
github.com/xeipuuv/gojsonpointer v0.0.0-20190905194746-02993c407bfb h1:zGWFAtiMcyryUHoUjUJX0/lt1H2+i2Ka2n+D3DImSNo=
github.com/xeipuuv/gojsonpointer v0.0.0-20190905194746-02993c407bfb/go.mod h1:N2zxlSyiKSe5eX1tZViRH5QA0qijqEDrYZiPEAiq3wU=
github.com/xeipuuv/gojsonreference v0.0.0-20180127040603-bd5ef7bd5415 h1:EzJWgHovont7NscjpAxXsDA8S8BMYve8Y5+7cuRE7R0=
github.com/xeipuuv/gojsonreference v0.0.0-20180127040603-bd5ef7bd5415/go.mod h1:GwrjFmJcFw6At/Gs6z4yjiIwzuJ1/+UwLxMQDVQXShQ=
github.com/xeipuuv/gojsonschema v1.2.1-0.20200424115421-065759f9c3d7 h1:tdnG+ZILOafvA29+pYKP3gauEbigHll3PHsf1GQa2ms=
github.com/xeipuuv/gojsonschema v1.2.1-0.20200424115421-065759f9c3d7/go.mod h1:anYRn/JVcOK2ZgGU+IjEV4nwlhoK5sQluxsYJ78Id3Y=
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
golang.org/x/net v0.0.0-20200202094626-16171245cfb2 h1:CCH4IOTTfewWjGOlSp+zGcjutRKlBEZQ6wTn8ozI/nI=
golang.org/x/net v0.0.0-20200202094626-16171245cfb2/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/text v0.3.0 h1:g61tztE5qeGQ89tm6NTjjM9VPIm088od1l6aSorWRWg=
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
golang.org/x/time v0.0.0-20200630173020-3af7569d3a1e h1:EHBhcS0mlXEAVwNyO2dLfjToGsyY4j24pTs2ScHnX7s=
golang.org/x/time v0.0.0-20200630173020-3af7569d3a1e/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ=
golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543 h1:E7g+9GITq07hpfrRu66IVDexMakfv52eLZ2CXBWiKr4=
golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
google.golang.org/protobuf v1.26.0-rc.1/go.mod h1:jlhhOSvTdKEhbULTjvd4ARK9grFBp09yW+WbY/TyQbw=
google.golang.org/protobuf v1.26.0 h1:bxAC2xTBsZGibn2RTntX0oH50xLsqy1OxA9tTL3p/lk=
google.golang.org/protobuf v1.26.0/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc=
gopkg.in/Clever/kayvee-go.v6 v6.24.1 h1:lHxjlTFj57mfrq06PiCKIAJ0QuHHEqsyvSYvzb+gSFg=
gopkg.in/Clever/kayvee-go.v6 v6.24.1/go.mod h1:G0m6nBZj7Kdz+w2hiIaawmhXl5zp7E/K0ashol3Kb2A=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/yaml.v2 v2.2.8/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
gopkg.in/yaml.v2 v2.3.1-0.20200602174213-b893565b90ca h1:oivFrl3Vo+KfpUmTDJvz91I+BWzDPOQ+0CNR5jwTHcg=
gopkg.in/yaml.v2 v2.3.1-0.20200602174213-b893565b90ca/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
gopkg.in/yaml.v3 v3.0.0-20200615113413-eeeca48fe776 h1:tQIYjPdBoyREyB9XMu+nnTclpTYkz2zFM+lzLJFO4gQ=
gopkg.in/yaml.v3 v3.0.0-20200615113413-eeeca48fe776/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
launchpad.net/gocheck v0.0.0-20140225173054-000000000087 h1:Izowp2XBH6Ya6rv+hqbceQyw/gSGoXfH/UPoTGduL54=
launchpad.net/gocheck v0.0.0-20140225173054-000000000087/go.mod h1:hj7XX3B/0A+80Vse0e+BUHsHMTEhd0O4cpUHr/e/BUM=

View file

@ -1,54 +1,55 @@
# This is the default Clever Golang Makefile.
# It is stored in the dev-handbook repo, github.com/Clever/dev-handbook
# Please do not alter this file directly.
GOLANG_MK_VERSION := 0.1.4
GOLANG_MK_VERSION := 1.0.0
SHELL := /bin/bash
.PHONY: golang-godep-vendor golang-test-deps $(GODEP)
SYSTEM := $(shell uname -a | cut -d" " -f1 | tr '[:upper:]' '[:lower:]')
.PHONY: golang-test-deps golang-ensure-curl-installed
# set timezone to UTC for golang to match circle and deploys
export TZ=UTC
# go build flags for use across all commands which accept them
GO_BUILD_FLAGS := "-mod=vendor"
# if the gopath includes several directories, use only the first
GOPATH=$(shell echo $$GOPATH | cut -d: -f1)
# This block checks and confirms that the proper Go toolchain version is installed.
# It uses ^ matching in the semver sense -- you can be ahead by a minor
# version, but not a major version (patch is ignored).
# arg1: golang version
define golang-version-check
GOVERSION := $(shell go version | grep $(1))
_ := $(if \
$(shell go version | grep $(1)), \
@echo "", \
$(error "must be running Go version $(1)"))
_ := $(if \
$(shell \
expr >/dev/null \
`go version | cut -d" " -f3 | cut -c3- | cut -d. -f2 | sed -E 's/beta[0-9]+//'` \
\>= `echo $(1) | cut -d. -f2` \
\& \
`go version | cut -d" " -f3 | cut -c3- | cut -d. -f1` \
= `echo $(1) | cut -d. -f1` \
&& echo 1), \
@echo "", \
$(error must be running Go version ^$(1) - you are running $(shell go version | cut -d" " -f3 | cut -c3-)))
endef
export GO15VENDOREXPERIMENT=1
# FGT is a utility that exits with 1 whenever any stderr/stdout output is recieved.
# We pin its version since its a simple tool that does its job as-is;
# so we're defended against it breaking or changing in the future.
FGT := $(GOPATH)/bin/fgt
$(FGT):
go get github.com/GeertJohan/fgt
go get github.com/GeertJohan/fgt@262f7b11eec07dc7b147c44641236f3212fee89d
# Godep is a tool used to manage Golang dependencies in the style of the Go 1.5
# vendoring experiment.
GODEP := $(GOPATH)/bin/godep
$(GODEP):
go get -u github.com/tools/godep
golang-ensure-curl-installed:
@command -v curl >/dev/null 2>&1 || { echo >&2 "curl not installed. Please install curl."; exit 1; }
# Golint is a tool for linting Golang code for common errors.
# We pin its version because an update could add a new lint check which would make
# previously passing tests start failing without changing our code.
GOLINT := $(GOPATH)/bin/golint
$(GOLINT):
go get github.com/golang/lint/golint
# golang-vendor-deps installs all dependencies needed for different test cases.
golang-godep-vendor-deps: $(GODEP)
# golang-godep-vendor is a target for saving dependencies with the godep tool
# to the vendor/ directory. All nested vendor/ directories are deleted as they
# are not handled well by the Go toolchain.
# arg1: pkg path
define golang-godep-vendor
$(GODEP) save $(1)
@# remove any nested vendor directories
find vendor/ -path '*/vendor' -type d | xargs -IX rm -r X
endef
go get golang.org/x/lint/golint@738671d3881b9731cc63024d5d88cf28db875626
# golang-fmt-deps requires the FGT tool for checking output
golang-fmt-deps: $(FGT)
@ -57,7 +58,7 @@ golang-fmt-deps: $(FGT)
# arg1: pkg path
define golang-fmt
@echo "FORMATTING $(1)..."
@$(FGT) gofmt -l=true $(GOPATH)/src/$(1)/*.go
@PKG_PATH=$$(go list -f '{{.Dir}}' $(1)); $(FGT) gofmt -l=true $${PKG_PATH}/*.go
endef
# golang-lint-deps requires the golint tool for golang linting.
@ -67,7 +68,7 @@ golang-lint-deps: $(GOLINT)
# arg1: pkg path
define golang-lint
@echo "LINTING $(1)..."
@find $(GOPATH)/src/$(1)/*.go -type f | grep -v gen_ | xargs $(GOLINT)
@PKG_PATH=$$(go list -f '{{.Dir}}' $(1)); find $${PKG_PATH}/*.go -type f | grep -v gen_ | xargs $(GOLINT)
endef
# golang-lint-deps-strict requires the golint tool for golang linting.
@ -78,7 +79,7 @@ golang-lint-deps-strict: $(GOLINT) $(FGT)
# arg1: pkg path
define golang-lint-strict
@echo "LINTING $(1)..."
@find $(GOPATH)/src/$(1)/*.go -type f | grep -v gen_ | xargs $(FGT) $(GOLINT)
@PKG_PATH=$$(go list -f '{{.Dir}}' $(1)); find $${PKG_PATH}/*.go -type f | grep -v gen_ | xargs $(FGT) $(GOLINT)
endef
# golang-test-deps is here for consistency
@ -88,7 +89,7 @@ golang-test-deps:
# arg1: pkg path
define golang-test
@echo "TESTING $(1)..."
@go test -v $(1)
@go test $(GO_BUILD_FLAGS) -v $(1)
endef
# golang-test-strict-deps is here for consistency
@ -98,7 +99,7 @@ golang-test-strict-deps:
# arg1: pkg path
define golang-test-strict
@echo "TESTING $(1)..."
@go test -v -race $(1)
@go test -v $(GO_BUILD_FLAGS) -race $(1)
endef
# golang-vet-deps is here for consistency
@ -108,7 +109,7 @@ golang-vet-deps:
# arg1: pkg path
define golang-vet
@echo "VETTING $(1)..."
@go vet $(GOPATH)/src/$(1)/*.go
@go vet $(GO_BUILD_FLAGS) $(1)
endef
# golang-test-all-deps installs all dependencies needed for different test cases.
@ -136,7 +137,20 @@ $(call golang-vet,$(1))
$(call golang-test-strict,$(1))
endef
# golang-build: builds a golang binary. ensures CGO build is done during CI. This is needed to make a binary that works with a Docker alpine image.
# arg1: pkg path
# arg2: executable name
define golang-build
@echo "BUILDING..."
@if [ -z "$$CI" ]; then \
go build $(GO_BUILD_FLAGS) -o bin/$(2) $(1); \
else \
echo "-> Building CGO binary"; \
CGO_ENABLED=0 go build $(GO_BUILD_FLAGS) -installsuffix cgo -o bin/$(2) $(1); \
fi;
endef
# golang-update-makefile downloads latest version of golang.mk
golang-update-makefile:
@wget https://raw.githubusercontent.com/Clever/dev-handbook/master/make/golang.mk -O /tmp/golang.mk 2>/dev/null
@wget https://raw.githubusercontent.com/Clever/dev-handbook/master/make/golang-v1.mk -O /tmp/golang.mk 2>/dev/null
@if ! grep -q $(GOLANG_MK_VERSION) /tmp/golang.mk; then cp /tmp/golang.mk golang.mk && echo "golang.mk updated"; else echo "golang.mk is up-to-date"; fi

View file

@ -59,11 +59,13 @@ type ActionInitialize struct {
}
type Record struct {
SequenceNumber string `json:"sequenceNumber"`
SubSequenceNumber int `json:"subSequenceNumber"`
ApproximateArrivalTimestamp int `json:"approximateArrivalTimestamp"`
PartitionKey string `json:"partitionKey"`
Data string `json:"data"`
SequenceNumber string `json:"sequenceNumber"`
SubSequenceNumber int `json:"subSequenceNumber"`
PartitionKey string `json:"partitionKey"`
Data string `json:"data"`
// KCL v2 has a different format, causing failure when json.Unmarshal.
// This is not used anywhere currently, so commenting it out.
//ApproximateArrivalTimestamp int `json:"approximateArrivalTimestamp"`
}
type ActionProcessRecords struct {
@ -236,8 +238,13 @@ func (kclp *KCLProcess) handleLine(line string) (string, error) {
case ActionShutdown:
kclp.ioHandler.writeError("Received shutdown action...")
reason := action.Reason
// Shutdown should block until it's safe to shutdown the process
err = kclp.recordProcessor.Shutdown(action.Reason)
if action.Action == "shutdownRequested" {
reason = "SHUTDOWN_REQUESTED"
}
err = kclp.recordProcessor.Shutdown(reason)
if err != nil { // Log error and continue shutting down
kclp.ioHandler.writeError(fmt.Sprintf("ERR shutdown: %+#v", err))
}

View file

@ -1,20 +1,186 @@
// Package splitter provides functions for decoding various kinds of records that might come off of a kinesis stream.
// It is equipped to with the functions to unbundle KPL aggregates and CloudWatch log bundles,
// as well as apply appropriate decompression.
// KCL applications would be most interested in `SplitMessageIfNecessary` which can handle zlibbed records as well as
// CloudWatch bundles. KCL automatically unbundles KPL aggregates before passing the records to the consumer.
// Non-KCL applications (such as Lambdas consuming KPL-produced aggregates) should either use
// - KPLDeaggregate if the consumer purely wants to unbundle KPL aggregates, but will handle the raw records themselves.
// - Deaggregate if the consumer wants to apply the same decompress and split logic as SplitMessageIfNecessary
// in addition to the KPL splitting.
package splitter
import (
"bytes"
"compress/gzip"
"compress/zlib"
"crypto/md5"
"encoding/json"
"fmt"
"io/ioutil"
"regexp"
"strconv"
"time"
kpl "github.com/a8m/kinesis-producer"
"github.com/golang/protobuf/proto"
)
// kplMagicNumber is the 4-byte prefix the Amazon Kinesis Producer Library
// (KPL) places in front of every aggregated record it emits.
// https://github.com/awslabs/amazon-kinesis-producer/blob/master/aggregation-format.md
var kplMagicNumber = []byte{0xF3, 0x89, 0x9A, 0xC2}

// IsKPLAggregate reports whether data begins with the KPL aggregate magic
// prefix. Callers of KPLDeaggregate do not need to call this first.
func IsKPLAggregate(data []byte) bool {
	if len(data) < len(kplMagicNumber) {
		return false
	}
	return bytes.Equal(data[:len(kplMagicNumber)], kplMagicNumber)
}
// KPLDeaggregate takes a Kinesis record and converts it to one or more user
// records by applying KPL deaggregation.
// If the record begins with the 4-byte magic prefix that KPL uses, the single
// Kinesis record is split into its component user records.
// Otherwise, the return value is a singleton containing the original record.
func KPLDeaggregate(kinesisRecord []byte) ([][]byte, error) {
	if !IsKPLAggregate(kinesisRecord) {
		return [][]byte{kinesisRecord}, nil
	}
	// Aggregate layout: magic prefix | protobuf payload | 16-byte MD5 of payload.
	// A record that carries the magic prefix but is too short to also hold the
	// trailing checksum cannot be a real aggregate; the slice expressions below
	// would panic on it, so return it as-is instead.
	if len(kinesisRecord) < len(kplMagicNumber)+md5.Size {
		return [][]byte{kinesisRecord}, nil
	}
	src := kinesisRecord[len(kplMagicNumber) : len(kinesisRecord)-md5.Size]
	checksum := kinesisRecord[len(kinesisRecord)-md5.Size:]
	recordSum := md5.Sum(src)
	if !bytes.Equal(checksum, recordSum[:]) {
		// either the data is corrupted or this is not a KPL aggregate
		// either way, return the data as-is
		return [][]byte{kinesisRecord}, nil
	}
	dest := new(kpl.AggregatedRecord)
	if err := proto.Unmarshal(src, dest); err != nil {
		return nil, fmt.Errorf("unmarshalling proto: %v", err)
	}
	var records [][]byte
	for _, userRecord := range dest.GetRecords() {
		records = append(records, userRecord.Data)
	}
	return records, nil
}
// Deaggregate is a combination of KPLDeaggregate and SplitMessageIfNecessary.
// First it tries to KPL-deaggregate. If unsuccessful, it calls SplitMessageIfNecessary
// on the original record. If successful, it iterates over the individual user
// records and attempts to unzlib them:
//   - if a record inside an aggregate is in zlib format, the output contains the unzlibbed version;
//   - if it is not zlibbed, the output contains the record verbatim.
// A similar result can be obtained by calling KPLDeaggregate, then iterating
// over the results and calling SplitMessageIfNecessary.
// This function makes the assumption that after KPL-deaggregating, the results
// are not CloudWatch aggregates, so it doesn't need to check them for a gzip
// header. It also iterates over the user records one less time, since
// KPLDeaggregate loops over the records and we would need to loop again to unzlib.
//
// See the SplitMessageIfNecessary documentation for the format of output for
// CloudWatch log bundles.
func Deaggregate(kinesisRecord []byte) ([][]byte, error) {
	if !IsKPLAggregate(kinesisRecord) {
		return SplitMessageIfNecessary(kinesisRecord)
	}
	// Guard against a record that has the magic prefix but is too short to
	// hold the trailing 16-byte MD5 checksum; slicing below would panic on it.
	if len(kinesisRecord) < len(kplMagicNumber)+md5.Size {
		return [][]byte{kinesisRecord}, nil
	}
	src := kinesisRecord[len(kplMagicNumber) : len(kinesisRecord)-md5.Size]
	checksum := kinesisRecord[len(kinesisRecord)-md5.Size:]
	recordSum := md5.Sum(src)
	if !bytes.Equal(checksum, recordSum[:]) {
		// either the data is corrupted or this is not a KPL aggregate
		// either way, return the data as-is
		return [][]byte{kinesisRecord}, nil
	}
	dest := new(kpl.AggregatedRecord)
	if err := proto.Unmarshal(src, dest); err != nil {
		return nil, fmt.Errorf("unmarshalling proto: %v", err)
	}
	var records [][]byte
	for _, userRecord := range dest.GetRecords() {
		record, err := unzlib(userRecord.Data)
		if err != nil {
			return nil, fmt.Errorf("unzlibbing record: %w", err)
		}
		records = append(records, record)
	}
	return records, nil
}
// SplitMessageIfNecessary receives a user record and returns a slice of one or
// more records. If the record came off a kinesis stream and might be KPL
// aggregated, it needs to be deaggregated before calling this.
// This function handles three types of records:
//   - records emitted from a CWLogs Subscription (which are gzip compressed)
//   - zlib compressed records (e.g. as compressed and emitted by the Kinesis plugin for Fluent Bit)
//   - any other record (left unchanged)
//
// CloudWatch logs come as structured JSON. In the process of splitting, they
// are converted into an rsyslog format that allows fairly uniform parsing of
// the result across the AWS services that might emit logs to CloudWatch.
// Note that the timezone used in these syslog records is guessed based on the
// local env. If you need consistent timezones, set TZ=UTC in your environment.
func SplitMessageIfNecessary(userRecord []byte) ([][]byte, error) {
	// CloudWatch Logs subscription payloads arrive gzip-compressed.
	if IsGzipped(userRecord) {
		return GetMessagesFromGzippedInput(userRecord)
	}
	// Otherwise try zlib; unzlib hands the input back verbatim when the bytes
	// are not a zlib stream, so this covers the KPL single-message case too.
	record, err := unzlib(userRecord)
	if err != nil {
		return nil, err
	}
	return [][]byte{record}, nil
}
// unzlib decompresses input if it is a zlib stream; otherwise it returns the
// input unchanged. An error is returned only when the bytes begin like zlib
// data but fail to decompress fully.
func unzlib(input []byte) ([]byte, error) {
	r, err := zlib.NewReader(bytes.NewReader(input))
	if err != nil {
		// Not zlib data (or a truncated header): hand the bytes back verbatim.
		return input, nil
	}
	// The original leaked the reader; Close also verifies the stream checksum
	// was consumed cleanly, but we preserve behavior by not failing on it here.
	defer r.Close()
	out, err := ioutil.ReadAll(r)
	if err != nil {
		return nil, fmt.Errorf("reading zlib-compressed record: %v", err)
	}
	return out, nil
}
// LogEvent is a single log line within a LogEventBatch
type LogEvent struct {
ID string `json:"id"`
Timestamp int64 `json:"timestamp"`
Message string `json:"message"`
ID string `json:"id"`
Timestamp UnixTimestampMillis `json:"timestamp"`
Message string `json:"message"`
}
// UnixTimestampMillis is a time.Time that marshals (unmarshals) to (from) a
// unix timestamp with millisecond resolution.
type UnixTimestampMillis time.Time

var millisPerSecond = int64(time.Second / time.Millisecond)
var nanosPerMillisecond = int64(time.Millisecond / time.Nanosecond)

// NewUnixTimestampMillis builds the wrapper from a unix-milliseconds value.
func NewUnixTimestampMillis(ts int64) UnixTimestampMillis {
	secs := ts / millisPerSecond
	remNanos := (ts % millisPerSecond) * nanosPerMillisecond
	return UnixTimestampMillis(time.Unix(secs, remNanos))
}

// MarshalJSON renders the timestamp as a bare integer of unix milliseconds.
func (t *UnixTimestampMillis) MarshalJSON() ([]byte, error) {
	millis := time.Time(*t).UnixNano() / nanosPerMillisecond
	return []byte(strconv.FormatInt(millis, 10)), nil
}

// UnmarshalJSON parses a bare integer of unix milliseconds.
func (t *UnixTimestampMillis) UnmarshalJSON(b []byte) error {
	ts, err := strconv.ParseInt(string(b), 10, 64)
	if err != nil {
		return err
	}
	*t = NewUnixTimestampMillis(ts)
	return nil
}

// Time returns the wrapped time.Time.
func (t *UnixTimestampMillis) Time() time.Time {
	return time.Time(*t)
}
// LogEventBatch is a batch of multiple log lines, read from a KinesisStream with a CWLogs subscription
@ -67,43 +233,201 @@ const RFC3339Micro = "2006-01-02T15:04:05.999999-07:00"
// http://docs.aws.amazon.com/batch/latest/userguide/job_states.html
// "log stream name format is jobDefinitionName/default/ecs_task_id (this format may change in the future)."
const taskMeta = `([a-z0-9-]+)--([a-z0-9-]+)\/` + // env--app
const awsBatchTaskMeta = `([a-z0-9-]+)--([a-z0-9-]+)\/` + // env--app
`default\/` +
`([0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12})` // task-id
var awsBatchTaskRegex = regexp.MustCompile(awsBatchTaskMeta)
var taskRegex = regexp.MustCompile(taskMeta)
// lambda log groups are of the form /aws/lambda/<env>--<app>
var awsLambdaLogGroupRegex = regexp.MustCompile(`^/aws/lambda/([a-z0-9-]+)--([a-z0-9-]+)$`)

// awsLambdaRequestIDRegex matches a UUID-shaped Lambda request ID anywhere in a log message.
var awsLambdaRequestIDRegex = regexp.MustCompile(`[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}`)

// fargate log groups are of the form /ecs/<env>--<app>
// fargate log streams are of the form fargate/<container name>/<ecs task id>
var awsFargateLogGroupRegex = regexp.MustCompile(`^/ecs/([a-z0-9-]+)--([a-z0-9-]+)$`)
var awsFargateLogStreamRegex = regexp.MustCompile(`^fargate/([a-z0-9-]+)--([a-z0-9-]+)/([a-z0-9]+)$`)

// RDS slowquery log groups are in the form of /aws/rds/cluster/<database name>/slowquery
var awsRDSLogGroupRegex = regexp.MustCompile(`^/aws/rds/cluster/([a-z0-9-]+)/slowquery$`)

// glue log groups are of the form /aws-glue/jobs/<env>/<app>/<pod-id>
// glue log streams are of the form <job id>-<"driver" | "1" | "progress-bar">
var awsGlueLogGroupRegex = regexp.MustCompile(`^/aws-glue/jobs/([a-z0-9-]+)/([a-z0-9-]+)/([a-z0-9-]+)$`)
var awsGlueLogStreamRegex = regexp.MustCompile(`^(jr_[a-z0-9-]+)-.*$`)

// arn and task cruft to satisfy parsing later on: https://github.com/Clever/amazon-kinesis-client-go/blob/94aacdf8339bd2cc8400d3bcb323dc1bce2c8422/decode/decode.go#L421-L425
const arnCruft = `/arn%3Aaws%3Aecs%3Aus-east-1%3A999988887777%3Atask%2F`
const taskCruft = `12345678-1234-1234-1234-555566667777`
type RSysLogMessage struct {
Timestamp time.Time
Hostname string
ProgramName string
Message string
}
func (r RSysLogMessage) String() string {
// Adding an extra Microsecond forces `Format` to include all 6 digits within the micorsecond format.
// Otherwise, time.Format omits trailing zeroes. (https://github.com/golang/go/issues/12472)
return fmt.Sprintf(`%s %s %s: %s`,
r.Timestamp.Add(time.Microsecond).Format(RFC3339Micro),
r.Hostname, r.ProgramName, r.Message)
}
// splitAWSBatch converts a CloudWatch batch from an AWS Batch job into rsyslog
// messages. The second return value is false when the log stream does not
// match the AWS Batch "env--app/default/task-id" layout.
func splitAWSBatch(b LogEventBatch) ([]RSysLogMessage, bool) {
	m := awsBatchTaskRegex.FindAllStringSubmatch(b.LogStream, 1)
	if len(m) != 1 {
		return nil, false
	}
	env, app, task := m[0][1], m[0][2], m[0][3]
	program := env + "--" + app + arnCruft + task
	msgs := []RSysLogMessage{}
	for _, ev := range b.LogEvents {
		msgs = append(msgs, RSysLogMessage{
			Timestamp:   ev.Timestamp.Time(),
			ProgramName: program,
			Hostname:    "aws-batch",
			Message:     ev.Message,
		})
	}
	return msgs, true
}
// splitAWSLambda converts a CloudWatch batch from a Lambda log group
// (/aws/lambda/<env>--<app>) into rsyslog messages. The boolean is false when
// the log group does not look like a Lambda group.
func splitAWSLambda(b LogEventBatch) ([]RSysLogMessage, bool) {
	groups := awsLambdaLogGroupRegex.FindAllStringSubmatch(b.LogGroup, 1)
	if len(groups) != 1 {
		return nil, false
	}
	env, app := groups[0][1], groups[0][2]
	msgs := []RSysLogMessage{}
	for _, ev := range b.LogEvents {
		// Use the request ID embedded in the message (e.g.
		// 1f7fcc25-015f-11e8-a728-a1b6168ab9aa) as the task ID; fall back to
		// placeholder cruft because the rsyslog message must contain a
		// non-empty task ID to satisfy later parsing.
		task := taskCruft
		if ids := awsLambdaRequestIDRegex.FindAllString(ev.Message, 1); len(ids) == 1 {
			task = ids[0]
		}
		msgs = append(msgs, RSysLogMessage{
			Timestamp:   ev.Timestamp.Time(),
			ProgramName: env + "--" + app + arnCruft + task,
			Hostname:    "aws-lambda",
			Message:     ev.Message,
		})
	}
	return msgs, true
}
// splitAWSFargate converts a CloudWatch batch from a Fargate service into
// rsyslog messages. It requires both the /ecs/<env>--<app> log group shape and
// the fargate/<container>/<ecs task id> log stream shape; otherwise the
// boolean is false.
func splitAWSFargate(b LogEventBatch) ([]RSysLogMessage, bool) {
	groups := awsFargateLogGroupRegex.FindAllStringSubmatch(b.LogGroup, 1)
	if len(groups) != 1 {
		return nil, false
	}
	streams := awsFargateLogStreamRegex.FindAllStringSubmatch(b.LogStream, 1)
	if len(streams) != 1 {
		return nil, false
	}
	env, app := groups[0][1], groups[0][2]
	taskID := streams[0][3]
	program := env + "--" + app + arnCruft + taskID
	msgs := []RSysLogMessage{}
	for _, ev := range b.LogEvents {
		msgs = append(msgs, RSysLogMessage{
			Timestamp:   ev.Timestamp.Time(),
			ProgramName: program,
			Hostname:    "aws-fargate",
			Message:     ev.Message,
		})
	}
	return msgs, true
}
// splitAWSRDS converts a CloudWatch batch from an RDS slowquery log group
// (/aws/rds/cluster/<db>/slowquery) into rsyslog messages, using the database
// name as the program name. The boolean is false when the group doesn't match.
func splitAWSRDS(b LogEventBatch) ([]RSysLogMessage, bool) {
	groups := awsRDSLogGroupRegex.FindAllStringSubmatch(b.LogGroup, 1)
	if len(groups) != 1 {
		return nil, false
	}
	dbName := groups[0][1]
	msgs := []RSysLogMessage{}
	for _, ev := range b.LogEvents {
		msgs = append(msgs, RSysLogMessage{
			Timestamp:   ev.Timestamp.Time(),
			ProgramName: dbName,
			Hostname:    "aws-rds",
			Message:     ev.Message,
		})
	}
	return msgs, true
}
// splitAWSGlue converts a CloudWatch batch from a Glue job into rsyslog
// messages, using the jr_... job run ID from the log stream as the task ID.
// The boolean is false when either the group or stream shape doesn't match.
func splitAWSGlue(b LogEventBatch) ([]RSysLogMessage, bool) {
	groups := awsGlueLogGroupRegex.FindAllStringSubmatch(b.LogGroup, 1)
	if len(groups) != 1 {
		return nil, false
	}
	streams := awsGlueLogStreamRegex.FindAllStringSubmatch(b.LogStream, 1)
	if len(streams) != 1 {
		return nil, false
	}
	env, app := groups[0][1], groups[0][2]
	jobID := streams[0][1]
	program := env + "--" + app + arnCruft + jobID
	msgs := []RSysLogMessage{}
	for _, ev := range b.LogEvents {
		msgs = append(msgs, RSysLogMessage{
			Timestamp:   ev.Timestamp.Time(),
			ProgramName: program,
			Hostname:    "aws-glue",
			Message:     ev.Message,
		})
	}
	return msgs, true
}
// splitDefault handles batches whose group/stream match none of the known AWS
// services. It mocks the program name from the group and stream, and pads with
// placeholder ARN/task cruft so downstream parsing still succeeds.
func splitDefault(b LogEventBatch) []RSysLogMessage {
	program := b.LogGroup + "--" + b.LogStream + arnCruft + taskCruft
	msgs := make([]RSysLogMessage, 0, len(b.LogEvents))
	for _, ev := range b.LogEvents {
		msgs = append(msgs, RSysLogMessage{
			Timestamp:   ev.Timestamp.Time(),
			Hostname:    b.LogStream,
			ProgramName: program,
			Message:     ev.Message,
		})
	}
	return msgs
}
// stringify renders each rsyslog message to its byte representation.
func stringify(rsyslogs []RSysLogMessage) [][]byte {
	out := make([][]byte, 0, len(rsyslogs))
	for _, msg := range rsyslogs {
		out = append(out, []byte(msg.String()))
	}
	return out
}
// Split takes a LogEventBatch and separates into a slice of enriched log lines
// Lines are enhanced by adding an Rsyslog prefix, which should be handled correctly by
// the subsequent decoding logic.
func Split(b LogEventBatch) [][]byte {
env := "unknown"
app := "unknown"
task := "00001111-2222-3333-4444-555566667777"
matches := taskRegex.FindAllStringSubmatch(b.LogStream, 1)
if len(matches) == 1 {
env = matches[0][1]
app = matches[0][2]
task = matches[0][3]
if rsyslogMsgs, ok := splitAWSLambda(b); ok {
return stringify(rsyslogMsgs)
} else if rsyslogMsgs, ok := splitAWSFargate(b); ok {
return stringify(rsyslogMsgs)
} else if rsyslogMsgs, ok := splitAWSBatch(b); ok {
return stringify(rsyslogMsgs)
} else if rsyslogMsgs, ok := splitAWSRDS(b); ok {
return stringify(rsyslogMsgs)
} else if rsyslogMsgs, ok := splitAWSGlue(b); ok {
return stringify(rsyslogMsgs)
}
rsyslogPrefix := `%s %s %s[%d]: %s`
// programName is a mocked ARN in the format expected by our log decoders
programName := env + "--" + app + `/arn%3Aaws%3Aecs%3Aus-east-1%3A999988887777%3Atask%2F` + task
mockPid := 1
hostname := "aws-batch"
out := [][]byte{}
for _, event := range b.LogEvents {
// Adding an extra Microsecond forces `Format` to include all 6 digits within the micorsecond format.
// Otherwise, time.Format omits trailing zeroes. (https://github.com/golang/go/issues/12472)
nsecs := event.Timestamp*int64(time.Millisecond) + int64(time.Microsecond)
logTime := time.Unix(0, nsecs).UTC().Format(RFC3339Micro)
// Fake an RSyslog prefix, expected by consumers
formatted := fmt.Sprintf(rsyslogPrefix, logTime, hostname, programName, mockPid, event.Message)
out = append(out, []byte(formatted))
}
return out
return stringify(splitDefault(b))
}

View file

@ -3,13 +3,29 @@ package splitter
import (
"bytes"
"compress/gzip"
"compress/zlib"
"crypto/md5"
b64 "encoding/base64"
"encoding/json"
"os"
"testing"
"time"
"github.com/Clever/amazon-kinesis-client-go/decode"
kpl "github.com/a8m/kinesis-producer"
"github.com/golang/protobuf/proto"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
// TestMain pins the process timezone before any test runs.
// In the conversion of CloudWatch LogEvent struct to an RSyslog struct to a string,
// the timezone used in the final string depends on the locally set timezone.
// in order for tests to pass, we set TZ to UTC
func TestMain(m *testing.M) {
	// NOTE(review): this relies on the time package reading TZ lazily, i.e.
	// nothing touching time.Local before this Setenv — confirm if flakes appear.
	os.Setenv("TZ", "UTC")
	os.Exit(m.Run())
}
func TestUnpacking(t *testing.T) {
input := "H4sIAAAAAAAAADWOTQuCQBRF/8ow6wj6ENRdhLXIClJoERKTvsZHOiPzxiLE/96YtTzcy72n4zUQCQnpuwEe8vXxkJ6O8XUfJclqG/EJ1y8FZkgq3RYvYfMy1pJcUGm5NbptXDZSYg2IekRqb5QbbCxqtcHKgiEeXrJvL3qCsgN2HIuxbtFpWFG7sdky8L1ZECwXc9+b/PUGgXPMfnrspxeydQn5A5VkJYjKlkzfWeGWUInhme1QASEx+qpNeZ/1H1PFPn3yAAAA"
@ -28,7 +44,7 @@ func TestUnpacking(t *testing.T) {
LogEvents: []LogEvent{
{
ID: "",
Timestamp: 1498519943285,
Timestamp: NewUnixTimestampMillis(1498519943285),
Message: "CWL CONTROL MESSAGE: Checking health of destination Kinesis stream.",
},
},
@ -98,7 +114,7 @@ func TestFullLoop(t *testing.T) {
assert.Equal(t, leb, output)
}
func TestSplit(t *testing.T) {
func TestSplitBatch(t *testing.T) {
input := LogEventBatch{
MessageType: "DATA_MESSAGE",
Owner: "123456789012",
@ -108,20 +124,433 @@ func TestSplit(t *testing.T) {
LogEvents: []LogEvent{
{
ID: "99999992379011144044923130086453437181614530551221780480",
Timestamp: 1498519943285,
Timestamp: NewUnixTimestampMillis(1498519943285),
Message: "some log line",
},
{
ID: "99999992387663833181953011865369295871402094815542181889",
Timestamp: 1498519943285,
Timestamp: NewUnixTimestampMillis(1498519943285),
Message: "another log line",
},
},
}
lines := Split(input)
expected := [][]byte{
[]byte("2017-06-26T23:32:23.285001+00:00 aws-batch env--app/arn%3Aaws%3Aecs%3Aus-east-1%3A999988887777%3Atask%2F12345678-1234-1234-1234-555566667777[1]: some log line"),
[]byte("2017-06-26T23:32:23.285001+00:00 aws-batch env--app/arn%3Aaws%3Aecs%3Aus-east-1%3A999988887777%3Atask%2F12345678-1234-1234-1234-555566667777[1]: another log line"),
[]byte("2017-06-26T23:32:23.285001+00:00 aws-batch env--app/arn%3Aaws%3Aecs%3Aus-east-1%3A999988887777%3Atask%2F12345678-1234-1234-1234-555566667777: some log line"),
[]byte("2017-06-26T23:32:23.285001+00:00 aws-batch env--app/arn%3Aaws%3Aecs%3Aus-east-1%3A999988887777%3Atask%2F12345678-1234-1234-1234-555566667777: another log line"),
}
assert.Equal(t, expected, lines)
}
// TestSplitLambda checks that a Lambda log batch is split into rsyslog lines:
// the task ID is taken from the request ID found in each message, falling back
// to placeholder cruft when the message carries none; the result must also
// round-trip through decode.ParseAndEnhance.
func TestSplitLambda(t *testing.T) {
	input := LogEventBatch{
		MessageType:         "DATA_MESSAGE",
		Owner:               "123456789012",
		LogGroup:            "/aws/lambda/env--app",
		LogStream:           "2018/01/24/[3]62695bfa96de46938f56b156f5235205",
		SubscriptionFilters: []string{"ForwardLogsToKinesis"},
		LogEvents: []LogEvent{
			{
				ID:        "99999992379011144044923130086453437181614530551221780480",
				Timestamp: NewUnixTimestampMillis(1498519943285),
				Message:   "START RequestId: 8edbd53f-64c7-4a3c-bf1e-efeff40f6512 Version: 3",
			},
			{
				ID:        "99999992387663833181953011865369295871402094815542181889",
				Timestamp: NewUnixTimestampMillis(1498519943285),
				Message:   `{"aws_request_id":"8edbd53f-64c7-4a3c-bf1e-efeff40f6512","level":"info","source":"app","title":"some-log-title"}`,
			},
			{
				ID:        "99999992387663833181953011865369295871402094815542181889",
				Timestamp: NewUnixTimestampMillis(1498519943285),
				Message:   `Example message that doesn't contain a request ID`,
			},
		},
	}
	lines := Split(input)
	expected := [][]byte{
		[]byte(`2017-06-26T23:32:23.285001+00:00 aws-lambda env--app/arn%3Aaws%3Aecs%3Aus-east-1%3A999988887777%3Atask%2F8edbd53f-64c7-4a3c-bf1e-efeff40f6512: START RequestId: 8edbd53f-64c7-4a3c-bf1e-efeff40f6512 Version: 3`),
		[]byte(`2017-06-26T23:32:23.285001+00:00 aws-lambda env--app/arn%3Aaws%3Aecs%3Aus-east-1%3A999988887777%3Atask%2F8edbd53f-64c7-4a3c-bf1e-efeff40f6512: {"aws_request_id":"8edbd53f-64c7-4a3c-bf1e-efeff40f6512","level":"info","source":"app","title":"some-log-title"}`),
		[]byte(`2017-06-26T23:32:23.285001+00:00 aws-lambda env--app/arn%3Aaws%3Aecs%3Aus-east-1%3A999988887777%3Atask%2F12345678-1234-1234-1234-555566667777: Example message that doesn't contain a request ID`),
	}
	assert.Equal(t, expected, lines)
	// Verify each expected line is parseable by the downstream decoder.
	for i, line := range expected {
		enhanced, err := decode.ParseAndEnhance(string(line), "")
		require.Nil(t, err)
		assert.Equal(t, "aws-lambda", enhanced["hostname"])
		assert.Equal(t, "env", enhanced["container_env"])
		assert.Equal(t, "app", enhanced["container_app"])
		if i != len(expected)-1 /* last line doesn't have a request ID */ {
			assert.Equal(t, "8edbd53f-64c7-4a3c-bf1e-efeff40f6512", enhanced["container_task"])
		}
	}
}
// TestSplitFargate checks that a Fargate log batch is split into rsyslog lines
// with the ECS task ID from the log stream, and that decode can re-parse them.
func TestSplitFargate(t *testing.T) {
	input := LogEventBatch{
		MessageType:         "DATA_MESSAGE",
		Owner:               "123456789012",
		LogGroup:            "/ecs/production--clever-com-router",
		LogStream:           "fargate/clever-dev--clever-com-router/27b22d5d68aa4bd3923c95e7f32a3852",
		SubscriptionFilters: []string{"ForwardLogsToKinesis"},
		LogEvents: []LogEvent{
			{
				ID:        "99999992379011144044923130086453437181614530551221780480",
				Timestamp: NewUnixTimestampMillis(1498519943285),
				Message:   "Starting haproxy: haproxy.",
			},
		},
	}
	lines := Split(input)
	expected := [][]byte{
		[]byte(`2017-06-26T23:32:23.285001+00:00 aws-fargate production--clever-com-router/arn%3Aaws%3Aecs%3Aus-east-1%3A999988887777%3Atask%2F27b22d5d68aa4bd3923c95e7f32a3852: Starting haproxy: haproxy.`),
	}
	assert.Equal(t, expected, lines)
	for _, line := range expected {
		enhanced, err := decode.ParseAndEnhance(string(line), "")
		require.Nil(t, err)
		assert.Equal(t, "aws-fargate", enhanced["hostname"])
		assert.Equal(t, "production", enhanced["container_env"])
		assert.Equal(t, "clever-com-router", enhanced["container_app"])
		assert.Equal(t, "27b22d5d68aa4bd3923c95e7f32a3852", enhanced["container_task"])
	}
}
// TestSplitDefault checks the fallback path: a batch whose group/stream match
// no known AWS service gets the stream as hostname and placeholder ARN/task cruft.
func TestSplitDefault(t *testing.T) {
	input := LogEventBatch{
		MessageType:         "DATA_MESSAGE",
		Owner:               "123456789012",
		LogGroup:            "vpn_flow_logs",
		LogStream:           "eni-43403819-all",
		SubscriptionFilters: []string{"SomeSubscription"},
		LogEvents: []LogEvent{
			{
				ID:        "99999992379011144044923130086453437181614530551221780480",
				Timestamp: NewUnixTimestampMillis(1498519943285),
				Message:   "2 589690932525 eni-43403819 10.0.0.233 172.217.6.46 64067 443 17 8 3969 1516891809 1516891868 ACCEPT OK",
			},
		},
	}
	lines := Split(input)
	expected := [][]byte{
		[]byte(`2017-06-26T23:32:23.285001+00:00 eni-43403819-all vpn_flow_logs--eni-43403819-all/arn%3Aaws%3Aecs%3Aus-east-1%3A999988887777%3Atask%2F12345678-1234-1234-1234-555566667777: 2 589690932525 eni-43403819 10.0.0.233 172.217.6.46 64067 443 17 8 3969 1516891809 1516891868 ACCEPT OK`),
	}
	assert.Equal(t, expected, lines)
}
// TestSplitRDS checks that an RDS slowquery batch uses the database name from
// the log group as the program name, and that decode can re-parse the lines.
func TestSplitRDS(t *testing.T) {
	input := LogEventBatch{
		MessageType:         "DATA_MESSAGE",
		Owner:               "123456789012",
		LogGroup:            "/aws/rds/cluster/production-aurora-test-db/slowquery",
		LogStream:           "clever-dev-aurora-test-db",
		SubscriptionFilters: []string{"ForwardLogsToKinesis"},
		LogEvents: []LogEvent{
			{
				ID:        "99999992379011144044923130086453437181614530551221780480",
				Timestamp: NewUnixTimestampMillis(1498519943285),
				Message:   "Slow query: select * from table.",
			},
		},
	}
	lines := Split(input)
	expected := [][]byte{
		[]byte(`2017-06-26T23:32:23.285001+00:00 aws-rds production-aurora-test-db: Slow query: select * from table.`),
	}
	assert.Equal(t, expected, lines)
	for _, line := range expected {
		enhanced, err := decode.ParseAndEnhance(string(line), "")
		require.Nil(t, err)
		assert.Equal(t, "aws-rds", enhanced["hostname"])
		assert.Equal(t, "production-aurora-test-db", enhanced["programname"])
		assert.Equal(t, "Slow query: select * from table.", enhanced["rawlog"])
	}
}
// TestSplitGlue checks that a Glue job batch uses the jr_... job run ID from
// the log stream as the task ID, and that decode can re-parse the lines.
func TestSplitGlue(t *testing.T) {
	input := LogEventBatch{
		MessageType:         "DATA_MESSAGE",
		Owner:               "123456789012",
		LogGroup:            "/aws-glue/jobs/clever-dev/analytics-district-participation/aae75f00",
		LogStream:           "jr_8927660fecacbe026ccab656cb80befea8102ac2023df531b92889b112aada28-1",
		SubscriptionFilters: []string{"ForwardLogsToKinesis"},
		LogEvents: []LogEvent{
			{
				ID:        "99999992379011144044923130086453437181614530551221780480",
				Timestamp: NewUnixTimestampMillis(1498519943285),
				Message:   "foo bar.",
			},
		},
	}
	lines := Split(input)
	expected := [][]byte{
		[]byte(`2017-06-26T23:32:23.285001+00:00 aws-glue clever-dev--analytics-district-participation/arn%3Aaws%3Aecs%3Aus-east-1%3A999988887777%3Atask%2Fjr_8927660fecacbe026ccab656cb80befea8102ac2023df531b92889b112aada28: foo bar.`),
	}
	assert.Equal(t, expected, lines)
	for _, line := range expected {
		enhanced, err := decode.ParseAndEnhance(string(line), "")
		require.Nil(t, err)
		assert.Equal(t, "aws-glue", enhanced["hostname"])
		assert.Equal(t, "clever-dev", enhanced["container_env"])
		assert.Equal(t, "analytics-district-participation", enhanced["container_app"])
		assert.Equal(t, "jr_8927660fecacbe026ccab656cb80befea8102ac2023df531b92889b112aada28", enhanced["container_task"])
	}
}
// TestSplitIfNecesary routes three input shapes through SplitMessageIfNecessary
// and checks each is dispatched to the right handler.
func TestSplitIfNecesary(t *testing.T) {
	// We provide three different inputs to batchedWriter.splitMessageIfNecessary
	// plain text
	// zlib compressed text
	// gzip compressed CloudWatch logs batch
	// we verify that the split function matches the input against the correct splitter
	// and decodes it.
	assert := assert.New(t)
	// Case 1: plain text passes through unchanged.
	plainTextInput := []byte("hello, world!")
	records, err := SplitMessageIfNecessary(plainTextInput)
	assert.NoError(err)
	assert.Equal(
		records,
		[][]byte{[]byte("hello, world!")},
	)
	// Case 2: a zlib-compressed record is decompressed.
	var z bytes.Buffer
	zbuf := zlib.NewWriter(&z)
	zbuf.Write([]byte("hello, world!"))
	zbuf.Close()
	zlibSingleInput := z.Bytes()
	records, err = SplitMessageIfNecessary(zlibSingleInput)
	assert.NoError(err)
	assert.Equal(
		records,
		[][]byte{[]byte("hello, world!")},
	)
	// Case 3: a gzipped CloudWatch batch is routed to the CW splitting logic.
	// the details of this part aren't super important since the actual functionality is
	// tested in other tests; for this test we just want to make sure that split function
	// correctly realizes it's gzip and call the appropriate CW-log-splitting logic
	var g bytes.Buffer
	gbuf := gzip.NewWriter(&g)
	cwLogBatch := LogEventBatch{
		MessageType:         "test",
		Owner:               "test",
		LogGroup:            "test",
		LogStream:           "test",
		SubscriptionFilters: []string{""},
		LogEvents: []LogEvent{{
			ID:        "test",
			Timestamp: UnixTimestampMillis(time.Date(2020, time.September, 9, 9, 10, 10, 0, time.UTC)),
			Message:   "test",
		}},
	}
	cwLogBatchJSON, _ := json.Marshal(cwLogBatch)
	gbuf.Write(cwLogBatchJSON)
	gbuf.Close()
	gzipBatchInput := g.Bytes()
	expectedRecord := []byte("2020-09-09T09:10:10.000001+00:00 test test--test/arn%3Aaws%3Aecs%3Aus-east-1%3A999988887777%3Atask%2F12345678-1234-1234-1234-555566667777: test")
	records, err = SplitMessageIfNecessary(gzipBatchInput)
	assert.NoError(err)
	assert.Equal(
		records,
		[][]byte{expectedRecord},
	)
}
// createKPLAggregate builds a KPL-aggregated Kinesis record containing the
// given payloads, optionally zlib-compressing each one first. The result
// follows the KPL wire format: the KPL magic number, the protobuf-encoded
// AggregatedRecord, and finally an MD5 digest of the protobuf bytes.
func createKPLAggregate(input [][]byte, compress bool) []byte {
	// All entries reference the same partition key (index 0 into the table).
	var keyIndex uint64
	entries := make([]*kpl.Record, 0, len(input))
	for _, payload := range input {
		data := payload
		if compress {
			var compressed bytes.Buffer
			w := zlib.NewWriter(&compressed)
			w.Write(data)
			w.Close()
			data = compressed.Bytes()
		}
		entries = append(entries, &kpl.Record{
			PartitionKeyIndex: &keyIndex,
			Data:              data,
		})
	}
	body, err := proto.Marshal(&kpl.AggregatedRecord{
		PartitionKeyTable: []string{"ecs_task_arn"},
		Records:           entries,
	})
	if err != nil {
		// Test helper: a marshal failure indicates a broken fixture.
		panic(err)
	}
	// Checksum covers only the protobuf body, not the magic prefix.
	digest := md5.Sum(body)
	out := append(kplMagicNumber, body...)
	return append(out, digest[:]...)
}
// TestKPLDeaggregate checks that KPLDeaggregate passes non-aggregated
// records through untouched and unpacks KPL-aggregated records into their
// constituent payloads.
func TestKPLDeaggregate(t *testing.T) {
	type test struct {
		description string
		input       []byte
		output      [][]byte
		shouldError bool
	}
	tests := []test{
		{
			description: "non-aggregated record",
			input:       []byte("hello"),
			output:      [][]byte{[]byte("hello")},
			shouldError: false,
		},
		{
			description: "one kpl-aggregated record",
			input: createKPLAggregate(
				[][]byte{[]byte("hello")},
				false,
			),
			output:      [][]byte{[]byte("hello")},
			shouldError: false,
		},
		{
			description: "three kpl-aggregated record",
			input: createKPLAggregate([][]byte{
				[]byte("hello, "),
				[]byte("world"),
				[]byte("!"),
			},
				false,
			),
			output: [][]byte{
				[]byte("hello, "),
				[]byte("world"),
				[]byte("!"),
			},
			shouldError: false,
		},
	}
	for _, tt := range tests {
		t.Run(tt.description, func(t *testing.T) {
			out, err := KPLDeaggregate(tt.input)
			if tt.shouldError {
				assert.Error(t, err)
				return
			}
			assert.NoError(t, err)
			// testify's Equal takes (expected, actual); the original call
			// had the arguments swapped, which mislabels failure diffs.
			assert.Equal(t, tt.output, out)
		})
	}
}
// TestDeaggregate exercises the top-level Deaggregate entry point across
// every input shape it must handle: plain records, KPL-aggregated records
// (with and without zlib-compressed payloads), and a gzip-compressed
// CloudWatch Logs batch.
func TestDeaggregate(t *testing.T) {
	type test struct {
		description string
		input       []byte
		output      [][]byte
		shouldError bool
	}
	tests := []test{
		{
			description: "non-aggregated record",
			input:       []byte("hello"),
			output:      [][]byte{[]byte("hello")},
			shouldError: false,
		},
		{
			description: "one kpl-aggregated record",
			input: createKPLAggregate(
				[][]byte{[]byte("hello")},
				false,
			),
			output:      [][]byte{[]byte("hello")},
			shouldError: false,
		},
		{
			description: "three kpl-aggregated record",
			input: createKPLAggregate([][]byte{
				[]byte("hello, "),
				[]byte("world"),
				[]byte("!"),
			},
				false,
			),
			output: [][]byte{
				[]byte("hello, "),
				[]byte("world"),
				[]byte("!"),
			},
			shouldError: false,
		},
		{
			description: "one kpl-aggregated zlib record",
			input: createKPLAggregate(
				[][]byte{[]byte("hello")},
				true,
			),
			output:      [][]byte{[]byte("hello")},
			shouldError: false,
		},
		{
			description: "three kpl-aggregated zlib record",
			input: createKPLAggregate([][]byte{
				[]byte("hello, "),
				[]byte("world"),
				[]byte("!"),
			},
				true,
			),
			output: [][]byte{
				[]byte("hello, "),
				[]byte("world"),
				[]byte("!"),
			},
			shouldError: false,
		},
	}
	// Build the CloudWatch Logs batch case separately since it requires
	// gzip-compressing a JSON fixture at runtime.
	var g bytes.Buffer
	gbuf := gzip.NewWriter(&g)
	cwLogBatch := LogEventBatch{
		MessageType:         "test",
		Owner:               "test",
		LogGroup:            "test",
		LogStream:           "test",
		SubscriptionFilters: []string{""},
		LogEvents: []LogEvent{{
			ID:        "test",
			Timestamp: UnixTimestampMillis(time.Date(2020, time.September, 9, 9, 10, 10, 0, time.UTC)),
			Message:   "test",
		}},
	}
	// Marshal of this fixed struct cannot fail; error deliberately ignored.
	cwLogBatchJSON, _ := json.Marshal(cwLogBatch)
	gbuf.Write(cwLogBatchJSON)
	gbuf.Close()
	gzipBatchInput := g.Bytes()
	tests = append(tests, test{
		description: "cloudwatch log batch",
		input:       gzipBatchInput,
		output:      [][]byte{[]byte("2020-09-09T09:10:10.000001+00:00 test test--test/arn%3Aaws%3Aecs%3Aus-east-1%3A999988887777%3Atask%2F12345678-1234-1234-1234-555566667777: test")},
		shouldError: false,
	})
	for _, tt := range tests {
		t.Run(tt.description, func(t *testing.T) {
			out, err := Deaggregate(tt.input)
			if tt.shouldError {
				assert.Error(t, err)
				return
			}
			assert.NoError(t, err)
			// testify's Equal takes (expected, actual); the original call
			// had the arguments swapped, which mislabels failure diffs.
			assert.Equal(t, tt.output, out)
		})
	}
}