Refactor to fix deadlocks and race conditions.

This commit is contained in:
Xavi Ramirez 2017-08-03 21:22:52 +00:00
parent 29f68f77eb
commit ba951ff0da
4 changed files with 211 additions and 163 deletions

View file

@ -32,7 +32,7 @@ type batchedWriter struct {
checkpointMsg chan kcl.SequencePair checkpointMsg chan kcl.SequencePair
checkpointShutdown chan struct{} checkpointShutdown chan struct{}
checkpointTag chan string checkpointTag chan string
lastProcessedPair chan kcl.SequencePair lastIgnoredPair chan kcl.SequencePair
batchMsg chan tagMsgPair batchMsg chan tagMsgPair
shutdown chan struct{} shutdown chan struct{}
@ -58,11 +58,11 @@ func (b *batchedWriter) Initialize(shardID string, checkpointer kcl.Checkpointer
b.checkpointShutdown = make(chan struct{}) b.checkpointShutdown = make(chan struct{})
b.startCheckpointListener(checkpointer, b.checkpointMsg, b.checkpointShutdown) b.startCheckpointListener(checkpointer, b.checkpointMsg, b.checkpointShutdown)
b.checkpointTag = make(chan string) b.checkpointTag = make(chan string, 100) // Buffered to workaround
b.batchMsg = make(chan tagMsgPair) b.batchMsg = make(chan tagMsgPair)
b.shutdown = make(chan struct{}) b.shutdown = make(chan struct{})
b.lastProcessedPair = make(chan kcl.SequencePair) b.lastIgnoredPair = make(chan kcl.SequencePair)
b.startMessageHandler(b.batchMsg, b.checkpointTag, b.lastProcessedPair, b.shutdown) b.startMessageHandler(b.batchMsg, b.checkpointTag, b.lastIgnoredPair, b.shutdown)
return nil return nil
} }
@ -128,32 +128,48 @@ func (b *batchedWriter) createBatcher(tag string) batcher.Batcher {
// startMessageHandler starts a goroutine that routes messages to batches. It uses a // startMessageHandler starts a goroutine that routes messages to batches. It uses a
// goroutine to avoid race conditions. // goroutine to avoid race conditions.
func (b *batchedWriter) startMessageHandler( func (b *batchedWriter) startMessageHandler(
batchMsg <-chan tagMsgPair, checkpointTag <-chan string, lastPair <-chan kcl.SequencePair, batchMsg <-chan tagMsgPair, checkpointTag <-chan string, lastIgnored <-chan kcl.SequencePair,
shutdown <-chan struct{}, shutdown <-chan struct{},
) { ) {
go func() { getBatcher := make(chan string)
var lastProcessedPair kcl.SequencePair rtnBatcher := make(chan batcher.Batcher)
batchers := map[string]batcher.Batcher{} shutdownAdder := make(chan struct{})
areBatchersEmpty := true
go func() {
for { for {
select { select {
case tmp := <-batchMsg: case tmp := <-batchMsg:
batcher, ok := batchers[tmp.tag] getBatcher <- tmp.tag
if !ok { batcher := <-rtnBatcher
batcher = b.createBatcher(tmp.tag)
batchers[tmp.tag] = batcher
}
err := batcher.AddMessage(tmp.msg, tmp.pair) err := batcher.AddMessage(tmp.msg, tmp.pair)
if err != nil { if err != nil {
b.log.ErrorD("add-message", kv.M{ b.log.ErrorD("add-message", kv.M{
"err": err.Error(), "msg": string(tmp.msg), "tag": tmp.tag, "err": err.Error(), "msg": string(tmp.msg), "tag": tmp.tag,
}) })
} }
case <-shutdownAdder:
}
}
}()
go func() {
var lastIgnoredPair kcl.SequencePair
batchers := map[string]batcher.Batcher{}
areBatchersEmpty := true
for {
select {
case tag := <-getBatcher:
batcher, ok := batchers[tag]
if !ok {
batcher = b.createBatcher(tag)
batchers[tag] = batcher
}
areBatchersEmpty = false areBatchersEmpty = false
rtnBatcher <- batcher
case tag := <-checkpointTag: case tag := <-checkpointTag:
smallest := lastProcessedPair smallest := lastIgnoredPair
isAllEmpty := true isAllEmpty := true
for name, batch := range batchers { for name, batch := range batchers {
@ -166,7 +182,8 @@ func (b *batchedWriter) startMessageHandler(
continue continue
} }
if pair.IsLessThan(smallest) { // Check for empty because it's possible that no messages have been ignored
if smallest.IsEmpty() || pair.IsLessThan(smallest) {
smallest = pair smallest = pair
} }
@ -177,17 +194,18 @@ func (b *batchedWriter) startMessageHandler(
b.checkpointMsg <- smallest b.checkpointMsg <- smallest
} }
areBatchersEmpty = isAllEmpty areBatchersEmpty = isAllEmpty
case pair := <-lastPair: case pair := <-lastIgnored:
if areBatchersEmpty { if areBatchersEmpty && !pair.IsEmpty() {
b.checkpointMsg <- pair b.checkpointMsg <- pair
} }
lastProcessedPair = pair lastIgnoredPair = pair
case <-shutdown: case <-shutdown:
for _, batch := range batchers { for _, batch := range batchers {
batch.Flush() batch.Flush()
} }
b.checkpointMsg <- lastProcessedPair b.checkpointMsg <- b.lastProcessedSeq
b.checkpointShutdown <- struct{}{} b.checkpointShutdown <- struct{}{}
areBatchersEmpty = true areBatchersEmpty = true
} }
} }
@ -234,6 +252,7 @@ func (b *batchedWriter) ProcessRecords(records []kcl.Record) error {
if err != nil { if err != nil {
return err return err
} }
wasPairIgnored := true
for _, rawmsg := range messages { for _, rawmsg := range messages {
msg, tags, err := b.sender.ProcessMessage(rawmsg) msg, tags, err := b.sender.ProcessMessage(rawmsg)
@ -260,11 +279,14 @@ func (b *batchedWriter) ProcessRecords(records []kcl.Record) error {
// sequence number among all the batches (let's call it A). We then checkpoint at // sequence number among all the batches (let's call it A). We then checkpoint at
// the A-1 sequence number. // the A-1 sequence number.
b.batchMsg <- tagMsgPair{tag, msg, prevPair} b.batchMsg <- tagMsgPair{tag, msg, prevPair}
wasPairIgnored = false
} }
} }
prevPair = pair prevPair = pair
b.lastProcessedPair <- pair if wasPairIgnored {
b.lastIgnoredPair <- pair
}
} }
b.lastProcessedSeq = pair b.lastProcessedSeq = pair

View file

@ -86,30 +86,28 @@ type mockCheckpointer struct {
shutdown chan struct{} shutdown chan struct{}
} }
func NewMockCheckpointer(maxSeq string, timeout time.Duration) *mockCheckpointer { func NewMockCheckpointer(timeout time.Duration) *mockCheckpointer {
mcp := &mockCheckpointer{ mcp := &mockCheckpointer{
checkpoint: make(chan string), checkpoint: make(chan string),
done: make(chan struct{}, 1), done: make(chan struct{}, 1),
timeout: make(chan struct{}, 1), timeout: make(chan struct{}, 1),
shutdown: make(chan struct{}), shutdown: make(chan struct{}),
} }
mcp.startWaiter(maxSeq, timeout) mcp.startWaiter(timeout)
return mcp return mcp
} }
func (m *mockCheckpointer) startWaiter(maxSeq string, timeout time.Duration) { func (m *mockCheckpointer) startWaiter(timeout time.Duration) {
go func() { go func() {
for { for {
select { select {
case seq := <-m.checkpoint: case seq := <-m.checkpoint:
m.recievedSequences = append(m.recievedSequences, seq) m.recievedSequences = append(m.recievedSequences, seq)
if seq == maxSeq {
m.done <- struct{}{}
}
case <-time.NewTimer(timeout).C: case <-time.NewTimer(timeout).C:
m.timeout <- struct{}{} m.timeout <- struct{}{}
case <-m.shutdown: case <-m.shutdown:
m.done <- struct{}{}
return return
} }
} }
@ -126,15 +124,10 @@ func (m *mockCheckpointer) wait() error {
func (m *mockCheckpointer) Shutdown() { func (m *mockCheckpointer) Shutdown() {
m.shutdown <- struct{}{} m.shutdown <- struct{}{}
} }
func (m *mockCheckpointer) Checkpoint(sequenceNumber *string, subSequenceNumber *int) error { func (m *mockCheckpointer) Checkpoint(pair kcl.SequencePair, retry int) error {
m.checkpoint <- *sequenceNumber m.checkpoint <- pair.Sequence.String()
return nil return nil
} }
func (m *mockCheckpointer) CheckpointWithRetry(
sequenceNumber *string, subSequenceNumber *int, retryCount int,
) error {
return m.Checkpoint(sequenceNumber, subSequenceNumber)
}
func encode(str string) string { func encode(str string) string {
return base64.StdEncoding.EncodeToString([]byte(str)) return base64.StdEncoding.EncodeToString([]byte(str))
@ -148,7 +141,7 @@ func TestProcessRecordsIgnoredMessages(t *testing.T) {
BatchInterval: 10 * time.Millisecond, BatchInterval: 10 * time.Millisecond,
CheckpointFreq: 20 * time.Millisecond, CheckpointFreq: 20 * time.Millisecond,
}) })
mockcheckpointer := NewMockCheckpointer("4", 5*time.Second) mockcheckpointer := NewMockCheckpointer(5 * time.Second)
wrt := NewBatchedWriter(mockconfig, ignoringSender{}, mocklog) wrt := NewBatchedWriter(mockconfig, ignoringSender{}, mocklog)
wrt.Initialize("test-shard", mockcheckpointer) wrt.Initialize("test-shard", mockcheckpointer)
@ -161,8 +154,13 @@ func TestProcessRecordsIgnoredMessages(t *testing.T) {
}) })
assert.NoError(err) assert.NoError(err)
err = wrt.Shutdown("TERMINATE")
assert.NoError(err)
err = mockcheckpointer.wait() err = mockcheckpointer.wait()
assert.NoError(err) assert.NoError(err)
assert.Contains(mockcheckpointer.recievedSequences, "4")
} }
func TestProcessRecordsMutliBatchBasic(t *testing.T) { func TestProcessRecordsMutliBatchBasic(t *testing.T) {
@ -173,7 +171,7 @@ func TestProcessRecordsMutliBatchBasic(t *testing.T) {
BatchInterval: 100 * time.Millisecond, BatchInterval: 100 * time.Millisecond,
CheckpointFreq: 200 * time.Millisecond, CheckpointFreq: 200 * time.Millisecond,
}) })
mockcheckpointer := NewMockCheckpointer("8", 5*time.Second) mockcheckpointer := NewMockCheckpointer(5 * time.Second)
mocksender := NewMsgAsTagSender() mocksender := NewMsgAsTagSender()
wrt := NewBatchedWriter(mockconfig, mocksender, mocklog) wrt := NewBatchedWriter(mockconfig, mocksender, mocklog)
@ -233,7 +231,7 @@ func TestProcessRecordsMutliBatchWithIgnores(t *testing.T) {
BatchInterval: 100 * time.Millisecond, BatchInterval: 100 * time.Millisecond,
CheckpointFreq: 200 * time.Millisecond, CheckpointFreq: 200 * time.Millisecond,
}) })
mockcheckpointer := NewMockCheckpointer("26", 5*time.Second) mockcheckpointer := NewMockCheckpointer(5 * time.Second)
mocksender := NewMsgAsTagSender() mocksender := NewMsgAsTagSender()
wrt := NewBatchedWriter(mockconfig, mocksender, mocklog) wrt := NewBatchedWriter(mockconfig, mocksender, mocklog)
@ -312,7 +310,7 @@ func TestStaggeredCheckpionting(t *testing.T) {
BatchInterval: 100 * time.Millisecond, BatchInterval: 100 * time.Millisecond,
CheckpointFreq: 200 * time.Nanosecond, CheckpointFreq: 200 * time.Nanosecond,
}) })
mockcheckpointer := NewMockCheckpointer("9", 5*time.Second) mockcheckpointer := NewMockCheckpointer(5 * time.Second)
mocksender := NewMsgAsTagSender() mocksender := NewMsgAsTagSender()
wrt := NewBatchedWriter(mockconfig, mocksender, mocklog) wrt := NewBatchedWriter(mockconfig, mocksender, mocklog)
@ -352,6 +350,7 @@ func TestStaggeredCheckpionting(t *testing.T) {
assert.NotContains(mockcheckpointer.recievedSequences, "6") assert.NotContains(mockcheckpointer.recievedSequences, "6")
assert.NotContains(mockcheckpointer.recievedSequences, "7") assert.NotContains(mockcheckpointer.recievedSequences, "7")
assert.NotContains(mockcheckpointer.recievedSequences, "8") assert.NotContains(mockcheckpointer.recievedSequences, "8")
assert.Contains(mockcheckpointer.recievedSequences, "9")
assert.Contains(mocksender.batches, "tag1") assert.Contains(mocksender.batches, "tag1")
assert.Equal(2, len(mocksender.batches["tag1"])) // One batch assert.Equal(2, len(mocksender.batches["tag1"])) // One batch
@ -365,8 +364,10 @@ func TestStaggeredCheckpionting(t *testing.T) {
assert.Equal(2, len(mocksender.batches["tag3"][0])) // with three items assert.Equal(2, len(mocksender.batches["tag3"][0])) // with three items
assert.Equal("tag3", string(mocksender.batches["tag3"][0][0])) assert.Equal("tag3", string(mocksender.batches["tag3"][0][0]))
assert.Equal("tag3", string(mocksender.batches["tag3"][0][1])) assert.Equal("tag3", string(mocksender.batches["tag3"][0][1]))
assert.Equal(2, len(mocksender.batches["tag3"][1]))
assert.Equal("tag3", string(mocksender.batches["tag3"][1][0])) assert.Equal("tag3", string(mocksender.batches["tag3"][1][0]))
assert.Equal("tag3", string(mocksender.batches["tag3"][1][1])) assert.Equal("tag3", string(mocksender.batches["tag3"][1][1]))
assert.Equal(2, len(mocksender.batches["tag3"][2]))
assert.Equal("tag3", string(mocksender.batches["tag3"][2][0])) assert.Equal("tag3", string(mocksender.batches["tag3"][2][0]))
assert.Equal("tag3", string(mocksender.batches["tag3"][2][1])) assert.Equal("tag3", string(mocksender.batches["tag3"][2][1]))
} }

View file

@ -13,8 +13,7 @@ type sampleRecordProcessor struct {
checkpointer kcl.Checkpointer checkpointer kcl.Checkpointer
checkpointRetries int checkpointRetries int
checkpointFreq time.Duration checkpointFreq time.Duration
largestSeq *big.Int largestPair kcl.SequencePair
largestSubSeq int
lastCheckpoint time.Time lastCheckpoint time.Time
} }
@ -31,9 +30,8 @@ func (srp *sampleRecordProcessor) Initialize(shardID string, checkpointer kcl.Ch
return nil return nil
} }
func (srp *sampleRecordProcessor) shouldUpdateSequence(sequenceNumber *big.Int, subSequenceNumber int) bool { func (srp *sampleRecordProcessor) shouldUpdateSequence(pair kcl.SequencePair) bool {
return srp.largestSeq == nil || sequenceNumber.Cmp(srp.largestSeq) == 1 || return srp.largestPair.IsLessThan(pair)
(sequenceNumber.Cmp(srp.largestSeq) == 0 && subSequenceNumber > srp.largestSubSeq)
} }
func (srp *sampleRecordProcessor) ProcessRecords(records []kcl.Record) error { func (srp *sampleRecordProcessor) ProcessRecords(records []kcl.Record) error {
@ -43,14 +41,13 @@ func (srp *sampleRecordProcessor) ProcessRecords(records []kcl.Record) error {
fmt.Fprintf(os.Stderr, "could not parse sequence number '%s'\n", record.SequenceNumber) fmt.Fprintf(os.Stderr, "could not parse sequence number '%s'\n", record.SequenceNumber)
continue continue
} }
if srp.shouldUpdateSequence(seqNumber, record.SubSequenceNumber) { pair := kcl.SequencePair{seqNumber, record.SubSequenceNumber}
srp.largestSeq = seqNumber if srp.shouldUpdateSequence(pair) {
srp.largestSubSeq = record.SubSequenceNumber srp.largestPair = pair
} }
} }
if time.Now().Sub(srp.lastCheckpoint) > srp.checkpointFreq { if time.Now().Sub(srp.lastCheckpoint) > srp.checkpointFreq {
largestSeq := srp.largestSeq.String() srp.checkpointer.Checkpoint(srp.largestPair, srp.checkpointRetries)
srp.checkpointer.CheckpointWithRetry(&largestSeq, &srp.largestSubSeq, srp.checkpointRetries)
srp.lastCheckpoint = time.Now() srp.lastCheckpoint = time.Now()
} }
return nil return nil

View file

@ -2,12 +2,10 @@ package kcl
import ( import (
"bufio" "bufio"
"bytes"
"encoding/json" "encoding/json"
"fmt" "fmt"
"io" "io"
"os" "os"
"sync"
"time" "time"
) )
@ -18,8 +16,7 @@ type RecordProcessor interface {
} }
type Checkpointer interface { type Checkpointer interface {
Checkpoint(sequenceNumber *string, subSequenceNumber *int) error Checkpoint(pair SequencePair, retryCount int) error
CheckpointWithRetry(sequenceNumber *string, subSequenceNumber *int, retryCount int) error
Shutdown() Shutdown()
} }
@ -31,93 +28,6 @@ func (ce CheckpointError) Error() string {
return ce.e return ce.e
} }
type checkpointer struct {
mux sync.Mutex
ioHandler ioHandler
}
func (c *checkpointer) getAction() (interface{}, error) {
line, err := c.ioHandler.readLine()
if err != nil {
return nil, err
}
action, err := c.ioHandler.loadAction(line.String())
if err != nil {
return nil, err
}
return action, nil
}
func (c *checkpointer) Checkpoint(sequenceNumber *string, subSequenceNumber *int) error {
c.mux.Lock()
defer c.mux.Unlock()
c.ioHandler.writeAction(ActionCheckpoint{
Action: "checkpoint",
SequenceNumber: sequenceNumber,
SubSequenceNumber: subSequenceNumber,
})
line, err := c.ioHandler.readLine()
if err != nil {
return err
}
actionI, err := c.ioHandler.loadAction(line.String())
if err != nil {
return err
}
action, ok := actionI.(ActionCheckpoint)
if !ok {
return fmt.Errorf("expected checkpoint response, got '%s'", line.String())
}
if action.Error != nil && *action.Error != "" {
return CheckpointError{
e: *action.Error,
}
}
return nil
}
// CheckpointWithRetry tries to save a checkPoint up to `retryCount` + 1 times.
// `retryCount` should be >= 0
func (c *checkpointer) CheckpointWithRetry(
sequenceNumber *string, subSequenceNumber *int, retryCount int,
) error {
sleepDuration := 5 * time.Second
for n := 0; n <= retryCount; n++ {
err := c.Checkpoint(sequenceNumber, subSequenceNumber)
if err == nil {
return nil
}
if cperr, ok := err.(CheckpointError); ok {
switch cperr.Error() {
case "ShutdownException":
return fmt.Errorf("Encountered shutdown exception, skipping checkpoint")
case "ThrottlingException":
fmt.Fprintf(os.Stderr, "Was throttled while checkpointing, will attempt again in %s\n", sleepDuration)
case "InvalidStateException":
fmt.Fprintf(os.Stderr, "MultiLangDaemon reported an invalid state while checkpointing\n")
default:
fmt.Fprintf(os.Stderr, "Encountered an error while checkpointing: %s", err)
}
}
if n == retryCount {
return fmt.Errorf("Failed to checkpoint after %d attempts, giving up.", retryCount)
}
time.Sleep(sleepDuration)
}
return nil
}
func (c *checkpointer) Shutdown() {
c.CheckpointWithRetry(nil, nil, 5)
}
type ioHandler struct { type ioHandler struct {
inputFile io.Reader inputFile io.Reader
outputFile io.Writer outputFile io.Writer
@ -134,13 +44,13 @@ func (i ioHandler) writeError(message string) {
fmt.Fprintf(i.errorFile, "%s\n", message) fmt.Fprintf(i.errorFile, "%s\n", message)
} }
func (i ioHandler) readLine() (*bytes.Buffer, error) { func (i ioHandler) readLine() (string, error) {
bio := bufio.NewReader(i.inputFile) bio := bufio.NewReader(i.inputFile)
line, err := bio.ReadString('\n') line, err := bio.ReadString('\n')
if err != nil { if err != nil {
return nil, err return "", err
} }
return bytes.NewBufferString(line), nil return line, nil
} }
type ActionInitialize struct { type ActionInitialize struct {
@ -197,6 +107,8 @@ func (i ioHandler) loadAction(line string) (interface{}, error) {
return nil, err return nil, err
} }
return actionProcessRecords, nil return actionProcessRecords, nil
case "shutdownRequested":
fallthrough
case "shutdown": case "shutdown":
var actionShutdown ActionShutdown var actionShutdown ActionShutdown
if err := json.Unmarshal(lineBytes, &actionShutdown); err != nil { if err := json.Unmarshal(lineBytes, &actionShutdown); err != nil {
@ -223,25 +135,37 @@ func (i ioHandler) writeAction(action interface{}) error {
return nil return nil
} }
func New(inputFile io.Reader, outputFile, errorFile io.Writer, recordProcessor RecordProcessor) *KCLProcess { func New(
inputFile io.Reader, outputFile, errorFile io.Writer, recordProcessor RecordProcessor,
) *KCLProcess {
i := ioHandler{ i := ioHandler{
inputFile: inputFile, inputFile: inputFile,
outputFile: outputFile, outputFile: outputFile,
errorFile: errorFile, errorFile: errorFile,
} }
return &KCLProcess{ return &KCLProcess{
ioHandler: i, ioHandler: i,
checkpointer: &checkpointer{
ioHandler: i,
},
recordProcessor: recordProcessor, recordProcessor: recordProcessor,
next: make(chan struct{}),
out: make(chan string),
outErr: make(chan error),
checkpoint: make(chan SequencePair),
checkpointErr: make(chan error),
} }
} }
type KCLProcess struct { type KCLProcess struct {
ioHandler ioHandler ioHandler ioHandler
checkpointer Checkpointer
recordProcessor RecordProcessor recordProcessor RecordProcessor
next chan struct{}
out chan string
outErr chan error
checkpoint chan SequencePair
checkpointErr chan error
} }
func (kclp *KCLProcess) reportDone(responseFor string) error { func (kclp *KCLProcess) reportDone(responseFor string) error {
@ -257,13 +181,13 @@ func (kclp *KCLProcess) reportDone(responseFor string) error {
func (kclp *KCLProcess) performAction(a interface{}) (string, error) { func (kclp *KCLProcess) performAction(a interface{}) (string, error) {
switch action := a.(type) { switch action := a.(type) {
case ActionInitialize: case ActionInitialize:
return action.Action, kclp.recordProcessor.Initialize(action.ShardID, kclp.checkpointer) return action.Action, kclp.recordProcessor.Initialize(action.ShardID, kclp)
case ActionProcessRecords: case ActionProcessRecords:
return action.Action, kclp.recordProcessor.ProcessRecords(action.Records) return action.Action, kclp.recordProcessor.ProcessRecords(action.Records)
case ActionShutdown: case ActionShutdown:
return action.Action, kclp.recordProcessor.Shutdown(action.Reason) return action.Action, kclp.recordProcessor.Shutdown(action.Reason)
default: default:
return "", fmt.Errorf("unknown action to dispatch: %s", action) return "", fmt.Errorf("unknown action to dispatch: %+#v", action)
} }
} }
@ -280,20 +204,124 @@ func (kclp *KCLProcess) handleLine(line string) error {
return kclp.reportDone(responseFor) return kclp.reportDone(responseFor)
} }
func (kclp *KCLProcess) Run() { func (kclp *KCLProcess) Checkpoint(pair SequencePair, retryCount int) error {
for { sleepDuration := 5 * time.Second
line, err := kclp.ioHandler.readLine()
if err != nil { for n := 0; n <= retryCount; n++ {
kclp.ioHandler.writeError("Read line error: " + err.Error()) kclp.checkpoint <- pair
return err := <-kclp.checkpointErr
} else if line == nil { if err == nil {
kclp.ioHandler.writeError("Empty read line recieved") return nil
return
} }
err = kclp.handleLine(line.String()) if cperr, ok := err.(CheckpointError); ok {
if err != nil { switch cperr.Error() {
kclp.ioHandler.writeError("Handle line error: " + err.Error()) case "ShutdownException":
return fmt.Errorf("Encountered shutdown exception, skipping checkpoint")
case "ThrottlingException":
fmt.Fprintf(os.Stderr, "Checkpointing throttling, pause for %s\n", sleepDuration)
case "InvalidStateException":
fmt.Fprintf(os.Stderr, "MultiLangDaemon invalid state while checkpointing\n")
default:
fmt.Fprintf(os.Stderr, "Encountered an error while checkpointing: %s", err)
}
}
if n == retryCount {
return fmt.Errorf("Failed to checkpoint after %d attempts, giving up.", retryCount)
}
time.Sleep(sleepDuration)
}
return nil
}
func (kclp *KCLProcess) Shutdown() {
kclp.Checkpoint(SequencePair{}, 5)
}
func (kclp *KCLProcess) processCheckpoint(pair SequencePair) error {
var seq *string
var subSeq *int
if !pair.IsEmpty() { // an empty pair is a signal to shutdown
tmp := pair.Sequence.String()
seq = &tmp
subSeq = &pair.SubSequence
}
kclp.ioHandler.writeAction(ActionCheckpoint{
Action: "checkpoint",
SequenceNumber: seq,
SubSequenceNumber: subSeq,
})
line, err := kclp.ioHandler.readLine()
if err != nil {
return err
}
actionI, err := kclp.ioHandler.loadAction(line)
if err != nil {
return err
}
action, ok := actionI.(ActionCheckpoint)
if !ok {
return fmt.Errorf("expected checkpoint response, got '%s'", line)
}
if action.Error != nil && *action.Error != "" {
return CheckpointError{e: *action.Error}
}
return nil
}
func (kclp *KCLProcess) startLineProcessor(
next chan struct{}, out chan string, outErr chan error,
checkpoint chan SequencePair, checkpointErr chan error,
) {
go func() {
for {
select {
case <-next:
line, err := kclp.ioHandler.readLine()
if err != nil {
outErr <- err
} else {
out <- line
}
case pair := <-checkpoint:
err := kclp.processCheckpoint(pair)
checkpointErr <- err
}
}
}()
}
func (kclp *KCLProcess) processNextLine() error {
kclp.next <- struct{}{} // We're ready for a new line
var err error
var line string
select {
case err = <-kclp.outErr:
case line = <-kclp.out:
if line == "" {
err = fmt.Errorf("Empty read line recieved")
} else {
err = kclp.handleLine(line)
}
}
return err
}
func (kclp *KCLProcess) Run() {
kclp.startLineProcessor(kclp.next, kclp.out, kclp.outErr, kclp.checkpoint, kclp.checkpointErr)
for {
err := kclp.processNextLine()
if err == io.EOF {
kclp.ioHandler.writeError("IO stream closed")
return
} else if err != nil {
kclp.ioHandler.writeError(fmt.Sprintf("ERR Handle line: %+#v", err))
return return
} }
} }