Added a more coherent shutdown pathway

This commit is contained in:
Xavi Ramirez 2017-08-08 19:09:31 +00:00
parent 1be812a887
commit b7743c9ea7
4 changed files with 45 additions and 32 deletions

View file

@ -28,7 +28,7 @@ type batcherManager struct {
batchMsg chan tagMsgPair batchMsg chan tagMsgPair
lastIgnored chan kcl.SequencePair lastIgnored chan kcl.SequencePair
lastProcessed chan kcl.SequencePair lastProcessed chan kcl.SequencePair
shutdown chan struct{} shutdown chan chan<- struct{}
} }
func NewBatcherManager( func NewBatcherManager(
@ -46,7 +46,7 @@ func NewBatcherManager(
batchMsg: make(chan tagMsgPair), batchMsg: make(chan tagMsgPair),
lastIgnored: make(chan kcl.SequencePair), lastIgnored: make(chan kcl.SequencePair),
lastProcessed: make(chan kcl.SequencePair), lastProcessed: make(chan kcl.SequencePair),
shutdown: make(chan struct{}), shutdown: make(chan chan<- struct{}),
} }
bm.startMessageHandler(bm.batchMsg, bm.lastIgnored, bm.lastProcessed, bm.shutdown) bm.startMessageHandler(bm.batchMsg, bm.lastIgnored, bm.lastProcessed, bm.shutdown)
@ -66,8 +66,11 @@ func (b *batcherManager) LatestProcessed(pair kcl.SequencePair) {
b.lastProcessed <- pair b.lastProcessed <- pair
} }
func (b *batcherManager) Shutdown() { func (b *batcherManager) Shutdown() <-chan struct{} {
b.shutdown <- struct{}{} done := make(chan struct{})
b.shutdown <- done
return done
} }
func (b *batcherManager) createBatcher() *batcher { func (b *batcherManager) createBatcher() *batcher {
@ -132,7 +135,7 @@ func (b *batcherManager) sendCheckpoint(
// goroutine to avoid race conditions. // goroutine to avoid race conditions.
func (b *batcherManager) startMessageHandler( func (b *batcherManager) startMessageHandler(
batchMsg <-chan tagMsgPair, lastIgnored, lastProcessed <-chan kcl.SequencePair, batchMsg <-chan tagMsgPair, lastIgnored, lastProcessed <-chan kcl.SequencePair,
shutdown <-chan struct{}, shutdown <-chan chan<- struct{},
) { ) {
go func() { go func() {
var lastProcessedPair kcl.SequencePair var lastProcessedPair kcl.SequencePair
@ -189,12 +192,16 @@ func (b *batcherManager) startMessageHandler(
} }
case pair := <-lastProcessed: case pair := <-lastProcessed:
lastProcessedPair = pair lastProcessedPair = pair
case <-shutdown: case done := <-shutdown:
for tag, batcher := range batchers { for tag, batcher := range batchers {
b.sendBatch(batcher, tag) b.sendBatch(batcher, tag)
} }
b.chkpntManager.Checkpoint(lastProcessedPair) b.chkpntManager.Checkpoint(lastProcessedPair)
b.chkpntManager.Shutdown() chkDone := b.chkpntManager.Shutdown()
<-chkDone
done <- struct{}{}
return
} }
} }
}() }()

View file

@ -15,7 +15,7 @@ type checkpointManager struct {
checkpointFreq time.Duration checkpointFreq time.Duration
checkpoint chan kcl.SequencePair checkpoint chan kcl.SequencePair
shutdown chan struct{} shutdown chan chan<- struct{}
} }
func NewCheckpointManager( func NewCheckpointManager(
@ -27,7 +27,7 @@ func NewCheckpointManager(
checkpointFreq: config.CheckpointFreq, checkpointFreq: config.CheckpointFreq,
checkpoint: make(chan kcl.SequencePair), checkpoint: make(chan kcl.SequencePair),
shutdown: make(chan struct{}), shutdown: make(chan chan<- struct{}),
} }
cm.startCheckpointHandler(checkpointer, cm.checkpoint, cm.shutdown) cm.startCheckpointHandler(checkpointer, cm.checkpoint, cm.shutdown)
@ -39,33 +39,35 @@ func (cm *checkpointManager) Checkpoint(pair kcl.SequencePair) {
cm.checkpoint <- pair cm.checkpoint <- pair
} }
func (cm *checkpointManager) Shutdown() { func (cm *checkpointManager) Shutdown() <-chan struct{} {
cm.shutdown <- struct{}{} done := make(chan struct{})
cm.shutdown <- done
return done
} }
func (cm *checkpointManager) startCheckpointHandler( func (cm *checkpointManager) startCheckpointHandler(
checkpointer kcl.Checkpointer, checkpoint <-chan kcl.SequencePair, shutdown <-chan struct{}, checkpointer kcl.Checkpointer, checkpoint <-chan kcl.SequencePair,
shutdown <-chan chan<- struct{},
) { ) {
go func() { go func() {
lastCheckpoint := time.Now() lastCheckpoint := time.Now()
for { for {
var doneShutdown chan<- struct{}
pair := kcl.SequencePair{} pair := kcl.SequencePair{}
isShuttingDown := false
select { select {
case pair = <-checkpoint: case pair = <-checkpoint:
case <-shutdown: case doneShutdown = <-shutdown:
isShuttingDown = true
} }
// This is a write throttle to ensure we don't checkpoint faster than cm.checkpointFreq. // This is a write throttle to ensure we don't checkpoint faster than cm.checkpointFreq.
// The latest pair number is always used. // The latest pair number is always used.
for !isShuttingDown && time.Now().Sub(lastCheckpoint) < cm.checkpointFreq { for doneShutdown == nil && time.Now().Sub(lastCheckpoint) < cm.checkpointFreq {
select { select {
case pair = <-checkpoint: // Keep updating checkpoint pair while waiting case pair = <-checkpoint: // Keep updating checkpoint pair while waiting
case <-shutdown: case doneShutdown = <-shutdown:
isShuttingDown = true
case <-time.NewTimer(cm.checkpointFreq - time.Now().Sub(lastCheckpoint)).C: case <-time.NewTimer(cm.checkpointFreq - time.Now().Sub(lastCheckpoint)).C:
} }
} }
@ -76,8 +78,9 @@ func (cm *checkpointManager) startCheckpointHandler(
stats.Counter("checkpoints-sent", 1) stats.Counter("checkpoints-sent", 1)
} }
if isShuttingDown { if doneShutdown != nil {
checkpointer.Shutdown() checkpointer.Shutdown()
doneShutdown <- struct{}{}
return return
} }
} }

View file

@ -143,7 +143,8 @@ func (b *batchedWriter) Shutdown(reason string) error {
b.log.ErrorD("shutdown-failover", kv.M{"shard-id": b.shardID, "reason": reason}) b.log.ErrorD("shutdown-failover", kv.M{"shard-id": b.shardID, "reason": reason})
} }
b.batcherManager.Shutdown() done := b.batcherManager.Shutdown()
<-done
return nil return nil
} }

View file

@ -13,6 +13,7 @@ import (
type RecordProcessor interface { type RecordProcessor interface {
Initialize(shardID string, checkpointer Checkpointer) error Initialize(shardID string, checkpointer Checkpointer) error
ProcessRecords(records []Record) error ProcessRecords(records []Record) error
// Shutdown should block until it's safe to shut down the process.
Shutdown(reason string) error Shutdown(reason string) error
} }
@ -138,7 +139,6 @@ func New(
ioHandler: i, ioHandler: i,
recordProcessor: recordProcessor, recordProcessor: recordProcessor,
isShuttingDown: false,
nextCheckpointPair: SequencePair{}, nextCheckpointPair: SequencePair{},
} }
} }
@ -149,7 +149,6 @@ type KCLProcess struct {
ioHandler ioHandler ioHandler ioHandler
recordProcessor RecordProcessor recordProcessor RecordProcessor
isShuttingDown bool
nextCheckpointPair SequencePair nextCheckpointPair SequencePair
} }
@ -163,7 +162,8 @@ func (kclp *KCLProcess) Checkpoint(pair SequencePair) {
} }
func (kclp *KCLProcess) Shutdown() { func (kclp *KCLProcess) Shutdown() {
kclp.isShuttingDown = true kclp.ioHandler.writeError("Checkpoint shutdown")
kclp.sendCheckpoint(nil, nil) // nil sequence num is signal to shutdown
} }
func (kclp *KCLProcess) handleCheckpointAction(action ActionCheckpoint) error { func (kclp *KCLProcess) handleCheckpointAction(action ActionCheckpoint) error {
@ -230,6 +230,17 @@ func (kclp *KCLProcess) handleLine(line string) error {
switch action := action.(type) { switch action := action.(type) {
case ActionCheckpoint: case ActionCheckpoint:
err = kclp.handleCheckpointAction(action) err = kclp.handleCheckpointAction(action)
case ActionShutdown:
kclp.ioHandler.writeError("Received shutdown action...")
// Shutdown should block until it's safe to shut down the process
err = kclp.recordProcessor.Shutdown(action.Reason)
if err != nil { // Log error and continue shutting down
kclp.ioHandler.writeError(fmt.Sprintf("ERR shutdown: %+#v", err))
}
kclp.ioHandler.writeError("Reporting shutdown done")
return kclp.reportDone("shutdown")
case ActionInitialize: case ActionInitialize:
err = kclp.recordProcessor.Initialize(action.ShardID, kclp) err = kclp.recordProcessor.Initialize(action.ShardID, kclp)
if err == nil { if err == nil {
@ -240,11 +251,6 @@ func (kclp *KCLProcess) handleLine(line string) error {
if err == nil { if err == nil {
err = kclp.reportDone(action.Action) err = kclp.reportDone(action.Action)
} }
case ActionShutdown:
err = kclp.recordProcessor.Shutdown(action.Reason)
if err == nil {
err = kclp.reportDone(action.Action)
}
default: default:
err = fmt.Errorf("unknown action to dispatch: %+#v", action) err = fmt.Errorf("unknown action to dispatch: %+#v", action)
} }
@ -285,9 +291,5 @@ func (kclp *KCLProcess) Run() {
} }
} }
kclp.ckpmux.Unlock() kclp.ckpmux.Unlock()
if kclp.isShuttingDown {
kclp.sendCheckpoint(nil, nil) // nil sequence num is signal to shutdown
}
} }
} }