Skip to content

Commit a5262e1

Browse files
authored
add debug log an fix flaky test (#4290)
- add debug logs on Manager.update for completeness - Refactor and rename TestManager_OutputChange - use eventually instead of time.Sleep - rename to better reflect what it tests - Remove TestManager_StartStop as TestManager_StartStopComponent already covers it
1 parent 00550ca commit a5262e1

File tree

2 files changed

+96
-262
lines changed

2 files changed

+96
-262
lines changed

pkg/component/runtime/manager.go

+30-22
Original file line numberDiff line numberDiff line change
@@ -772,24 +772,27 @@ func (m *Manager) update(model component.Model, teardown bool) error {
772772
stop = append(stop, existing)
773773
}
774774
m.currentMx.RUnlock()
775-
if len(stop) > 0 {
776-
var stoppedWg sync.WaitGroup
777-
stoppedWg.Add(len(stop))
778-
for _, existing := range stop {
779-
m.logger.Debugf("Stopping component %q", existing.id)
780-
_ = existing.stop(teardown, model.Signed)
781-
// stop is async, wait for operation to finish,
782-
// otherwise new instance may be started and components
783-
// may fight for resources (e.g ports, files, locks)
784-
go func(state *componentRuntimeState) {
785-
m.waitForStopped(state)
786-
stoppedWg.Done()
787-
}(existing)
788-
}
789-
stoppedWg.Wait()
775+
776+
var stoppedWg sync.WaitGroup
777+
stoppedWg.Add(len(stop))
778+
for _, existing := range stop {
779+
m.logger.Debugf("Stopping component %q", existing.id)
780+
_ = existing.stop(teardown, model.Signed)
781+
// stop is async, wait for operation to finish,
782+
// otherwise new instance may be started and components
783+
// may fight for resources (e.g. ports, files, locks)
784+
go func(state *componentRuntimeState) {
785+
err := m.waitForStopped(state)
786+
if err != nil {
787+
m.logger.Errorf("updating components: failed waiting %s stop",
788+
state.id)
789+
}
790+
stoppedWg.Done()
791+
}(existing)
790792
}
793+
stoppedWg.Wait()
791794

792-
// start all not started
795+
// start new components
793796
for _, comp := range newComponents {
794797
// new component; create its runtime
795798
logger := m.baseLogger.Named(fmt.Sprintf("component.runtime.%s", comp.ID))
@@ -800,6 +803,7 @@ func (m *Manager) update(model component.Model, teardown bool) error {
800803
m.currentMx.Lock()
801804
m.current[comp.ID] = state
802805
m.currentMx.Unlock()
806+
m.logger.Debugf("Starting component %q", comp.ID)
803807
if err = state.start(); err != nil {
804808
return fmt.Errorf("failed to start component %s: %w", comp.ID, err)
805809
}
@@ -808,10 +812,11 @@ func (m *Manager) update(model component.Model, teardown bool) error {
808812
return nil
809813
}
810814

811-
func (m *Manager) waitForStopped(comp *componentRuntimeState) {
815+
func (m *Manager) waitForStopped(comp *componentRuntimeState) error {
812816
if comp == nil {
813-
return
817+
return nil
814818
}
819+
815820
currComp := comp.getCurrent()
816821
compID := currComp.ID
817822
timeout := defaultStopTimeout
@@ -828,20 +833,23 @@ func (m *Manager) waitForStopped(comp *componentRuntimeState) {
828833
latestState := comp.getLatest()
829834
if latestState.State == client.UnitStateStopped {
830835
m.logger.Debugf("component %q stopped.", compID)
831-
return
836+
return nil
832837
}
833838

839+
// it might happen the component stop signal isn't received but the
840+
// manager detects it stopped running. Then the manager removes it from
841+
// its list of current components. Therefore, we also need to check if
842+
// the component was removed, if it was, we consider it stopped.
834843
m.currentMx.RLock()
835844
if _, exists := m.current[compID]; !exists {
836845
m.currentMx.RUnlock()
837-
return
846+
return nil
838847
}
839848
m.currentMx.RUnlock()
840849

841850
select {
842851
case <-timeoutCh:
843-
m.logger.Errorf("timeout exceeded waiting for component %q to stop", compID)
844-
return
852+
return fmt.Errorf("timeout exceeded after %s", timeout)
845853
case <-time.After(stopCheckRetryPeriod):
846854
}
847855
}

0 commit comments

Comments
 (0)