@@ -772,24 +772,27 @@ func (m *Manager) update(model component.Model, teardown bool) error {
772
772
stop = append (stop , existing )
773
773
}
774
774
m .currentMx .RUnlock ()
775
- if len (stop ) > 0 {
776
- var stoppedWg sync.WaitGroup
777
- stoppedWg .Add (len (stop ))
778
- for _ , existing := range stop {
779
- m .logger .Debugf ("Stopping component %q" , existing .id )
780
- _ = existing .stop (teardown , model .Signed )
781
- // stop is async, wait for operation to finish,
782
- // otherwise new instance may be started and components
783
- // may fight for resources (e.g ports, files, locks)
784
- go func (state * componentRuntimeState ) {
785
- m .waitForStopped (state )
786
- stoppedWg .Done ()
787
- }(existing )
788
- }
789
- stoppedWg .Wait ()
775
+
776
+ var stoppedWg sync.WaitGroup
777
+ stoppedWg .Add (len (stop ))
778
+ for _ , existing := range stop {
779
+ m .logger .Debugf ("Stopping component %q" , existing .id )
780
+ _ = existing .stop (teardown , model .Signed )
781
+ // stop is async, wait for operation to finish,
782
+ // otherwise new instance may be started and components
783
+ // may fight for resources (e.g. ports, files, locks)
784
+ go func (state * componentRuntimeState ) {
785
+ err := m .waitForStopped (state )
786
+ if err != nil {
787
+ m .logger .Errorf ("updating components: failed waiting %s stop" ,
788
+ state .id )
789
+ }
790
+ stoppedWg .Done ()
791
+ }(existing )
790
792
}
793
+ stoppedWg .Wait ()
791
794
792
- // start all not started
795
+ // start new components
793
796
for _ , comp := range newComponents {
794
797
// new component; create its runtime
795
798
logger := m .baseLogger .Named (fmt .Sprintf ("component.runtime.%s" , comp .ID ))
@@ -800,6 +803,7 @@ func (m *Manager) update(model component.Model, teardown bool) error {
800
803
m .currentMx .Lock ()
801
804
m .current [comp .ID ] = state
802
805
m .currentMx .Unlock ()
806
+ m .logger .Debugf ("Starting component %q" , comp .ID )
803
807
if err = state .start (); err != nil {
804
808
return fmt .Errorf ("failed to start component %s: %w" , comp .ID , err )
805
809
}
@@ -808,10 +812,11 @@ func (m *Manager) update(model component.Model, teardown bool) error {
808
812
return nil
809
813
}
810
814
811
- func (m * Manager ) waitForStopped (comp * componentRuntimeState ) {
815
+ func (m * Manager ) waitForStopped (comp * componentRuntimeState ) error {
812
816
if comp == nil {
813
- return
817
+ return nil
814
818
}
819
+
815
820
currComp := comp .getCurrent ()
816
821
compID := currComp .ID
817
822
timeout := defaultStopTimeout
@@ -828,20 +833,23 @@ func (m *Manager) waitForStopped(comp *componentRuntimeState) {
828
833
latestState := comp .getLatest ()
829
834
if latestState .State == client .UnitStateStopped {
830
835
m .logger .Debugf ("component %q stopped." , compID )
831
- return
836
+ return nil
832
837
}
833
838
839
+ // it might happen the component stop signal isn't received but the
840
+ // manager detects it stopped running. Then the manager removes it from
841
+ // its list of current components. Therefore, we also need to check if
842
+ // the component was removed, if it was, we consider it stopped.
834
843
m .currentMx .RLock ()
835
844
if _ , exists := m .current [compID ]; ! exists {
836
845
m .currentMx .RUnlock ()
837
- return
846
+ return nil
838
847
}
839
848
m .currentMx .RUnlock ()
840
849
841
850
select {
842
851
case <- timeoutCh :
843
- m .logger .Errorf ("timeout exceeded waiting for component %q to stop" , compID )
844
- return
852
+ return fmt .Errorf ("timeout exceeded after %s" , timeout )
845
853
case <- time .After (stopCheckRetryPeriod ):
846
854
}
847
855
}
0 commit comments