refactor and improve the parameterized spin_some tests for executors (#2460)

wjwwood · web-flow · commit b50f9ab41840 · 2024-03-22T12:07:30.000-07:00
* refactor and improve the spin_some parameterized tests for executors

Signed-off-by: William Woodall &lt;william@osrfoundation.org&gt;

* disable spin_some_max_duration for the StaticSingleThreadedExecutor and EventsExecutor

Signed-off-by: William Woodall &lt;william@osrfoundation.org&gt;

* fixup and clarify the docstring for Executor::spin_some()

Signed-off-by: William Woodall &lt;william@osrfoundation.org&gt;

* style

Signed-off-by: William Woodall &lt;william@osrfoundation.org&gt;

* review comments

Signed-off-by: William Woodall &lt;william@osrfoundation.org&gt;

---------

Signed-off-by: William Woodall &lt;william@osrfoundation.org&gt;
diff --git a/rclcpp/include/rclcpp/executor.hpp b/rclcpp/include/rclcpp/executor.hpp
@@ -282,16 +282,23 @@ class Executor
   void
   spin_node_some(std::shared_ptr<rclcpp::Node> node);
 
-  /// Collect work once and execute all available work, optionally within a duration.
+  /// Collect work once and execute all available work, optionally within a max duration.
   /**
-   * This function can be overridden. The default implementation is suitable for a
-   * single-threaded model of execution.
-   * Adding subscriptions, timers, services, etc. with blocking callbacks will cause this function
-   * to block (which may have unintended consequences).
+   * This function can be overridden.
+   * The default implementation is suitable for a single-threaded model of execution.
+   * Adding subscriptions, timers, services, etc. with blocking or long running
+   * callbacks may cause the function exceed the max_duration significantly.
+   *
+   * If there is no work to be done when this called, it will return immediately
+   * because the collecting of available work is non-blocking.
+   * Before each piece of ready work is executed this function checks if the
+   * max_duration has been exceeded, and if it has it returns without starting
+   * the execution of the next piece of work.
+   *
+   * If a max_duration of 0 is given, then all of the collected work will be
+   * executed before the function returns.
    *
    * \param[in] max_duration The maximum amount of time to spend executing work, or 0 for no limit.
-   * Note that spin_some() may take longer than this time as it only returns once max_duration has
-   * been exceeded.
    */
   RCLCPP_PUBLIC
   virtual void
diff --git a/rclcpp/test/rclcpp/executors/test_executors.cpp b/rclcpp/test/rclcpp/executors/test_executors.cpp
@@ -374,7 +374,20 @@ class TestWaitable : public rclcpp::Waitable
   {
     (void) data;
     count_++;
-    std::this_thread::sleep_for(3ms);
+    if (nullptr != on_execute_callback_) {
+      on_execute_callback_();
+    } else {
+      // TODO(wjwwood): I don't know why this was here, but probably it should
+      //   not be there, or test cases where that is important should use the
+      //   on_execute_callback?
+      std::this_thread::sleep_for(3ms);
+    }
+  }
+
+  void
+  set_on_execute_callback(std::function<void()> on_execute_callback)
+  {
+    on_execute_callback_ = on_execute_callback;
   }
 
   void
@@ -404,6 +417,7 @@ class TestWaitable : public rclcpp::Waitable
 private:
   size_t count_ = 0;
   rclcpp::GuardCondition gc_;
+  std::function<void()> on_execute_callback_ = nullptr;
 };
 
 TYPED_TEST(TestExecutors, spinAll)
@@ -448,45 +462,180 @@ TYPED_TEST(TestExecutors, spinAll)
   spinner.join();
 }
 
-TYPED_TEST(TestExecutors, spinSome)
+// Helper function to convert chrono durations into a scalar that GoogleTest
+// can more easily compare and print.
+template<typename DurationT>
+auto
+to_nanoseconds_helper(DurationT duration)
+{
+  return std::chrono::duration_cast<std::chrono::nanoseconds>(duration).count();
+}
+
+// The purpose of this test is to check that the ExecutorT.spin_some() method:
+//   - works nominally (it can execute entities)
+//   - it can execute multiple items at once
+//   - it does not wait for work to be available before returning
+TYPED_TEST(TestExecutors, spin_some)
 {
   using ExecutorType = TypeParam;
-  ExecutorType executor;
+
+  // Use an isolated callback group to avoid interference from any housekeeping
+  // items that may be in the default callback group of the node.
+  constexpr bool automatically_add_to_executor_with_node = false;
+  auto isolated_callback_group = this->node->create_callback_group(
+    rclcpp::CallbackGroupType::MutuallyExclusive,
+    automatically_add_to_executor_with_node);
+
+  // Check that spin_some() returns quickly when there is no work to be done.
+  // This can be a false positive if there is somehow some work for the executor
+  // to do that has not been considered, but the isolated callback group should
+  // avoid that.
+  {
+    ExecutorType executor;
+    executor.add_callback_group(isolated_callback_group, this->node->get_node_base_interface());
+
+    auto start = std::chrono::steady_clock::now();
+    // spin_some with some non-trival "max_duration" and check that it does not
+    // take anywhere near that long to execute.
+    constexpr auto max_duration = 10s;
+    executor.spin_some(max_duration);
+    EXPECT_LT(
+      to_nanoseconds_helper(std::chrono::steady_clock::now() - start),
+      to_nanoseconds_helper(max_duration / 2))
+      << "spin_some() took a long time to execute when it should have done "
+      << "nothing and should not have blocked either, but this could be a "
+      << "false negative if the computer is really slow";
+  }
+
+  // Check that having one thing ready gets executed by spin_some().
   auto waitable_interfaces = this->node->get_node_waitables_interface();
-  auto my_waitable = std::make_shared<TestWaitable>();
-  waitable_interfaces->add_waitable(my_waitable, nullptr);
-  executor.add_node(this->node);
+  auto my_waitable1 = std::make_shared<TestWaitable>();
+  waitable_interfaces->add_waitable(my_waitable1, isolated_callback_group);
+  {
+    ExecutorType executor;
+    executor.add_callback_group(isolated_callback_group, this->node->get_node_base_interface());
+
+    my_waitable1->trigger();
+
+    // The long duration should not matter, as executing the waitable is
+    // non-blocking, and spin_some() should exit after completing the available
+    // work.
+    auto start = std::chrono::steady_clock::now();
+    constexpr auto max_duration = 10s;
+    executor.spin_some(max_duration);
+    EXPECT_LT(
+      to_nanoseconds_helper(std::chrono::steady_clock::now() - start),
+      to_nanoseconds_helper(max_duration / 2))
+      << "spin_some() took a long time to execute when it should have very "
+      << "little to do and should not have blocked either, but this could be a "
+      << "false negative if the computer is really slow";
+
+    EXPECT_EQ(my_waitable1->get_count(), 1u)
+      << "spin_some() failed to execute a waitable that was triggered";
+  }
 
-  // Long timeout, doesn't block test from finishing because spin_some should exit after the
-  // first one completes.
-  bool spin_exited = false;
-  std::thread spinner([&spin_exited, &executor, this]() {
-      executor.spin_some(1s);
-      executor.remove_node(this->node, true);
-      spin_exited = true;
-    });
+  // Check that multiple things being ready are executed by spin_some().
+  auto my_waitable2 = std::make_shared<TestWaitable>();
+  waitable_interfaces->add_waitable(my_waitable2, isolated_callback_group);
+  {
+    ExecutorType executor;
+    executor.add_callback_group(isolated_callback_group, this->node->get_node_base_interface());
+
+    const size_t original_my_waitable1_count = my_waitable1->get_count();
+    my_waitable1->trigger();
+    my_waitable2->trigger();
+
+    // The long duration should not matter, as executing the waitable is
+    // non-blocking, and spin_some() should exit after completing the available
+    // work.
+    auto start = std::chrono::steady_clock::now();
+    constexpr auto max_duration = 10s;
+    executor.spin_some(max_duration);
+    EXPECT_LT(
+      to_nanoseconds_helper(std::chrono::steady_clock::now() - start),
+      to_nanoseconds_helper(max_duration / 2))
+      << "spin_some() took a long time to execute when it should have very "
+      << "little to do and should not have blocked either, but this could be a "
+      << "false negative if the computer is really slow";
+
+    EXPECT_EQ(my_waitable1->get_count(), original_my_waitable1_count + 1)
+      << "spin_some() failed to execute a waitable that was triggered";
+    EXPECT_EQ(my_waitable2->get_count(), 1u)
+      << "spin_some() failed to execute a waitable that was triggered";
+  }
+}
 
-  // Do some work until sufficient calls to the waitable occur, but keep going until either
-  // count becomes too large, spin exits, or the 1 second timeout completes.
-  auto start = std::chrono::steady_clock::now();
-  while (
-    my_waitable->get_count() <= 1 &&
-    !spin_exited &&
-    (std::chrono::steady_clock::now() - start < 1s))
+// The purpose of this test is to check that the ExecutorT.spin_some() method:
+//   - does not continue executing after max_duration has elapsed
+TYPED_TEST(TestExecutors, spin_some_max_duration)
+{
+  using ExecutorType = TypeParam;
+
+  // TODO(wjwwood): The `StaticSingleThreadedExecutor` and the `EventsExecutor`
+  //   do not properly implement max_duration (it seems), so disable this test
+  //   for them in the meantime.
+  //   see: https://github.com/ros2/rclcpp/issues/2462
+  if (
+    std::is_same<ExecutorType, rclcpp::executors::StaticSingleThreadedExecutor>() ||
+    std::is_same<ExecutorType, rclcpp::experimental::executors::EventsExecutor>())
   {
-    my_waitable->trigger();
-    this->publisher->publish(test_msgs::msg::Empty());
-    std::this_thread::sleep_for(1ms);
+    GTEST_SKIP();
   }
-  // The count of "execute" depends on whether the executor starts spinning before (1) or after (0)
-  // the first iteration of the while loop
-  EXPECT_LE(1u, my_waitable->get_count());
-  waitable_interfaces->remove_waitable(my_waitable, nullptr);
-  EXPECT_TRUE(spin_exited);
-  // Cancel if it hasn't exited already.
-  executor.cancel();
 
-  spinner.join();
+  // Use an isolated callback group to avoid interference from any housekeeping
+  // items that may be in the default callback group of the node.
+  constexpr bool automatically_add_to_executor_with_node = false;
+  auto isolated_callback_group = this->node->create_callback_group(
+    rclcpp::CallbackGroupType::MutuallyExclusive,
+    automatically_add_to_executor_with_node);
+
+  // Set up a situation with two waitables that take time to execute, such that
+  // the time it takes to execute two waitables far exceeds the max_duration
+  // given to spin_some(), which should result in spin_some() starting to
+  // execute one of them, have the max duration elapse, finish executing one
+  // of them, then returning before starting on the second.
+  constexpr auto max_duration = 100ms;  // relatively short because we expect to exceed it
+  constexpr auto waitable_callback_duration = max_duration * 2;
+  auto long_running_callback = [&waitable_callback_duration]() {
+      std::this_thread::sleep_for(waitable_callback_duration);
+    };
+
+  auto waitable_interfaces = this->node->get_node_waitables_interface();
+
+  auto my_waitable1 = std::make_shared<TestWaitable>();
+  my_waitable1->set_on_execute_callback(long_running_callback);
+  waitable_interfaces->add_waitable(my_waitable1, isolated_callback_group);
+
+  auto my_waitable2 = std::make_shared<TestWaitable>();
+  my_waitable2->set_on_execute_callback(long_running_callback);
+  waitable_interfaces->add_waitable(my_waitable2, isolated_callback_group);
+
+  ExecutorType executor;
+  executor.add_callback_group(isolated_callback_group, this->node->get_node_base_interface());
+
+  auto start = std::chrono::steady_clock::now();
+  // spin_some and check that it does not take longer than two of waitable_callback_duration,
+  // nor significantly less than a single waitable_callback_duration.
+  executor.spin_some(max_duration);
+  auto spin_some_run_time = std::chrono::steady_clock::now() - start;
+  EXPECT_GT(
+    to_nanoseconds_helper(spin_some_run_time),
+    to_nanoseconds_helper(waitable_callback_duration / 2))
+    << "spin_some() took less than half the expected time to execute a single "
+    << "waitable, which implies it did not actually execute one when it was "
+    << "expected to";
+  EXPECT_LT(
+    to_nanoseconds_helper(spin_some_run_time),
+    to_nanoseconds_helper(waitable_callback_duration * 2))
+    << "spin_some() took longer than expected to execute by a significant margin, but "
+    << "this could be a false positive on a very slow computer";
+
+  // check that exactly one of the waitables were executed (do not depend on a specific order)
+  size_t number_of_waitables_executed = my_waitable1->get_count() + my_waitable2->get_count();
+  EXPECT_EQ(number_of_waitables_executed, 1u)
+    << "expected exactly one of the two waitables to be executed, but "
+    << "my_waitable1->get_count(): " << my_waitable1->get_count() << " and "
+    << "my_waitable2->get_count(): " << my_waitable2->get_count();
 }
 
 // Check spin_node_until_future_complete with node base pointer