```
=================================== FAILURES ===================================
[2025-04-01T17:38:12Z] _ test_abort[engine_args0-Hello my name is Robert and-RequestOutputKind.DELTA] _
[2025-04-01T17:38:12Z]
[2025-04-01T17:38:12Z] monkeypatch = <_pytest.monkeypatch.MonkeyPatch object at 0x7fd1fa052e70>
[2025-04-01T17:38:12Z] output_kind = <RequestOutputKind.DELTA: 1>
[2025-04-01T17:38:12Z] engine_args = AsyncEngineArgs(model='meta-llama/Llama-3.2-1B-Instruct', served_model_name=None, tokenizer='meta-llama/Llama-3.2-1B-I...additional_config=None, enable_reasoning=None, reasoning_parser=None, use_tqdm_on_load=True, disable_log_requests=True)
[2025-04-01T17:38:12Z] prompt = 'Hello my name is Robert and'
[2025-04-01T17:38:12Z]
[2025-04-01T17:38:12Z] @pytest.mark.parametrize(
[2025-04-01T17:38:12Z] "output_kind", [RequestOutputKind.DELTA, RequestOutputKind.FINAL_ONLY])
[2025-04-01T17:38:12Z] @pytest.mark.parametrize("engine_args,prompt",
[2025-04-01T17:38:12Z] [(TEXT_ENGINE_ARGS, TEXT_PROMPT),
[2025-04-01T17:38:12Z] (VISION_ENGINE_ARGS, VISION_PROMPT)])
[2025-04-01T17:38:12Z] @pytest.mark.asyncio
[2025-04-01T17:38:12Z] async def test_abort(monkeypatch: pytest.MonkeyPatch,
[2025-04-01T17:38:12Z] output_kind: RequestOutputKind,
[2025-04-01T17:38:12Z] engine_args: AsyncEngineArgs, prompt: PromptType):
[2025-04-01T17:38:12Z]
[2025-04-01T17:38:12Z] with monkeypatch.context() as m, ExitStack() as after:
[2025-04-01T17:38:12Z] m.setenv("VLLM_USE_V1", "1")
[2025-04-01T17:38:12Z]
[2025-04-01T17:38:12Z] engine = AsyncLLM.from_engine_args(engine_args)
[2025-04-01T17:38:12Z] after.callback(engine.shutdown)
[2025-04-01T17:38:12Z]
[2025-04-01T17:38:12Z] NUM_REQUESTS = 100
[2025-04-01T17:38:12Z] NUM_EXPECTED_TOKENS = 100
[2025-04-01T17:38:12Z] NUM_EXPECTED_TOKENS_LONG = 50000
[2025-04-01T17:38:12Z] REQUEST_IDS_TO_ABORT = range(1, 100, 10)
[2025-04-01T17:38:12Z] PARALLEL_SAMPLE_REQ_IDS = range(1, 100, 15)
[2025-04-01T17:38:12Z]
[2025-04-01T17:38:12Z] request_ids = [f"request-{i}" for i in range(NUM_REQUESTS)]
[2025-04-01T17:38:12Z]
[2025-04-01T17:38:12Z] # Create concurrent requests.
[2025-04-01T17:38:12Z] tasks: list[asyncio.Task] = []
[2025-04-01T17:38:12Z] for idx, request_id in enumerate(request_ids):
[2025-04-01T17:38:12Z] max_tokens = NUM_EXPECTED_TOKENS_LONG if (
[2025-04-01T17:38:12Z] idx in REQUEST_IDS_TO_ABORT) else NUM_EXPECTED_TOKENS
[2025-04-01T17:38:12Z] n = 3 if idx in PARALLEL_SAMPLE_REQ_IDS else 1
[2025-04-01T17:38:12Z] tasks.append(
[2025-04-01T17:38:12Z] asyncio.create_task(
[2025-04-01T17:38:12Z] generate(engine, request_id, prompt, output_kind,
[2025-04-01T17:38:12Z] max_tokens, n)))
[2025-04-01T17:38:12Z]
[2025-04-01T17:38:12Z] # API server cancels requests when they disconnect.
[2025-04-01T17:38:12Z] for idx in REQUEST_IDS_TO_ABORT:
[2025-04-01T17:38:12Z] tasks[idx].cancel()
[2025-04-01T17:38:12Z] await asyncio.sleep(0.1)
[2025-04-01T17:38:12Z]
[2025-04-01T17:38:12Z] # Confirm the other requests are okay.
[2025-04-01T17:38:12Z] for idx, task in enumerate(tasks):
[2025-04-01T17:38:12Z] # Confirm that it was actually canceled.
[2025-04-01T17:38:12Z] if idx in REQUEST_IDS_TO_ABORT:
[2025-04-01T17:38:12Z] with pytest.raises(asyncio.CancelledError):
[2025-04-01T17:38:12Z] await task
[2025-04-01T17:38:12Z] else:
[2025-04-01T17:38:12Z] # Otherwise, make sure the request was not impacted.
[2025-04-01T17:38:12Z] num_generated_tokens, request_id = await task
[2025-04-01T17:38:12Z] n = 3 if idx in PARALLEL_SAMPLE_REQ_IDS else 1
[2025-04-01T17:38:12Z] expected_tokens = NUM_EXPECTED_TOKENS * n
[2025-04-01T17:38:12Z] assert num_generated_tokens == expected_tokens, (
[2025-04-01T17:38:12Z] f"{request_id} generated {num_generated_tokens} but "
[2025-04-01T17:38:12Z] f"expected {expected_tokens}")
[2025-04-01T17:38:12Z]
[2025-04-01T17:38:12Z] # Make sure all aborted requests were really aborted.
[2025-04-01T17:38:12Z] > assert not engine.output_processor.has_unfinished_requests()
[2025-04-01T17:38:12Z] E assert not True
[2025-04-01T17:38:12Z] E + where True = has_unfinished_requests()
[2025-04-01T17:38:12Z] E + where has_unfinished_requests = <vllm.v1.engine.output_processor.OutputProcessor object at 0x7fd1ef1614c0>.has_unfinished_requests
[2025-04-01T17:38:12Z] E + where <vllm.v1.engine.output_processor.OutputProcessor object at 0x7fd1ef1614c0> = <vllm.v1.engine.async_llm.AsyncLLM object at 0x7fd1ef132750>.output_processor
[2025-04-01T17:38:12Z]
[2025-04-01T17:38:12Z] v1/engine/test_async_llm.py:178: AssertionError
[2025-04-01T17:38:12Z] =============================== warnings summary ===============================
[2025-04-01T17:38:12Z] tests/v1/engine/test_async_llm.py: 12 warnings
[2025-04-01T17:38:12Z] tests/v1/engine/test_engine_core_client.py: 1 warning
[2025-04-01T17:38:12Z] tests/v1/engine/test_llm_engine.py: 2 warnings
[2025-04-01T17:38:12Z] /usr/lib/python3.12/multiprocessing/popen_fork.py:66: DeprecationWarning: This process (pid=1700) is multi-threaded, use of fork() may lead to deadlocks in the child.
[2025-04-01T17:38:12Z] self.pid = os.fork()
[2025-04-01T17:38:12Z]
[2025-04-01T17:38:12Z] tests/v1/engine/test_engine_core.py::test_engine_core
[2025-04-01T17:38:12Z] tests/v1/engine/test_engine_core.py::test_engine_core_advanced_sampling
[2025-04-01T17:38:12Z] tests/v1/engine/test_engine_core.py::test_engine_core_concurrent_batches
[2025-04-01T17:38:12Z] tests/v1/engine/test_engine_core_client.py::test_engine_core_client[True]
[2025-04-01T17:38:12Z] tests/v1/engine/test_engine_core_client.py::test_engine_core_client[False]
[2025-04-01T17:38:12Z] /vllm-workspace/tests/utils.py:720: DeprecationWarning: This process (pid=1700) is multi-threaded, use of fork() may lead to deadlocks in the child.
[2025-04-01T17:38:12Z] pid = os.fork()
[2025-04-01T17:38:12Z]
[2025-04-01T17:38:12Z] -- Docs: https://docs.pytest.org/en/stable/how-to/capture-warnings.html
[2025-04-01T17:38:12Z] =========================== short test summary info ============================
[2025-04-01T17:38:12Z] FAILED v1/engine/test_async_llm.py::test_abort[engine_args0-Hello my name is Robert and-RequestOutputKind.DELTA] - assert not True
[2025-04-01T17:38:12Z] + where True = has_unfinished_requests()
[2025-04-01T17:38:12Z] + where has_unfinished_requests = <vllm.v1.engine.output_processor.OutputProcessor object at 0x7fd1ef1614c0>.has_unfinished_requests
[2025-04-01T17:38:12Z] + where <vllm.v1.engine.output_processor.OutputProcessor object at 0x7fd1ef1614c0> = <vllm.v1.engine.async_llm.AsyncLLM object at 0x7fd1ef132750>.output_processor
[2025-04-01T17:38:12Z] ============ 1 failed, 44 passed, 20 warnings in 1059.59s (0:17:39) ============
[2025-04-01T17:38:14Z] 🚨 Error: The command exited with status 1
```
### Your current environment
...
### 🐛 Describe the bug
Observed on main at commit 51d7c6a.
Seen in #15894
https://buildkite.com/organizations/vllm/pipelines/ci/builds/16742/jobs/0195f24d-e81a-46a3-ad08-6a51983d65d6/log
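One plausible reading of the failure, offered as an assumption rather than a confirmed diagnosis: the cancelled tasks are awaited and do raise `asyncio.CancelledError`, but if the abort they trigger is forwarded to the engine asynchronously (for example over a queue or socket), the output processor can still report unfinished requests for a short window afterwards. The standalone sketch below, plain asyncio with no vLLM involved, shows the shape of that race: the task itself is finished, yet the cleanup it fired off has not landed.

```python
import asyncio


async def main() -> None:
    aborted = asyncio.Event()  # stands in for engine-side abort handling

    async def process_abort() -> None:
        # Stand-in for the engine eventually processing an abort message.
        await asyncio.sleep(0.2)
        aborted.set()

    async def worker() -> None:
        try:
            await asyncio.sleep(3600)
        except asyncio.CancelledError:
            # Fire-and-forget, the way an abort sent over a queue/socket
            # would be: the task finishes before the abort is processed.
            asyncio.ensure_future(process_abort())
            raise

    task = asyncio.create_task(worker())
    await asyncio.sleep(0)  # let the worker reach its await
    task.cancel()
    try:
        await task
    except asyncio.CancelledError:
        pass
    # The task is done, but the engine-side cleanup is not:
    print("abort processed right after await?", aborted.is_set())  # False
    await aborted.wait()
    print("abort processed after waiting:", aborted.is_set())  # True


asyncio.run(main())
```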
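If that is the mechanism, the usual deflake is to poll for the drained state with a timeout instead of asserting immediately after the tasks complete. In the sketch below, `wait_for_no_unfinished` is a hypothetical helper name; `engine.output_processor.has_unfinished_requests()` is taken verbatim from the traceback above.

```python
import asyncio


async def wait_for_no_unfinished(engine, timeout: float = 10.0) -> None:
    """Poll until every aborted request has drained from the output
    processor, failing only if the state persists past the timeout.
    (Hypothetical helper; not part of the vLLM test suite.)"""
    deadline = asyncio.get_running_loop().time() + timeout
    while engine.output_processor.has_unfinished_requests():
        assert asyncio.get_running_loop().time() < deadline, (
            f"requests still unfinished after {timeout}s")
        await asyncio.sleep(0.05)
```

The bare assertion at the end of `test_abort` would then become `await wait_for_no_unfinished(engine)`, which tolerates in-flight abort messages without masking a genuine request leak.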