Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ENH enable faulthandler traceback reporting on worker crash by SIGSEGV #419

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions CHANGES.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
### 3.5.0 - in development

- Automatically call `faulthandler.enable()` when starting loky worker
processes to report more informative diagnostics (post-mortem Python
tracebacks in particular) on worker crashes. (#419)

- Fix a random deadlock caused by a race condition at executor shutdown that
was observed on Linux and Windows. (#438)

Expand Down
30 changes: 29 additions & 1 deletion loky/process_executor.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@
__author__ = "Thomas Moreau (thomas.moreau.2010@gmail.com)"


import faulthandler
import os
import gc
import sys
Expand Down Expand Up @@ -377,6 +378,28 @@
result_queue.put(_ResultItem(work_id, exception=exc))


def _enable_faulthandler_if_needed():
    """Enable ``faulthandler`` in this process unless it is already set up.

    Three situations are distinguished, each reported through
    ``mp.util.debug``:

    * ``PYTHONFAULTHANDLER`` is present in the environment: the function
      changes nothing, so the user's explicit configuration is respected.
    * ``faulthandler`` is already enabled (for instance by a custom
      initializer): nothing is changed either.
    * Otherwise ``faulthandler.enable()`` is called with its default
      parameters so that tracebacks are reported on worker crashes.
    """
    env_setting = os.environ.get("PYTHONFAULTHANDLER")
    if env_setting is not None:
        # The user configured faulthandler explicitly through the
        # environment: do not override that choice here.
        mp.util.debug(
            "faulthandler explicitly configured by environment variable: "
            f"PYTHONFAULTHANDLER={env_setting}."
        )
        return

    if faulthandler.is_enabled():
        # Already enabled, possibly via a custom initializer that wanted to
        # customize the behavior.
        # NOTE(review): the coverage report flagged this branch as not
        # exercised by the test suite.
        mp.util.debug("faulthandler already enabled.")
        return

    # Nothing configured so far: fall back to the default parameters.
    mp.util.debug(
        "Enabling faulthandler to report tracebacks on worker crashes."
    )
    faulthandler.enable()


def _process_worker(
call_queue,
result_queue,
Expand Down Expand Up @@ -423,6 +446,8 @@
pid = os.getpid()

mp.util.debug(f"Worker started with timeout={timeout}")
_enable_faulthandler_if_needed()

while True:
try:
call_item = call_queue.get(block=True, timeout=timeout)
Expand Down Expand Up @@ -712,7 +737,10 @@
"terminated. This could be caused by a segmentation fault "
"while calling the function or by an excessive memory usage "
"causing the Operating System to kill the worker.\n"
f"{exit_codes}"
f"{exit_codes}\n"
"Detailed tracebacks of the workers should have been printed "
"to stderr in the executor process if faulthandler was not "
"disabled."
)

self.thread_wakeup.clear()
Expand Down
77 changes: 77 additions & 0 deletions tests/test_reusable_executor.py
Original file line number Diff line number Diff line change
Expand Up @@ -949,6 +949,83 @@ def test_reusable_executor_reuse_true(self):
executor4 = get_reusable_executor()
assert executor4 is executor3

def test_faulthandler_enabled(self):
    """Check in which configurations loky workers report crash tracebacks.

    A child interpreter runs a small script in which one task of an
    ``executor.map`` call triggers a segmentation fault through
    ``faulthandler._sigsegv()``. Each task also asserts whether
    ``faulthandler`` is enabled in the worker. The child is expected to
    exit with return code 1 in every case, and its stderr must contain a
    faulthandler dump ("Current thread") whenever faulthandler is
    expected to be enabled.
    """
    # The leading ``if 1:`` lets the script body stay indented inside this
    # triple-quoted string while remaining valid Python for ``-c``.
    cmd = """if 1:
        from loky import get_reusable_executor
        import faulthandler

        def f(i):
            if {expect_enabled}:
                assert faulthandler.is_enabled()
            else:
                assert not faulthandler.is_enabled()
            if i == 5:
                faulthandler._sigsegv()

        if {enable_faulthandler_via_initializer}:
            executor = get_reusable_executor(max_workers=2, initializer=faulthandler.enable)
        else:
            executor = get_reusable_executor(max_workers=2)

        list(executor.map(f, range(10)))
        # This should always trigger a crash.
    """

    def check_faulthandler_output(
        expect_enabled=True, enable_faulthandler_via_initializer=False
    ):
        """Run the script in a child interpreter and check its outcome."""
        script = cmd.format(
            expect_enabled=expect_enabled,
            enable_faulthandler_via_initializer=(
                enable_faulthandler_via_initializer
            ),
        )
        p = subprocess.Popen(
            [sys.executable, "-c", script],
            stderr=subprocess.PIPE,
            stdout=subprocess.PIPE,
        )
        # communicate() drains both pipes while waiting for the child to
        # exit. Calling wait() first could deadlock if the child filled a
        # pipe buffer before terminating.
        out, err = p.communicate()

        # The worker is always expected to crash, irrespective of whether
        # faulthandler is enabled or not.
        assert p.returncode == 1, out.decode()

        if expect_enabled:
            assert b"Current thread" in err, err.decode()

    # The child interpreter inherits os.environ, so the variable is
    # manipulated here and restored in the ``finally`` clause below.
    original_pythonfaulthandler_env = os.environ.pop(
        "PYTHONFAULTHANDLER", None
    )
    try:
        # Case 1: faulthandler should be automatically enabled by default.
        check_faulthandler_output(expect_enabled=True)

        # Case 2: faulthandler should also be enabled when
        # PYTHONFAULTHANDLER=1 is set.
        os.environ["PYTHONFAULTHANDLER"] = "1"
        check_faulthandler_output(expect_enabled=True)

        # Case 3: faulthandler should not be enabled when
        # PYTHONFAULTHANDLER=0 is set explicitly.
        os.environ["PYTHONFAULTHANDLER"] = "0"
        check_faulthandler_output(expect_enabled=False)

        # Case 4: faulthandler can also be enabled manually via the
        # initializer.
        del os.environ["PYTHONFAULTHANDLER"]
        check_faulthandler_output(
            expect_enabled=True, enable_faulthandler_via_initializer=True
        )
    finally:
        if original_pythonfaulthandler_env is None:
            os.environ.pop("PYTHONFAULTHANDLER", None)
        else:
            os.environ["PYTHONFAULTHANDLER"] = (
                original_pythonfaulthandler_env
            )


class TestExecutorInitializer(ReusableExecutorMixin):
def _initializer(self, x):
Expand Down
Loading