Skip to content

Commit

Permalink
Try journald for kernlogs then fall back to files
Browse files Browse the repository at this point in the history
Searching journald is the fastest option but it is not always available
or possible, so fall back to kern.log or the file-based journal
from sos when it is not.

Also pulls the kern.log customisation out of the fake_data_root
and puts it into the unit test files so that the data root is
restored to its original state.
  • Loading branch information
dosaboy committed Jan 15, 2025
1 parent 1745ed9 commit eb601cd
Show file tree
Hide file tree
Showing 10 changed files with 580 additions and 263 deletions.
3 changes: 0 additions & 3 deletions examples/hotsos-example-kernel.short.summary.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,3 @@ potential-issues:
- Unattended upgrades are enabled which can lead to uncontrolled changes to
this environment. If maintenance windows are required please consider disabling
unattended upgrades.
kernel:
MemoryWarnings:
- 1 reports of oom-killer invoked in kern.log - please check.
3 changes: 0 additions & 3 deletions examples/hotsos-example-kernel.summary.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,3 @@ kernel:
model: intel core processor (skylake, ibrs)
smt: disabled
cpufreq-scaling-governor: unknown
potential-issues:
MemoryWarnings:
- 1 reports of oom-killer invoked in kern.log - please check.
2 changes: 2 additions & 0 deletions hotsos/core/host_helpers/__init__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from .common import InstallInfoBase
from .cli.cli import (
CLIExecError,
CLIHelper,
CLIHelperFile,
)
Expand Down Expand Up @@ -40,6 +41,7 @@
)

__all__ = [
CLIExecError.__name__,
CLIHelper.__name__,
CLIHelperFile.__name__,
ConfigBase.__name__,
Expand Down
85 changes: 56 additions & 29 deletions hotsos/core/host_helpers/cli/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import pathlib
import pickle
import tempfile
from dataclasses import dataclass, field
from functools import cached_property

from hotsos.core.config import HotSOSConfig
Expand Down Expand Up @@ -64,6 +65,9 @@ def __init__(self, *args, **kwargs):

def format_journalctl_cmd(self, **kwargs):
""" Add optional extras to journalctl command. """
if kwargs.get("opts"):
self.cmd = f"{self.cmd} {kwargs.get('opts')}"

if kwargs.get("unit"):
self.cmd = f"{self.cmd} --unit {kwargs.get('unit')}"

Expand All @@ -84,7 +88,13 @@ def __init__(self, *args, **kwargs):
self.register_hook("pre-exec", self.preformat_sos_journalctl)

def preformat_sos_journalctl(self, **kwargs):
self.path = f"journalctl -oshort-iso -D {self.path}"
default_opts = '-oshort-iso'
if kwargs.get("opts"):
self.path = (f"journalctl {default_opts} {kwargs.get('opts')} "
f"-D {self.path}")
else:
self.path = f"journalctl {default_opts} -D {self.path}"

if kwargs.get("unit"):
self.path = f"{self.path} --unit {kwargs.get('unit')}"

Expand All @@ -94,27 +104,48 @@ def preformat_sos_journalctl(self, **kwargs):
self.path = f"{self.path} --since {self.since_date}"


class CLICacheWrapper():
    """
    Thin adapter that exposes a pair of cache callables as load/save methods.

    @param cache_load_f: callable taking a key and returning the cached value.
    @param cache_save_f: callable taking a key and a value to store.
    """
    def __init__(self, cache_load_f, cache_save_f):
        self.load_f, self.save_f = cache_load_f, cache_save_f

    def load(self, key):
        """ Return whatever the load callable gives back for key. """
        loader = self.load_f
        return loader(key)

    def save(self, key, value):
        """ Pass key and value to the save callable and return its result. """
        saver = self.save_f
        return saver(key, value)


@dataclass
class SourceRunner():
""" Manager to control how we execute commands.
Ensures that we try data sources in a consistent order.
@param cmdkey: unique key identifying this command.
@param sources: list of command source implementations.
@param cache: CLICacheWrapper object.
@param output_file: If a file path is provided the output of running a
command is saved to that file.
@param catch_exceptions: By default we catch binary execution
exceptions and return an exit code rather than
allowing the exception to be raised. If not
required this can be set to False.
"""
def __init__(self, cmdkey, sources, cache, output_file=None):
"""
@param cmdkey: unique key identifying this command.
@param sources: list of command source implementations.
@param cache: CLICacheWrapper object.
"""
self.cmdkey = cmdkey
self.sources = sources
self.cache = cache
self.output_file = output_file
cmdkey: str
sources: list
cache: CLICacheWrapper
output_file: str = field(default=None)
catch_exceptions: bool = field(default=True)

def __post_init__(self):
# Command output can differ between CLIHelper and CLIHelperFile so we
# need to cache them separately.
if output_file:
self.cache_cmdkey = f"{cmdkey}.file"
if self.output_file:
self.cache_cmdkey = f"{self.cmdkey}.file"
else:
self.cache_cmdkey = cmdkey
self.cache_cmdkey = self.cmdkey

def bsource(self, *args, **kwargs):
# binary sources only apply if data_root is system root
Expand Down Expand Up @@ -146,6 +177,9 @@ def bsource(self, *args, **kwargs):
# as success.
break
except CLIExecError as exc:
if not self.catch_exceptions:
raise

bin_out = CmdOutput(exc.return_value)

return bin_out
Expand All @@ -160,6 +194,9 @@ def fsource(self, *args, **kwargs):
return fsource(*args, **kwargs,
skip_load_contents=skip_load_contents)
except CLIExecError as exc:
if not self.catch_exceptions:
raise

return CmdOutput(exc.return_value)
except SourceNotFound:
pass
Expand Down Expand Up @@ -204,19 +241,6 @@ def __call__(self, *args, **kwargs):
return out.value


class CLICacheWrapper():
    """
    Thin adapter that exposes a pair of cache callables as load/save methods.

    @param cache_load_f: callable taking a key and returning the cached value.
    @param cache_save_f: callable taking a key and a value to store.
    """
    def __init__(self, cache_load_f, cache_save_f):
        self.load_f, self.save_f = cache_load_f, cache_save_f

    def load(self, key):
        """ Return whatever the load callable gives back for key. """
        loader = self.load_f
        return loader(key)

    def save(self, key, value):
        """ Pass key and value to the save callable and return its result. """
        saver = self.save_f
        return saver(key, value)


class CLIHelperBase(HostHelpersBase):
""" Base class for clihelper implementations. """
def __init__(self):
Expand Down Expand Up @@ -287,10 +311,12 @@ class CLIHelperFile(CLIHelperBase):
temporary file and the path to that file is returned.
"""

def __init__(self, *args, delete_temp=True, **kwargs):
def __init__(self, *args, catch_exceptions=True, delete_temp=True,
**kwargs):
super().__init__(*args, **kwargs)
self.delete_temp = delete_temp
self._tmp_file_mtime = None
self._catch_exceptions = catch_exceptions

def __enter__(self):
return self
Expand All @@ -315,7 +341,8 @@ def output_file(self):
def __getattr__(self, cmdname):
try:
ret = SourceRunner(cmdname, self.command_catalog[cmdname],
self.cli_cache, output_file=self.output_file)
self.cli_cache, output_file=self.output_file,
catch_exceptions=self._catch_exceptions)
return ret
except KeyError as exc:
raise CommandNotFound(cmdname, exc) from exc
Expand Down
7 changes: 4 additions & 3 deletions hotsos/core/plugins/kernel/kernlog/calltrace.py
Original file line number Diff line number Diff line change
Expand Up @@ -483,10 +483,11 @@ def __init__(self, *args, **kwargs):
self.run()

def run(self):
for tracetype in self.tracetypes:
self.searcher.add(tracetype.searchdef, self.path)
self.results = self.perform_search([t.searchdef
for t in self.tracetypes])
if not self.results:
return

self.results = self.searcher.run()
for tracetype in self.tracetypes:
if isinstance(tracetype.searchdef, SequenceSearchDef):
results = self.results.find_sequence_sections(
Expand Down
75 changes: 65 additions & 10 deletions hotsos/core/plugins/kernel/kernlog/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,12 @@
import os

from hotsos.core.config import HotSOSConfig
from hotsos.core.host_helpers import CLIHelper, HostNetworkingHelper
from hotsos.core.host_helpers import (
CLIExecError,
CLIHelper,
CLIHelperFile,
HostNetworkingHelper,
)
from hotsos.core.search import (
CommonTimestampMatcher,
FileSearcher,
Expand Down Expand Up @@ -83,9 +88,60 @@ def __iter__(self):
""" Iterate over each call trace found. """


class KernLogSource(CLIHelperFile):
    """
    Locate a source of kernel logs, trying the systemd journal first.

    Intended to be used as a context manager. On entry, if a journal
    directory exists under the data root, journalctl is run with -k and the
    result stored in self.path. If that is not possible or fails, a list of
    file-based fallbacks is tried in order and the first one that exists is
    used. self.path is left as None if no source could be found.
    """
    def __init__(self):
        # Exceptions must propagate so that a failed journalctl call can be
        # detected in __enter__ and the fallback sources tried instead.
        super().__init__(catch_exceptions=False)
        self.path = None
        self.attempt = 0
        # Fallback sources in order of preference, rooted at the data root.
        self.fs_paths = [os.path.join(HotSOSConfig.data_root, f)
                         for f in ('var/log/kern.log',
                                   'sos_commands/logs/journalctl_--no-pager')]

    def __enter__(self):
        """
        Select the kernlog source and record it in self.path.

        @return: this object, with self.path set to the chosen source or
                 None if nothing was found.
        """
        journal_dir = os.path.join(HotSOSConfig.data_root, 'var/log/journal')
        if os.path.exists(journal_dir):
            try:
                self.path = self.journalctl(opts='-k')
                log.debug("using journal as source of kernlogs")
            except CLIExecError:
                log.info("Failed to get kernlogs from systemd journal. "
                         "Trying fallback sources.")
        else:
            log.info("systemd journal not available. Trying fallback kernlog "
                     "sources.")

        if not self.path:
            for path in self.fs_paths:
                if os.path.exists(path):
                    # Only kern.log has rotated siblings worth globbing for.
                    if (path.endswith('kern.log') and
                            HotSOSConfig.use_all_logs):
                        self.path = f"{path}*"
                    else:
                        self.path = path

                    log.debug("using %s as source of kernlogs", self.path)
                    break
            else:
                # Loop finished without a break i.e. no fallback exists.
                paths = ', '.join(self.fs_paths)
                log.warning("no kernlog sources found (tried %s and "
                            "journal)", paths)

        return self


class KernLogBase():
""" Base class for kernlog analysis implementations. """
def __init__(self):
self.hostnet_helper = HostNetworkingHelper()
self.cli_helper = CLIHelper()

@staticmethod
def perform_search(searchdefs):
try:
constraint = SearchConstraintSearchSince(
ts_matcher_cls=CommonTimestampMatcher)
Expand All @@ -94,14 +150,13 @@ def __init__(self):
"calltrace checker: %s", exc)
constraint = None

self.searcher = FileSearcher(constraint=constraint)
self.hostnet_helper = HostNetworkingHelper()
self.cli_helper = CLIHelper()
searcher = FileSearcher(constraint=constraint)
with KernLogSource() as kls:
if kls.path is None:
log.info("no kernlogs found")
return None

@property
def path(self):
path = os.path.join(HotSOSConfig.data_root, 'var/log/kern.log')
if HotSOSConfig.use_all_logs:
return f"{path}*"
for sd in searchdefs:
searcher.add(sd, kls.path)

return path
return searcher.run()
9 changes: 5 additions & 4 deletions hotsos/core/plugins/kernel/kernlog/events.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,8 @@ class KernLogEvents(KernLogBase):
""" Kern log events info. """
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
for event in [self.over_mtu_dropped_packets_search_def]:
self.searcher.add(event, self.path)

self.results = self.searcher.run()
self.results = self.perform_search([
self.over_mtu_dropped_packets_search_def])

@property
def over_mtu_dropped_packets_search_def(self):
Expand All @@ -32,6 +30,9 @@ def over_mtu_dropped_packets(self):
@return: dict of interfaces and an integer count of associated dropped
packet messages.
"""
if not self.results:
return {}

interfaces = {}
for r in self.results.find_by_tag('over-mtu-dropped'):
if r.get(1) in interfaces:
Expand Down
Loading

0 comments on commit eb601cd

Please sign in to comment.