From 55ca3f28eaad23f62dbb1afe7901acf917cc9347 Mon Sep 17 00:00:00 2001 From: Karl Nilsson Date: Fri, 26 Jan 2024 13:49:47 +0000 Subject: [PATCH] Be more defensive when evaluating retention. It is possible that osiris_log:evaluate_retention/1 may throw an error, in particular `missing_file` which could occur if an osiris member was deleted whilst an evaluation request was outstanding. Currently this crashes the evaluation process, which isn't a big deal as it will restart, but we'd lose any pending evaluation requests when this happens, so it is better to try to avoid that. --- src/osiris_retention.erl | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/src/osiris_retention.erl b/src/osiris_retention.erl index c8564a5..61e582a 100644 --- a/src/osiris_retention.erl +++ b/src/osiris_retention.erl @@ -9,6 +9,7 @@ -behaviour(gen_server). +-include("osiris.hrl"). %% API functions -export([start_link/0, eval/4]). @@ -65,15 +66,20 @@ handle_call(_Request, _From, State) -> %% @spec handle_cast(Msg, State) -> {noreply, State} | %% {noreply, State, Timeout} | %% {stop, Reason, State} -handle_cast({eval, Pid, _Name, Dir, Specs, Fun} = Eval, State) -> +handle_cast({eval, Pid, Name, Dir, Specs, Fun} = Eval, State) -> %% only do retention evaluation for stream processes that are %% alive as the callback Fun passed in would update a shared atomic %% value and this atomic is new per process incarnation case is_process_alive(Pid) of true -> - Result = osiris_log:evaluate_retention(Dir, Specs), - _ = Fun(Result), - {noreply, schedule(Eval, Result, State)}; + try osiris_log:evaluate_retention(Dir, Specs) of + Result -> + _ = Fun(Result), + {noreply, schedule(Eval, Result, State)} + catch _:Err -> + ?DEBUG_(Name, "retention evaluation failed with ~w", [Err]), + {noreply, State} + end; false -> {noreply, State} end.