From 3c932a1e4bae8f85bd8dd665640f7b44c4679d2d Mon Sep 17 00:00:00 2001 From: Ayaz Abbas Date: Wed, 12 Jun 2024 15:49:45 +0100 Subject: [PATCH] apply the same notification rules on zd events to tg events --- README.md | 10 +++++----- pyth_observer/dispatch.py | 21 +++++++++++---------- 2 files changed, 16 insertions(+), 15 deletions(-) diff --git a/README.md b/README.md index 185543b..dc01a66 100644 --- a/README.md +++ b/README.md @@ -33,18 +33,18 @@ Event types are configured via environment variables: - `TelegramEvent` - `TELEGRAM_BOT_TOKEN` - API token for the Telegram bot + - `OPEN_ALERTS_FILE` - Path to local file used for persisting open alerts - `ZendutyEvent` - `ZENDUTY_INTEGRATION_KEY` - Integration key for Zenduty service API integration - `OPEN_ALERTS_FILE` - Path to local file used for persisting open alerts -### Zenduty Alert Thresholds -- Zenduty alert will fire if a check fails 5 or more times within 5 minutes. -- The alert will be resolved if the check failed < 4 times within 5 minutes. +### Alert Thresholds +- Alert thresholds apply to ZendutyEvent and TelegramEvent (resolution only applies to zenduty) - Checks run approximately once per minute. - These thresholds can be overridden per check type in config.yaml - - `zenduty_alert_threshold`: number of failures in 5 minutes >= to this value trigger an alert (default: 5) - - `zenduty_resolution_threshold`: number of failures in 5 minutes <= this value resolve the alert (default: 3) + - `alert_threshold`: number of failures in 5 minutes >= to this value trigger an alert (default: 5) + - `resolution_threshold`: number of failures in 5 minutes <= this value resolve the alert (default: 3) ## Finding the Telegram Group Chat ID diff --git a/pyth_observer/dispatch.py b/pyth_observer/dispatch.py index 886e453..9f1172b 100644 --- a/pyth_observer/dispatch.py +++ b/pyth_observer/dispatch.py @@ -47,7 +47,7 @@ def __init__(self, config, publishers): self.open_alerts = self.load_alerts() # below is used to store events to later send if mutilple failures occur # events cannot be stored in open_alerts as they are not JSON serializable. - self.zenduty_events = {} + self.delayed_events = {} def load_alerts(self): try: @@ -79,7 +79,7 @@ async def run(self, states: List[State]): for event_type in self.config["events"]: event: Event = globals()[event_type](check, context) - if event_type == "ZendutyEvent": + if event_type in ["ZendutyEvent", "TelegramEvent"]: alert_identifier = self.generate_alert_identifier(check) alert = self.open_alerts.get(alert_identifier) if alert is None: @@ -89,11 +89,12 @@ async def run(self, states: List[State]): "failures": 1, "last_window_failures": None, "sent": False, + "event_type": event_type } else: alert["failures"] += 1 - self.zenduty_events[alert_identifier] = event - continue # Skip sending immediately for ZendutyEvent + self.delayed_events[alert_identifier] = event + continue # Skip sending immediately for ZendutyEvent or TelegramEvent sent_events.append(event.send()) @@ -177,8 +178,8 @@ async def process_zenduty_events(self, current_time): for identifier, info in self.open_alerts.items(): self.check_zd_alert_status(identifier, current_time) check_config = self.config["checks"]["global"][info["type"]] - alert_threshold = check_config.get("zenduty_alert_threshold", 5) - resolution_threshold = check_config.get("zenduty_resolution_threshold", 3) + alert_threshold = check_config.get("alert_threshold", 5) + resolution_threshold = check_config.get("resolution_threshold", 3) # Resolve the alert if raised and failed < $threshold times in the last 5m window resolved = False if ( @@ -187,7 +188,7 @@ async def process_zenduty_events(self, current_time): ): logger.debug(f"Resolving Zenduty alert {identifier}") resolved = True - if info["sent"]: + if info["sent"] and info.get("event_type", "ZendutyEvent") == "ZendutyEvent": response = await send_zenduty_alert( identifier, identifier, resolved=True ) @@ -208,7 +209,7 @@ async def process_zenduty_events(self, current_time): logger.debug(f"Raising Zenduty alert {identifier}") self.open_alerts[identifier]["sent"] = True self.open_alerts[identifier]["last_alert"] = current_time.isoformat() - event = self.zenduty_events.get(identifier) + event = self.delayed_events.get(identifier) if event: to_alert.append(event.send()) @@ -216,8 +217,8 @@ async def process_zenduty_events(self, current_time): for identifier in to_remove: if self.open_alerts.get(identifier): del self.open_alerts[identifier] - if self.zenduty_events.get(identifier): - del self.zenduty_events[identifier] + if self.delayed_events.get(identifier): + del self.delayed_events[identifier] with open(self.open_alerts_file, "w") as file: json.dump(self.open_alerts, file)