Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[OGUI-1590] Add support for final ECS operation (error/timeout) in DCS SOR pane #2697

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
89 changes: 86 additions & 3 deletions Control/lib/adapters/DcsIntegratedEventAdapter.js
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,15 @@
* or submit itself to any jurisdiction.
*/

const {
EcsOperationAndStepStatus: {
DONE_ERROR,
DONE_TIMEOUT
}
} = require('../common/ecsOperationAndStepStatus.enum.js');

/**
* DcsIntegratedEventAdapter - Given an AliECS Integrated Service Event for DCS.SOR, build a DCS Integrated Event
* @class DcsIntegratedEventAdapter - Given an AliECS Integrated Service Event for DCS.SOR, build a DCS Integrated Event
*
* The DCS SOR event is a special event that comes from either:
* * the DCS service itself (when containing the payload "dcsEvent") and it is for one detector only
Expand All @@ -26,7 +33,77 @@
}

/**
* Build a DCS Integrated Event from an AliECS Integrated Service Event. If it is a DCSevent, the detector will replace detectors array
* Build a DCS Integrated Event from an AliECS Integrated Service Event - SOR. If it is a DCSevent, the detector will replace detectors array
*
* // IntegratedService event, related to SOR but with a failure on ECS side (such as timeout)
* @example
* {
* "timestamp": 1733497646607,
* "integratedServiceEvent": {
* "name": "readout-dataflow.dcs.sor",
* "error": "DCS SOR timed out after 1s: rpc error: code = DeadlineExceeded desc = Deadline Exceeded",
* "operationName": "dcs.StartOfRun()",
* "operationStatus": "ONGOING",
* "operationStep": "perform DCS call: StartOfRun",
* "operationStepStatus": "DONE_TIMEOUT",
* "environmentId": "2rRm96N9k7E",
* "payload": "{\"detectors\":[\"EMC\"],\"detectorsReadiness\":{\"EMC\":\"SOR_AVAILABLE\"},\"runNumber\":1601}"
* }
* // IntegratedService event with final state DONE_ERROR following the DONE_TIMEOUT from above
* @example
* {
* "timestamp": 1734004912438,
* "integratedServiceEvent": {
* "name": "readout-dataflow.dcs.sor",
* "error": "DCS SOR timed out after 100ms: rpc error: code = DeadlineExceeded desc = context deadline exceeded : SOR failed for EMC, FDD, DCS EOR will run anyway for this run",
* "operationName": "dcs.StartOfRun()",
* "operationStatus": "DONE_ERROR",
* "operationStep": "perform DCS call: StartOfRun",
* "operationStepStatus": "DONE_ERROR",
* "environmentId": "2rYQabnjWy2",
* "payload": "{\"detectors\":[\"EMC\",\"FDD\"],\"detectorsReadiness\":{\"EMC\":\"SOR_AVAILABLE\",\"FDD\":\"SOR_AVAILABLE\"},\"failedDetectors\":[\"EMC\",\"FDD\"],\"runNumber\":1622}"
* }
*
* // IntegratedService event, related to SOR_PROGRESSING with payload from DCS
* @example
* {
* "timestamp": 1734004912360,
* "timestampNano": 1734004912360675322,
* "environmentEvent": null,
* "taskEvent": null,
* "roleEvent": null,
* "callEvent": null,
* "integratedServiceEvent": {
* "name": "readout-dataflow.dcs.sor",
* "error": null,
* "operationName": "dcs.StartOfRun()",
* "operationStatus": "ONGOING",
* "operationStep": "perform DCS call: StartOfRun",
* "operationStepStatus": "ONGOING",
* "environmentId": "2rYQabnjWy2",
* "payload": \"{
* \"dcsEvent\": {
* \"eventtype\":20,
* \"detector\":2,
* \"state\":5,
* \"extraParameters\":{
* \"run_no\":\"1622\"
* },
* \"timestamp\":\"2024-12-12 13:01:52.358\",
* \"message\":\"run_type\"
* },
* \"detector\":\"EMC\",
* \"detectors\":[\"EMC\",\"FDD\"],
* \"detectorsReadiness\":{
* \"EMC\":\"SOR_AVAILABLE\",
* \"FDD\":\"SOR_AVAILABLE\"
* },
* \"runNumber\":1622,
* \"state\":\"SOR_PROGRESSING\"
* }"
* }
*
* Final OperationStates: DONE_TIMEOUT/DONE_ERROR/DONE_OK
* @param {object} event - AliECS Integrated Service Event
* @param {number} timestamp - timestamp of the event (int64 as per proto file definition)
* @return {object} DCS Integrated Event
Expand All @@ -37,7 +114,13 @@

const payloadJSON = JSON.parse(payload);
const { dcsEvent, runNumber, detector = null, state } = payloadJSON;
if (!dcsEvent) {

if (!dcsEvent

Check warning on line 118 in Control/lib/adapters/DcsIntegratedEventAdapter.js

View check run for this annotation

Codecov / codecov/patch

Control/lib/adapters/DcsIntegratedEventAdapter.js#L118

Added line #L118 was not covered by tests
&& operationStatus !== DONE_ERROR && operationStatus !== DONE_TIMEOUT
&& operationStepStatus !== DONE_ERROR && operationStepStatus !== DONE_TIMEOUT
) {
// if there is no DCS event and status is not final error or timeout, we ignore the event as we expect to have `RUN_OK` from DCS as final state
// or DONE_TIMEOUT or DONE_ERROR from ECS. We are not interested in DONE_OK from ECS as this means all detectors in RUN_OK which we already look for
return null;
}
let { detectors } = payloadJSON;
Expand Down
27 changes: 27 additions & 0 deletions Control/lib/common/ecsOperationAndStepStatus.enum.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
/**
* @license
* Copyright 2019-2020 CERN and copyright holders of ALICE O2.
* See http://alice-o2.web.cern.ch/copyright for details of the copyright holders.
* All rights not expressly granted are reserved.
*
* This software is distributed under the terms of the GNU General Public
* License v3 (GPL Version 3), copied verbatim in the file "COPYING".
*
* In applying this license CERN does not waive the privileges and immunities
* granted to it by virtue of its status as an Intergovernmental Organization
* or submit itself to any jurisdiction.
*/

/**
* Available ECS Statuses of operations for Kafka Events
* These operations can be under the label:
* * operationStatus
* * operationStepStatus
*/
// Every member's value is identical to its key, so the enum is derived from the list of status names.
const EcsOperationAndStepStatus = Object.freeze(Object.fromEntries([
  'DONE_OK',
  'DONE_ERROR',
  'DONE_TIMEOUT',
].map((status) => [status, status])));

exports.EcsOperationAndStepStatus = EcsOperationAndStepStatus;
6 changes: 5 additions & 1 deletion Control/public/common/enums/DetectorState.enum.js
Original file line number Diff line number Diff line change
Expand Up @@ -62,5 +62,9 @@ export const DetectorStateStyle = Object.freeze({
EOR_AVAILABLE: '',
PFR_AVAILABLE: '',
PFR_UNAVAILABLE: '',
TIMEOUT: '',
TIMEOUT: 'bg-danger white',
// Custom states for the SOR/EOR operations covered by ECS when DCS does not reply
DONE_TIMEOUT: 'bg-danger white',
DONE_ERROR: 'bg-danger white',
DONE_OK: 'bg-primary white',
});
38 changes: 33 additions & 5 deletions Control/public/pages/Environment/components/dcs/dcsSorPanel.js
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ import { infoLoggerButtonLink } from './../../../../common/buttons/infoLoggerRed
/**
* Panel that will display DCS last states during the SOR activity at the start of run
* @param {string} id - environment id
* @param {array<string>} detectors - list of detectors
* @param {array<string>} detectors - list of detectors as received by the environment currently displayed in variable `includedDetectors`
* @return {vnode}
*/
export const dcsSorPanel = (id, detectors) => {
Expand Down Expand Up @@ -67,18 +67,46 @@ export const dcsSorPanel = (id, detectors) => {
}

/**
* Group operations by detector
* Group events by detector and filter out events that arrive after a final event.
* Some detectors might end the SOR sequence in a RUN_OK, DONE_TIMEOUT or DONE_ERROR state but still receive an event from ECS saying that it failed.
* This is incorrect from ECS and should be filtered out.
* @param {array<object>} operations - list of operations
* @return {object}
*/
const groupOperationsByDetector = (operations) => {
const groupedOperations = {};
operations.forEach((operation) => {
operation.detectors.forEach((detector) => {
operations.forEach((event) => {
const eventCopy = JSON.parse(JSON.stringify(event));
eventCopy.detectors.forEach((detector) => {
if (!groupedOperations[detector]) {
groupedOperations[detector] = [];
if (!eventCopy?.state) {
// first operation might be an error or timeout which comes without a state
eventCopy.state = eventCopy.operationStepStatus ?? eventCopy.operationStatus;
}
groupedOperations[detector].push(eventCopy);
} else {
const lastOperation = groupedOperations[detector][groupedOperations[detector].length - 1];
if (eventCopy.state) {
// If there is a state, it means it is still an event from DCS
groupedOperations[detector].push(eventCopy);
} else if (
lastOperation?.state !== 'RUN_OK'
&& lastOperation?.state !== 'DONE_TIMEOUT'
&& lastOperation?.state !== 'DONE_ERROR'
) {
// we only add event or step with status DONE_TIMEOUT or DONE_ERROR if the last event state of that detector is SOR_PROGRESSING
const operationStatus = eventCopy.operationStatus;
const operationStepStatus = eventCopy.operationStepStatus;
// priority is given to operationStep as it offers more granularity
if (operationStepStatus === 'DONE_TIMEOUT' || operationStepStatus === 'DONE_ERROR') {
eventCopy.state = operationStepStatus;
} else if (operationStatus === 'DONE_TIMEOUT' || operationStatus === 'DONE_ERROR') {
eventCopy.state = operationStatus;
}
groupedOperations[detector].push(eventCopy);
}
}
groupedOperations[detector].push(operation);
});
});
return groupedOperations;
Expand Down
Loading