Manage the maintenance state of alarms¶
Prerequisite¶
git checkout origin/osp-alarms-web-configuration .
git checkout origin/osp-scripts-configuration .
git checkout origin/osp-variables-configuration .
Description¶
In this example we will present a way to manage the maintenance state of alarms.
Alarms are defined to have three levels of maintenance :
site level : a physical location where multiple devices are present.
node level : a sub-system on a site that groups several devices (for example, the generator management).
device level : a specific device.
When maintenance is enabled for a site, node or device, the linked alarms are silenced and a new alarm is generated to indicate that the node or site is in maintenance.
Steps¶
Note
To simplify scripting and share helper functions, all the logic is inside root/maintenance/maintenance.lua.
1. Create the configuration to enable/disable maintenance¶
This action is used to enable or disable the maintenance manually. It takes the following parameters:
An actionType to define if the activation is for a device, node or site.
An optional location if the activation is for a site.
An optional node if the activation is for a node.
An optional serial if the activation is for a device.
The script determines which alarms need to be put in maintenance depending on those parameters.
root/maintenance/manual-enable-disable/action.alarms
{
"moduleId": "modules.alarms.alarms-1",
"scriptFile": "root/maintenance/maintenance.lua",
"accessedValues": [],
"beginBatch": "begin_batch",
"criteria": "criteria",
"execute": "execute",
"endBatch": "end_batch"
}
root/maintenance/maintenance.lua
---------------------------------------------------------------------
-- Enable maintenance upon a user action.
-- used by root.maintenance.enable

--- Batch start hook: reset the shared per-batch state.
-- `batch_execution_data` is deliberately a global so that `end_batch` can
-- iterate over it once the batch is done.
-- NOTE(review): the code that fills this table is not visible here
-- (presumably `update_alarm`) -- confirm.
function begin_batch()
  batch_execution_data = {}
end
163
-- Wrap a scalar in a one-element list; tables pass through unchanged.
-- A nil value yields the empty table, because `{nil}` has no entries.
local function maintenance_as_list(value)
  if type(value) == "table" then
    return value
  end
  return { value }
end

--- Build the alarm filter for a manual enable/disable maintenance action.
--
-- Depending on `data[ACTION_TYPE]` the filter selects:
--   * device: the alarms whose `_id` is one of the given serials;
--   * node:   the alarms of each (node, site) pair, excluding the alarm whose
--             summary is DEVICE_MAINTENANCE_SUMMARY;
--   * site:   the alarms of each site, excluding the alarm whose summary is
--             SITE_MAINTENANCE_SUMMARY.
-- Every clause ends with a comma, as the device branch always did, so that
-- several nodes or sites produce a well-formed list between FILTER_BEGIN and
-- FILTER_END (the clauses used to be glued together without any separator).
--
-- NOTE(review): the values are interpolated into the filter without any
-- escaping; a value containing a single quote breaks (or alters) the filter.
-- Confirm that the caller validates these parameters.
--
-- @param data action parameters; each of the site/node/device entries may be
--             a scalar, a list or nil
-- @return the filter string, or FILTER_EMPTY when the parameters do not
--         describe any target
function criteria(data)
  local actionType = data[ACTION_TYPE]
  local location = maintenance_as_list(data[ACTION_TYPE_SITE])
  local node = maintenance_as_list(data[ACTION_TYPE_NODE])
  local serial = maintenance_as_list(data[ACTION_TYPE_DEVICE])

  -- Clauses are collected and joined once instead of growing a string.
  local clauses = {}

  if actionType == ACTION_TYPE_DEVICE and next(serial) ~= nil then
    for _, id in pairs(serial) do
      clauses[#clauses + 1] = string.format([[{'_id': '%s'},]], id)
    end
  elseif actionType == ACTION_TYPE_NODE and next(node) ~= nil and next(location) ~= nil then
    local node_clause = [[
    {
      '$and': [
        {'additionalData.%s': '%s'},
        {'%s': '%s'},
        {'summary':
          {
            '$ne': '%s'
          }
        }
      ]
    },
    ]]

    -- When a single site is given for several nodes, it applies to all of them.
    local shared_site = nil
    if #location == 1 then
      shared_site = location[1]
    end

    for i, n in pairs(node) do
      local site = location[i]
      if site == nil then
        site = shared_site
      end
      -- A node without any site cannot be matched: skip it rather than
      -- formatting a nil value into the filter.
      if site ~= nil then
        clauses[#clauses + 1] = string.format(node_clause, NODE_FIELD, n, SITE_FIELD, site, DEVICE_MAINTENANCE_SUMMARY)
      end
    end
  elseif actionType == ACTION_TYPE_SITE and next(location) ~= nil then
    local site_clause = [[
    {
      '$and': [
        {'%s': '%s'},
        {'summary':
          {
            '$ne': '%s'
          }
        }
      ]
    },
    ]]
    for _, site in pairs(location) do
      clauses[#clauses + 1] = string.format(site_clause, SITE_FIELD, site, SITE_MAINTENANCE_SUMMARY)
    end
  else
    return FILTER_EMPTY
  end

  if next(clauses) == nil then
    return FILTER_EMPTY
  end
  return FILTER_BEGIN .. table.concat(clauses) .. FILTER_END
end
231
--- Record the maintenance deadline for one alarm matched by `criteria`.
--
-- The deadline is `data["until"]`, or "now + data.hours" when `hours` is
-- given. It is stored under a key that depends on the action type (device,
-- node or site). The alarm itself is then updated through `update_alarm`,
-- except when it carries SITE_STATUS_TAG.
--
-- @param alarm   the alarm being processed
-- @param data    action parameters (`until`, `hours` and the action type)
-- @param actions action handle, forwarded to `update_alarm`
function execute(alarm, data, actions)
  local maintenanceUntil = data["until"]
  if data["hours"] ~= nil then
    local now = Timestamp.now()
    -- NOTE(review): the result of plusHours is discarded, so this assumes the
    -- timestamp is modified in place -- confirm against the Timestamp API.
    now:plusHours(data["hours"])
    maintenanceUntil = now:getValue()
  end

  -- Named `actionType` rather than `type`, which would shadow the builtin.
  local actionType = data[ACTION_TYPE]

  local storeKey = nil
  if actionType == ACTION_TYPE_DEVICE then
    storeKey = MAINTENANCE_STORE_PREFIX .. "-" .. alarm.id
  elseif actionType == ACTION_TYPE_NODE then
    storeKey = MAINTENANCE_STORE_PREFIX .. "-" .. alarm.location .. "-" .. alarm.additionalData[NODE_FIELD]
  elseif actionType == ACTION_TYPE_SITE then
    storeKey = MAINTENANCE_STORE_PREFIX .. "-" .. alarm.location
  end

  if storeKey ~= nil then
    store.set(storeKey, maintenanceUntil)
  end

  -- Alarms tagged with SITE_STATUS_TAG are not updated.
  if alarm.tags[SITE_STATUS_TAG] ~= nil then
    return
  end

  update_alarm(alarm, data, actions, maintenanceUntil)
end
255
--- Batch end hook: insert one "under maintenance" alarm per entry collected
-- in `batch_execution_data` during the batch.
-- Node entries produce a "Node under maintenance" alarm tagged with
-- DEVICE_STATUS_TAG; site entries produce a "Site under maintenance" alarm
-- tagged with SITE_STATUS_TAG. Entries of any other action type are ignored.
function end_batch()
  local minor_severity_path = "root.alarms.severities.minor"

  for entry_key, entry in pairs(batch_execution_data) do
    local kind = entry[ACTION_TYPE]

    if kind == ACTION_TYPE_NODE then
      local minor = severity.get(minor_severity_path)
      local extra = {
        node = entry[NODE_FIELD],
        maintenance = "In maintenance",
        monitored = true,
        region = entry[REGION_FIELD],
        assigned = ASSIGNED_OCC,
        customer = entry[ALARM_CUSTOMER],
      }
      alarms:insert(minor, "Node under maintenance", entry[SITE_FIELD], SCRIPT_NAME, entry_key, { DEVICE_STATUS_TAG }, extra)
    elseif kind == ACTION_TYPE_SITE then
      local minor = severity.get(minor_severity_path)
      local extra = {
        maintenance = "In maintenance",
        monitored = true,
        region = entry[REGION_FIELD],
        assigned = ASSIGNED_OCC,
        customer = entry[ALARM_CUSTOMER],
      }
      alarms:insert(minor, "Site under maintenance", entry[SITE_FIELD], SCRIPT_NAME, entry_key, { SITE_STATUS_TAG }, extra)
    end
  end
end
2. Create the configuration to disable the site/node maintenance automatically¶
This action scans the alarms in maintenance and, if the maintenance has expired, performs the following actions:
Set the severity to clear
Set the maintenance field to Normal
Add a message in the journal
root/maintenance/auto-disable/action.alarms
{
"moduleId": "modules.alarms.alarms-1",
"period": {
"unit": "SECONDS",
"value": 5
},
"scriptFile": "root/maintenance/maintenance.lua",
"criteria": "criteria_disable",
"execute": "execute_disable"
}
root/maintenance/maintenance.lua
---------------------------------------------------------------------
-- Check regularly to disable maintenance mode when necessary.
-- used by root.maintenance.disable

--- Build the filter selecting the alarms to examine for automatic disabling.
-- Only alarms that have the maintenance field are considered. Among them:
--   * alarms whose maintenance field differs from NORMAL_MODE, or
--   * the site maintenance alarm, while its severity is not "clear", or
--   * the device maintenance alarm, while its severity is not "clear".
-- @param unused ignored
-- @return the filter string
function criteria_disable(unused)
  -- Replace with the severity to use by default when the maintenance is
  -- disabled; it is only used if the "clear" severity is not defined.
  local clear_severity = 100
  local clear_path = "root.alarms.severities.clear"
  if severity.has(clear_path) then
    clear_severity = severity.get(clear_path)
  end

  local template = [[
    {
      '$and': [
        {'additionalData.%s': {'$exists': true}},
        {
          '$or': [
            {'additionalData.%s': {'$ne': '%s'}},
            {
              '$and': [
                {'summary': '%s'},
                {'severity': {'$ne': %s}}
              ]
            },
            {
              '$and': [
                {'summary': '%s'},
                {'severity': {'$ne': %s}}
              ]
            }
          ]
        }
      ]
    }
  ]]

  return string.format(
    template,
    MAINTENANCE_FIELD,
    MAINTENANCE_FIELD, NORMAL_MODE,
    SITE_MAINTENANCE_SUMMARY, clear_severity,
    DEVICE_MAINTENANCE_SUMMARY, clear_severity
  )
end
113
--- Leave maintenance mode for one alarm once its maintenance has expired.
--
-- * Site / device maintenance alarms: the deadline is read through
--   `getSiteMaintenance` / `getDeviceMaintenance`; once it has passed, the
--   alarm is escalated to the "clear" severity, its maintenance field is set
--   to NORMAL_MODE and a journal entry is added.
-- * Any other alarm: the deadline is read from its own
--   MAINTENANCE_UNTIL_FIELD; once it has passed, the maintenance field is
--   reset and a journal entry is added (the severity is left untouched).
--
-- @param alarm   the alarm being examined
-- @param unused  ignored
-- @param actions action handle used to modify the alarm
function execute_disable(alarm, unused, actions)
  local now = Timestamp.now()

  -- Replace with the severity to use by default when the maintenance is
  -- disabled; it is only used if the "clear" severity is not defined.
  local clear_severity = 100
  if severity.has("root.alarms.severities.clear") then
    clear_severity = severity.get("root.alarms.severities.clear")
  end

  -- Common tail: optionally clear the severity, then reset the maintenance
  -- field and journal the change.
  local function restore(with_escalation)
    if with_escalation then
      actions:escalate(clear_severity)
    end
    actions:edit(nil, nil, nil, nil, nil, {maintenance = NORMAL_MODE})
    actions:journal(MAINTENANCE_DISABLE_MESSAGE, SCRIPT_NAME)
  end

  local summary = alarm.summary

  -- Regular alarm: the deadline is carried by the alarm itself.
  if summary ~= SITE_MAINTENANCE_SUMMARY and summary ~= DEVICE_MAINTENANCE_SUMMARY then
    local deadline = Timestamp.from(alarm.additionalData[MAINTENANCE_UNTIL_FIELD])
    if now:isNewerThan(deadline) then
      restore(false)
    end
    return
  end

  -- Site or device maintenance alarm: look the deadline up.
  local stored_until = nil
  if summary == SITE_MAINTENANCE_SUMMARY then
    if alarm.location ~= nil then
      stored_until = getSiteMaintenance(alarm.location)
    end
  elseif alarm.additionalData[NODE_FIELD] ~= nil and alarm.location ~= nil then
    stored_until = getDeviceMaintenance(alarm.location, alarm.additionalData[NODE_FIELD])
  end

  -- Nothing is done while no deadline is known or it has not passed yet.
  if stored_until ~= nil and now:isNewerThan(Timestamp.from(stored_until)) then
    restore(true)
  end
end
3. Create the configuration to process new alarms¶
When a new alarm arrives, we must check if the maintenance is enabled for it.
root/maintenance/insert/pre-insert.alarms
{
"moduleId": "modules.alarms.alarms-1",
"priority": 5,
"scriptFile": "root/maintenance/maintenance.lua",
"accessedValues": [],
"for": "match_insertion",
"thenExecute": "then_execute_insertion"
}
root/maintenance/maintenance.lua
---------------------------------------------------------------------
-- Check if a new alarm needs to be set with the maintenance mode
-- used by root.maintenance.insertion

--- Tell whether an incoming alarm must go through the maintenance check.
-- @param alarm the incoming alarm
-- @return false for alarms carrying SITE_STATUS_TAG, true for all others
function match_insertion(alarm)
  return alarm.tags[SITE_STATUS_TAG] == nil
end
61
--- Stamp an incoming alarm with its maintenance state before insertion.
-- The deadline returned by `getOlderMaintenanceUntil` is compared with the
-- current time: the forwarded alarm gets MAINTENANCE_MODE while the deadline
-- is still in the future and NORMAL_MODE otherwise. The deadline itself is
-- always copied into MAINTENANCE_UNTIL_FIELD.
-- @param alarm      the incoming alarm
-- @param data       not used by this function
-- @param operations handle used to forward the alarm
function then_execute_insertion(alarm, data, operations)
  local stored_until = getOlderMaintenanceUntil(alarm)
  local current_time = Timestamp.now()
  local deadline = Timestamp.from(stored_until)

  local forwarded = operations:forward()
  forwarded.additionalData = alarm.additionalData

  local mode
  if deadline:isNewerThan(current_time) then
    mode = MAINTENANCE_MODE
  else
    mode = NORMAL_MODE
  end

  forwarded.additionalData[MAINTENANCE_FIELD] = mode
  forwarded.additionalData[MAINTENANCE_UNTIL_FIELD] = stored_until
end