Monitoring
Contents
Monitoring#
Command assert#
I use this script to check that the results of shell commands correspond to the expected output. You can execute any arbitrary shell command.
If the resulting output is an unexpected one, a notification is sent.
The script also creates an RSS feed to complement the standard notifications. The RSS feed file should be accessible by an HTTP server such as Apache.

A Gotify notification showing a Gitea server error#
Basic setup#
install the dependencies
apt-get install python3-yaml python3-requests feedgenerator
install fpyutils. See reference
create a new user
useradd --system -s /bin/bash -U command-assert passwd command-assert usermod -aG jobs command-assert
create the jobs directories. See reference
mkdir -p /home/jobs/{scripts,services}/by-user/command-assert
create the
script
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
#
# command_assert.py
#
# Install as /home/jobs/scripts/by-user/command-assert/command_assert.py
#
# Copyright (C) 2020-2022 Franco Masotti (franco \D\o\T masotti {-A-T-} tutanota \D\o\T com)
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
r"""command_assert.py.

Run the shell commands listed in a YAML configuration file, compare their
output and return value with the expected ones, send notifications and
maintain an Atom feed (plus its YAML cache) of the reported events.

Usage: command_assert.py CONFIGURATION_FILE
"""

import datetime
import pathlib
import re
import shlex
import subprocess
import sys
import uuid

import feedgenerator
import fpyutils
import yaml

# Expected keys and value types of every configuration section.
# Note: ``bool`` is a subclass of ``int`` so booleans pass the ``int`` checks.
_MESSAGE_STATUS_SCHEMA = {
    'ok': str,
    'error': str,
}
_PROCESS_IN_TIMEOUT_SCHEMA = {
    'retval': int,
    'output': str,
}
_FEED_SCHEMA = {
    'enabled': bool,
    'feed': str,
    'cache': str,
    'total_last_feeds_to_keep': int,
    'title': str,
    'link': str,
    'author_name': str,
    'author_email': str,
    'description': str,
}
_COMMAND_SCHEMA = {
    'command': str,
    'file_descriptor': str,
    'strict_matching': bool,
    'expected_output': str,
    'expected_retval': int,
    'timeout_interval': int,
    'log_if_ok': bool,
    'feed': dict,
}
_COMMAND_FEED_SCHEMA = {
    'enabled': bool,
    'title': str,
    'content': str,
    'description': str,
    'no_repeat_timeout_seconds': int,
}
# Expected keys and value types of every element of the feed cache.
_CACHE_ENTRY_SCHEMA = {
    'command_id': str,
    'content': str,
    'description': str,
    'email': str,
    'link': str,
    'name': str,
    'pub_date': datetime.datetime,
    'title': str,
}


class InvalidCache(Exception):
    r"""The feed cache file does not have the expected structure."""


class InvalidConfiguration(Exception):
    r"""The configuration file does not have the expected structure."""


def send_notification(message: str, notify: dict):
    r"""Send the message through every enabled notification channel.

    :param message: the text to be sent.
    :param notify: the ``notify`` section of the configuration. It must
        contain the ``gotify`` and ``email`` sub-sections, each one with an
        ``enabled`` key and the fields read below.
    """
    gotify = notify['gotify']
    if gotify['enabled']:
        # The configured Gotify message is used as a prefix.
        m = gotify['message'] + '\n' + message
        fpyutils.notify.send_gotify_message(gotify['url'], gotify['token'], m,
                                            gotify['title'],
                                            gotify['priority'])

    email = notify['email']
    if email['enabled']:
        fpyutils.notify.send_email(message, email['smtp_server'],
                                   email['port'], email['sender'],
                                   email['user'], email['password'],
                                   email['receiver'], email['subject'])


def run_command(
    command: str,
    file_descriptor: str,
    process_timeout_interval: int = 60,
    process_in_timeout_retval: int = -131072,
    process_in_timeout_output: str = '<--##--##-->',
) -> tuple:
    r"""Run the command and capture the selected output and return value.

    :param command: the command line, split with ``shlex.split`` and
        executed without a shell.
    :param file_descriptor: which stream to return: ``stdout``, ``stderr``
        or ``both`` (stdout followed by stderr).
    :param process_timeout_interval: seconds to wait before giving up.
    :param process_in_timeout_retval: return value reported on timeout.
    :param process_in_timeout_output: output reported on timeout.
    :returns: a tuple ``(output, retval)``.
    :raises ValueError: if ``file_descriptor`` is not one of the allowed
        values.

    Errors raised by ``subprocess.run`` other than the timeout, such as a
    missing executable, are not handled here and propagate to the caller.
    """
    if file_descriptor not in ('stderr', 'stdout', 'both'):
        raise ValueError("file_descriptor must be one of 'stderr', 'stdout', "
                         "'both', got " + repr(file_descriptor))

    try:
        result = subprocess.run(shlex.split(command),
                                capture_output=True,
                                timeout=process_timeout_interval)
    except subprocess.TimeoutExpired:
        return process_in_timeout_output, process_in_timeout_retval

    if file_descriptor == 'stdout':
        raw_output = result.stdout
    elif file_descriptor == 'stderr':
        raw_output = result.stderr
    else:
        raw_output = result.stdout + result.stderr

    # Undecodable bytes must not abort the whole monitoring run.
    return raw_output.decode('UTF-8', errors='replace'), result.returncode


def assert_output(output: str,
                  expected_output: str,
                  retval: int,
                  expected_retval: int,
                  strict_matching=False) -> bool:
    r"""Check that the output and the return value correspond to expected values.

    :param output: the captured output of the command.
    :param expected_output: a literal string: regex special characters are
        escaped before matching.
    :param retval: the return value of the command.
    :param expected_retval: the expected return value.
    :param strict_matching: if ``True`` the expected string must be found at
        the very beginning of the output (``re.match``), otherwise it can be
        anywhere in the output, similarly to grep (``re.search``).
    :returns: ``True`` if both the output and the return value match.
    """
    # Escape special regex characters.
    pattern = re.escape(expected_output)
    matcher = re.match if strict_matching else re.search

    return matcher(pattern, output) is not None and retval == expected_retval


########
# Feed #
########
def add_feed_element(feed, id: int, title: str, content: str,
                     date: datetime.datetime, description: str,
                     author_email: str, author_name: str, link: str):
    r"""Add one item to the feed.

    ``id`` is stored as the unique id of the item (as a string) and ``date``
    is used for both the publication and the update date.
    """
    feed.add_item(
        unique_id=str(id),
        title=title,
        link=link,
        description=description,
        author_email=author_email,
        author_name=author_name,
        pubdate=date,
        updatedate=date,
        content=content,
    )


#########
# Files #
#########
def read_yaml_file(file: str) -> dict:
    r"""Load a YAML file using the safe loader.

    :returns: the loaded data or an empty dict if the file does not exist
        or is empty.
    """
    if not pathlib.Path(file).is_file():
        return dict()

    with open(file, 'r') as f:
        data = yaml.load(f, Loader=yaml.SafeLoader)

    # An empty YAML document is loaded as None.
    return dict() if data is None else data


def read_cache_file(file: str) -> dict:
    r"""Load the feed cache.

    :raises InvalidCache: if the cache does not have the expected structure.
    """
    cache = read_yaml_file(file)
    if not check_cache_structure(cache):
        raise InvalidCache

    return cache


def write_cache(cache: dict, cache_file: str):
    r"""Serialize the cache as YAML, overwriting the cache file."""
    with open(cache_file, 'w') as f:
        f.write(yaml.dump(cache))


def _has_typed_keys(mapping, expected_types: dict) -> bool:
    r"""Check that mapping is a dict with all the keys of the expected types."""
    return (isinstance(mapping, dict)
            and all(key in mapping and isinstance(mapping[key], kind)
                    for key, kind in expected_types.items()))


def _as_utc(moment: datetime.datetime) -> datetime.datetime:
    r"""Return an aware datetime, treating naive values as UTC.

    Timestamps are written by this script in UTC. Depending on how the YAML
    loader handles the UTC offset they might be loaded back as naive objects
    which cannot be subtracted from an aware one.
    """
    if moment.tzinfo is None:
        return moment.replace(tzinfo=datetime.timezone.utc)
    return moment


##################################
# Check configuration structure  #
##################################
def check_configuration_structure(configuration: dict) -> bool:
    r"""Check that the configuration has all the expected keys and types.

    The ``message_status``, ``process_in_timeout``, ``feed`` and
    ``commands`` sections are checked. The ``notify`` section is not.

    :returns: ``True`` if the structure is valid, ``False`` otherwise.
        Malformed input results in ``False``, never in an exception.
    """
    if not isinstance(configuration, dict):
        return False

    sections_ok = (
        _has_typed_keys(configuration.get('message_status'),
                        _MESSAGE_STATUS_SCHEMA)
        and _has_typed_keys(configuration.get('process_in_timeout'),
                            _PROCESS_IN_TIMEOUT_SCHEMA)
        and _has_typed_keys(configuration.get('feed'), _FEED_SCHEMA)
        and isinstance(configuration.get('commands'), dict))
    if not sections_ok:
        return False

    return all(
        _has_typed_keys(cmd, _COMMAND_SCHEMA)
        and _has_typed_keys(cmd['feed'], _COMMAND_FEED_SCHEMA)
        for cmd in configuration['commands'].values())


#########################
# Check cache structure #
#########################
def check_cache_structure(cache: dict) -> bool:
    r"""Check that the cache has the expected structure.

    All the keys must be integers, the positive ones must not decrease in
    iteration order, and every element must have the expected fields.

    :returns: ``True`` if the structure is valid, ``False`` otherwise.
    """
    if not isinstance(cache, dict):
        return False

    previous = None
    for key in cache:
        if not isinstance(key, int):
            return False
        if previous is None:
            # The first key is the initial term of comparison.
            previous = key
        if key > 0:
            if key < previous:
                return False
            previous = key

    return all(
        _has_typed_keys(element, _CACHE_ENTRY_SCHEMA)
        for element in cache.values())


def main():
    r"""Run the pipeline."""
    if len(sys.argv) < 2:
        sys.exit('usage: command_assert.py CONFIGURATION_FILE')

    # Load the configuration.
    # The path is used as-is: shell quoting it would corrupt any file name
    # containing spaces or special characters.
    configuration_file = sys.argv[1]
    with open(configuration_file, 'r') as f:
        config = yaml.load(f, Loader=yaml.SafeLoader)
    if not check_configuration_structure(config):
        raise InvalidConfiguration

    commands = config['commands']
    # Create a new feed.
    feed = feedgenerator.Atom1Feed(
        title=config['feed']['title'],
        link=config['feed']['link'],
        author_name=config['feed']['author_name'],
        author_email=config['feed']['author_email'],
        description=config['feed']['description'],
    )
    now = datetime.datetime.now(datetime.timezone.utc)

    # Load feed cache.
    cache = read_cache_file(config['feed']['cache'])

    # New elements get keys following the last existing one.
    last_key = list(cache)[-1] if cache else 0

    # Keep only the last existing n elements.
    # Elements added to the running session will be purged on
    # the next run.
    keep = config['feed']['total_last_feeds_to_keep']
    cache = dict(list(cache.items())[-keep:])

    # Replay existing cache.
    # The cache key is the unique id of the feed element so that ids stay
    # the same across runs, whatever the number of purged elements is.
    for key, element in cache.items():
        add_feed_element(
            feed,
            key,
            element['title'],
            element['content'],
            element['pub_date'],
            element['description'],
            element['email'],
            element['name'],
            element['link'],
        )

    # Number of elements added to the cache during this run.
    added = 0
    for name, command in commands.items():
        output, retval = run_command(
            command['command'],
            command['file_descriptor'],
            command['timeout_interval'],
            config['process_in_timeout']['retval'],
            config['process_in_timeout']['output'],
        )
        assertion_passes = assert_output(output, command['expected_output'],
                                         retval, command['expected_retval'],
                                         command['strict_matching'])

        # Nothing to report.
        if assertion_passes and not command['log_if_ok']:
            continue

        # Log results.
        if assertion_passes:
            result = config['message_status']['ok']
        else:
            result = config['message_status']['error']
        send_notification(name + ' returned: ' + result, config['notify'])

        # Create new feed.
        # NOTE(review): a feed element is also created for a passing command
        # when log_if_ok is set. Confirm that this is intended.
        if not command['feed']['enabled']:
            continue

        command_id = str(uuid.uuid3(uuid.NAMESPACE_DNS, name))
        # Get the most recent item. Filter by uuid.
        # See
        # https://docs.python.org/3.8/library/stdtypes.html#dict.values
        # about dict order iteration.
        latest = next((element for element in reversed(list(cache.values()))
                       if element['command_id'] == command_id), None)

        timeout = command['feed']['no_repeat_timeout_seconds']
        # total_seconds() is needed: timedelta.seconds ignores whole days
        # and would hide elements older than 24 hours.
        if (latest is not None
                and (now - _as_utc(latest['pub_date'])).total_seconds() <=
                timeout):
            continue

        # Always append.
        # last_key+added always > last_key
        added += 1
        new_key = last_key + added
        # NOTE(review): the description comes from the global feed section,
        # the per-command feed description is validated but never read.
        cache[new_key] = {
            'title': command['feed']['title'],
            'content': command['feed']['content'],
            'pub_date': now,
            'description': config['feed']['description'],
            'email': config['feed']['author_email'],
            'name': config['feed']['author_name'],
            'link': str(),
            'command_id': command_id,
        }
        add_feed_element(
            feed,
            new_key,
            cache[new_key]['title'],
            cache[new_key]['content'],
            cache[new_key]['pub_date'],
            cache[new_key]['description'],
            cache[new_key]['email'],
            cache[new_key]['name'],
            cache[new_key]['link'],
        )

    # Write only if new elements were added in this run or if the feed file
    # does not exist yet.
    feed_file = config['feed']['feed']
    if (config['feed']['enabled']
            and (added > 0 or not pathlib.Path(feed_file).is_file())):
        write_cache(cache, config['feed']['cache'])
        with open(feed_file, 'w') as fp:
            feed.write(fp, 'utf-8')


if __name__ == '__main__':
    main()
create a
configuration file
#
# command_assert.mypurpose.yaml
#
# Install as /home/jobs/scripts/by-user/command-assert/command_assert.mypurpose.yaml
#
# Copyright (C) 2020-2021 Franco Masotti (franco \D\o\T masotti {-A-T-} tutanota \D\o\T com)
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

# The string that is used for the notifications.
# The key must be spelled with an underscore: command_assert.py looks up
# 'message_status' and rejects the configuration otherwise.
message_status:
    ok: 'OK'
    error: 'ERROR'

# Default values if a process goes in timeout.
# The key must be spelled with underscores: command_assert.py looks up
# 'process_in_timeout' and rejects the configuration otherwise.
process_in_timeout:
    retval: -131072
    output: '<--##--##-->'

# XML feed header.
feed:
    enabled: true

    # Path of the XML feed file.
    # This file is most useful if served with a web server.
    feed: '/home/command-assert/out/command_assert.mypurpose.xml'

    # Path of the cache file.
    cache: '/home/jobs/scripts/by-user/command-assert/.command_assert.mypurpose.yml'

    total_last_feeds_to_keep: 128

    # Feed metadata.
    title: 'Outages of mypurpose'
    link: 'https://outage.my.domain'
    author_name: 'bot'
    author_email: 'myusername@gmail.com'
    description: 'Updates on outages'

commands:
    webserver SSL:
        # The command as you would execute in a shell.
        command: 'curl --head https://my-server.com'

        # {stdout,stderr,both}
        file_descriptor: 'stdout'

        # If set to true the output must start with expected_output,
        # otherwise expected_output can be anywhere in the output.
        strict_matching: false

        # A pattern that needs to be matched in the output.
        # Regex are NOT supported.
        expected_output: 'Server: Apache'

        # The return value is usually 0 for successful processes.
        expected_retval: 0

        # Force kill the process after this time interval in seconds.
        timeout_interval: 5

        # if set to true, send notifications even if the process completes correctly.
        log_if_ok: false

        feed:
            enabled: true
            title: 'outage mypurpose'

            # use HTML.
            content: '<em>Sorry</em>, the webserver was down'

            description: 'outage mypurpose'

            # If an error already exists in cache for less than no_repeat_timeout_seconds,
            # then do not repeat the feed.
            no_repeat_timeout_seconds: 3600

    SSH server:
        command: 'ssh -p nonexistent@my-server.com'
        file_descriptor: 'stderr'
        strict_matching: false
        expected_output: 'NOTICE'
        expected_retval: 255
        timeout_interval: 5
        log_if_ok: false
        feed:
            enabled: true
            title: 'outage mypurpose'
            content: '<em>Sorry</em>, the SSH server was down'
            description: 'outage mypurpose'
            no_repeat_timeout_seconds: 3600

notify:
    email:
        enabled: true
        smtp_server: 'smtp.gmail.com'
        port: 465
        sender: 'myusername@gmail.com'
        user: 'myusername'
        password: 'my awesome password'
        receiver: 'myusername@gmail.com'
        subject: 'command assert'
    gotify:
        enabled: true
        url: '<gotify url>'
        token: '<app token>'
        title: 'command assert'
        message: 'command assert'
        priority: 5
create a
Systemd service unit file
/home/jobs/services/by-user/command-assert/command-assert.mypurpose.service#1[Unit] 2Description=Command assert mypurpose 3Requires=network-online.target 4After=network-online.target 5 6[Service] 7Type=simple 8ExecStart=/home/jobs/scripts/by-user/command-assert/command_assert.py /home/jobs/scripts/by-user/command-assert/command_assert.mypurpose.yaml 9User=command-assert 10Group=command-assert 11 12[Install] 13WantedBy=multi-user.target
create a
Systemd timer unit file
/home/jobs/services/by-user/command-assert/command-assert.mypurpose.timer#1[Unit] 2Description=Once every 30 minutes command assert mypurpose 3 4[Timer] 5OnCalendar=*:0/30 6Persistent=true 7 8[Install] 9WantedBy=timers.target
fix owners and permissions
chown -R command-assert:command-assert /home/jobs/{scripts,services}/by-user/command-assert chmod 700 -R /home/jobs/{scripts,services}/by-user/command-assert
run the deploy script