From 75bd14267e7a5507e0d58b9945dbb24e5d831af8 Mon Sep 17 00:00:00 2001 From: Christian Hesse Date: Fri, 20 Jan 2023 14:24:20 +0100 Subject: check-health: monitor CPU load MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ---- ✂️ ---- 🧮️📈️ Health warning: CPU load The average CPU load on MikroTik is at 76%! ---- ✂️ ---- 🧮️📉️ Health recovery: CPU load The average CPU load on MikroTik decreased to 64%. ---- ✂️ ---- --- check-health | 16 ++++++++++++++++ .../notification-01-cpu-load-high.avif | Bin 0 -> 6066 bytes doc/check-health.d/notification-01-voltage.avif | Bin 4053 -> 0 bytes doc/check-health.d/notification-02-cpu-load-ok.avif | Bin 0 -> 6378 bytes .../notification-02-temperature-high.avif | Bin 3615 -> 0 bytes .../notification-03-temperature-ok.avif | Bin 3763 -> 0 bytes doc/check-health.d/notification-03-voltage.avif | Bin 0 -> 4053 bytes doc/check-health.d/notification-04-psu-fail.avif | Bin 3544 -> 0 bytes .../notification-04-temperature-high.avif | Bin 0 -> 3615 bytes doc/check-health.d/notification-05-psu-ok.avif | Bin 3561 -> 0 bytes .../notification-05-temperature-ok.avif | Bin 0 -> 3763 bytes doc/check-health.d/notification-06-psu-fail.avif | Bin 0 -> 3544 bytes doc/check-health.d/notification-07-psu-ok.avif | Bin 0 -> 3561 bytes doc/check-health.md | 20 +++++++++++++------- global-functions | 1 + 15 files changed, 30 insertions(+), 7 deletions(-) create mode 100644 doc/check-health.d/notification-01-cpu-load-high.avif delete mode 100644 doc/check-health.d/notification-01-voltage.avif create mode 100644 doc/check-health.d/notification-02-cpu-load-ok.avif delete mode 100644 doc/check-health.d/notification-02-temperature-high.avif delete mode 100644 doc/check-health.d/notification-03-temperature-ok.avif create mode 100644 doc/check-health.d/notification-03-voltage.avif delete mode 100644 doc/check-health.d/notification-04-psu-fail.avif create mode 100644 doc/check-health.d/notification-04-temperature-high.avif delete mode 100644 doc/check-health.d/notification-05-psu-ok.avif create mode 100644 doc/check-health.d/notification-05-temperature-ok.avif create mode 100644 doc/check-health.d/notification-06-psu-fail.avif create mode 100644 doc/check-health.d/notification-07-psu-ok.avif diff --git a/check-health b/check-health index e754d69..3957ad1 100644 --- a/check-health +++ b/check-health @@ -10,6 +10,8 @@ :global GlobalFunctionsReady; :while ($GlobalFunctionsReady != true) do={ :delay 500ms; } +:global CheckHealthCPULoad; +:global CheckHealthCPULoadNotified; :global CheckHealthLast; :global CheckHealthTemperature; :global CheckHealthTemperatureDeviation; @@ -43,6 +45,20 @@ $ScriptLock $0; +:set CheckHealthCPULoad (($CheckHealthCPULoad * 4 + [ /system/resource/get cpu-load ] * 10) / 5); +:if ($CheckHealthCPULoad > 750 && $CheckHealthCPULoadNotified != true) do={ + $SendNotification2 ({ origin=$0; \ + subject=([ $SymbolForNotification "abacus,chart-increasing" ] . "Health warning: CPU load"); \ + message=("The average CPU load on " . $Identity . " is at " . ($CheckHealthCPULoad / 10) . "%!") }); + :set CheckHealthCPULoadNotified true; +} +:if ($CheckHealthCPULoad < 650 && $CheckHealthCPULoadNotified = true) do={ + $SendNotification2 ({ origin=$0; \ + subject=([ $SymbolForNotification "abacus,chart-decreasing" ] . "Health recovery: CPU load"); \ + message=("The average CPU load on " . $Identity . " decreased to " . ($CheckHealthCPULoad / 10) . "%.") }); + :set CheckHealthCPULoadNotified false; +} + :foreach Voltage in=[ /system/health/find where type="V" ] do={ :local Name [ /system/health/get $Voltage name ]; :local Value [ /system/health/get $Voltage value ]; diff --git a/doc/check-health.d/notification-01-cpu-load-high.avif b/doc/check-health.d/notification-01-cpu-load-high.avif new file mode 100644 index 0000000..3c1a468 Binary files /dev/null and b/doc/check-health.d/notification-01-cpu-load-high.avif differ diff --git a/doc/check-health.d/notification-01-voltage.avif b/doc/check-health.d/notification-01-voltage.avif deleted file mode 100644 index f4d6005..0000000 Binary files a/doc/check-health.d/notification-01-voltage.avif and /dev/null differ diff --git a/doc/check-health.d/notification-02-cpu-load-ok.avif b/doc/check-health.d/notification-02-cpu-load-ok.avif new file mode 100644 index 0000000..4f12b70 Binary files /dev/null and b/doc/check-health.d/notification-02-cpu-load-ok.avif differ diff --git a/doc/check-health.d/notification-02-temperature-high.avif b/doc/check-health.d/notification-02-temperature-high.avif deleted file mode 100644 index 1a93610..0000000 Binary files a/doc/check-health.d/notification-02-temperature-high.avif and /dev/null differ diff --git a/doc/check-health.d/notification-03-temperature-ok.avif b/doc/check-health.d/notification-03-temperature-ok.avif deleted file mode 100644 index 3bb9c68..0000000 Binary files a/doc/check-health.d/notification-03-temperature-ok.avif and /dev/null differ diff --git a/doc/check-health.d/notification-03-voltage.avif b/doc/check-health.d/notification-03-voltage.avif new file mode 100644 index 0000000..f4d6005 Binary files /dev/null and b/doc/check-health.d/notification-03-voltage.avif differ diff --git a/doc/check-health.d/notification-04-psu-fail.avif b/doc/check-health.d/notification-04-psu-fail.avif deleted file mode 100644 index a4e52a9..0000000 Binary files a/doc/check-health.d/notification-04-psu-fail.avif and /dev/null differ diff --git a/doc/check-health.d/notification-04-temperature-high.avif b/doc/check-health.d/notification-04-temperature-high.avif new file mode 100644 index 0000000..1a93610 Binary files /dev/null and b/doc/check-health.d/notification-04-temperature-high.avif differ diff --git a/doc/check-health.d/notification-05-psu-ok.avif b/doc/check-health.d/notification-05-psu-ok.avif deleted file mode 100644 index bcc5a39..0000000 Binary files a/doc/check-health.d/notification-05-psu-ok.avif and /dev/null differ diff --git a/doc/check-health.d/notification-05-temperature-ok.avif b/doc/check-health.d/notification-05-temperature-ok.avif new file mode 100644 index 0000000..3bb9c68 Binary files /dev/null and b/doc/check-health.d/notification-05-temperature-ok.avif differ diff --git a/doc/check-health.d/notification-06-psu-fail.avif b/doc/check-health.d/notification-06-psu-fail.avif new file mode 100644 index 0000000..a4e52a9 Binary files /dev/null and b/doc/check-health.d/notification-06-psu-fail.avif differ diff --git a/doc/check-health.d/notification-07-psu-ok.avif b/doc/check-health.d/notification-07-psu-ok.avif new file mode 100644 index 0000000..bcc5a39 Binary files /dev/null and b/doc/check-health.d/notification-07-psu-ok.avif differ diff --git a/doc/check-health.md b/doc/check-health.md index 07151ff..b498998 100644 --- a/doc/check-health.md +++ b/doc/check-health.md @@ -12,32 +12,38 @@ Description This script is run from scheduler periodically, sending notification on health related events: +* high CPU load * voltage jumps up or down more than configured threshold or drops below limit * power supply failed or recovered * temperature is above or below threshold Note that bad initial state will not trigger an event. -Only sensors available in hardware can be checked. See what your -hardware supports: +Monitoring CPU load works on all devices. Other than that only sensors +available in hardware can be checked. See what your hardware supports: /system/health/print; ### Sample notifications +#### CPU load + +![check-health notification cpu load high](check-health.d/notification-01-cpu-load-high.avif) +![check-health notification cpu load ok](check-health.d/notification-02-cpu-load-ok.avif) + #### Voltage -![check-health notification voltage](check-health.d/notification-01-voltage.avif) +![check-health notification voltage](check-health.d/notification-03-voltage.avif) #### Temperature -![check-health notification](check-health.d/notification-02-temperature-high.avif) -![check-health notification](check-health.d/notification-03-temperature-ok.avif) +![check-health notification temperature high](check-health.d/notification-04-temperature-high.avif) +![check-health notification temperature ok](check-health.d/notification-05-temperature-ok.avif) #### PSU state -![check-health notification](check-health.d/notification-04-psu-fail.avif) -![check-health notification](check-health.d/notification-05-psu-ok.avif) +![check-health notification psu fail](check-health.d/notification-06-psu-fail.avif) +![check-health notification psu ok](check-health.d/notification-07-psu-ok.avif) Requirements and installation ----------------------------- diff --git a/global-functions b/global-functions index bfb7b01..0e94dea 100644 --- a/global-functions +++ b/global-functions @@ -1075,6 +1075,7 @@ # return UTF-8 symbol for unicode name :set SymbolByUnicodeName do={ :local Symbols { + "abacus"="\F0\9F\A7\AE"; "alarm-clock"="\E2\8F\B0"; "calendar"="\F0\9F\93\85"; "chart-decreasing"="\F0\9F\93\89"; -- cgit v1.2.3-54-g00ecf