.gitignore (vendored, new file)
@@ -0,0 +1,12 @@
/.idea/
__pycache__/

# pip build
/src/de.p1st.monitor.egg-info/
/dist/
/build/
/venv/

# makepkg
/packaging/python-de-p1st-monitor-git-*-any.pkg.tar.zst
/packaging/de-p1st-monitor/
.run/main (export).run.xml (new file)
@@ -0,0 +1,24 @@
<component name="ProjectRunConfigurationManager">
  <configuration default="false" name="main (export)" type="PythonConfigurationType" factoryName="Python">
    <module name="de-p1st-monitor" />
    <option name="INTERPRETER_OPTIONS" value="" />
    <option name="PARENT_ENVS" value="true" />
    <envs>
      <env name="PYTHONUNBUFFERED" value="1" />
    </envs>
    <option name="SDK_HOME" value="" />
    <option name="WORKING_DIRECTORY" value="$PROJECT_DIR$/src/de/p1st/monitor" />
    <option name="IS_MODULE_SDK" value="true" />
    <option name="ADD_CONTENT_ROOTS" value="true" />
    <option name="ADD_SOURCE_ROOTS" value="true" />
    <EXTENSION ID="PythonCoverageRunConfigurationExtension" runner="coverage.py" />
    <option name="SCRIPT_NAME" value="$PROJECT_DIR$/src/de/p1st/monitor/main.py" />
    <option name="PARAMETERS" value="-c $PROJECT_DIR$/cfg/yodaTux.ini --export" />
    <option name="SHOW_COMMAND_LINE" value="false" />
    <option name="EMULATE_TERMINAL" value="false" />
    <option name="MODULE_MODE" value="false" />
    <option name="REDIRECT_INPUT" value="false" />
    <option name="INPUT_FILE" value="" />
    <method v="2" />
  </configuration>
</component>
.run/main (help).run.xml (new file)
@@ -0,0 +1,24 @@
<component name="ProjectRunConfigurationManager">
  <configuration default="false" name="main (help)" type="PythonConfigurationType" factoryName="Python">
    <module name="de-p1st-monitor" />
    <option name="INTERPRETER_OPTIONS" value="" />
    <option name="PARENT_ENVS" value="true" />
    <envs>
      <env name="PYTHONUNBUFFERED" value="1" />
    </envs>
    <option name="SDK_HOME" value="" />
    <option name="WORKING_DIRECTORY" value="$PROJECT_DIR$/src/de/p1st/monitor" />
    <option name="IS_MODULE_SDK" value="true" />
    <option name="ADD_CONTENT_ROOTS" value="true" />
    <option name="ADD_SOURCE_ROOTS" value="true" />
    <EXTENSION ID="PythonCoverageRunConfigurationExtension" runner="coverage.py" />
    <option name="SCRIPT_NAME" value="$PROJECT_DIR$/src/de/p1st/monitor/main.py" />
    <option name="PARAMETERS" value="--help" />
    <option name="SHOW_COMMAND_LINE" value="false" />
    <option name="EMULATE_TERMINAL" value="false" />
    <option name="MODULE_MODE" value="false" />
    <option name="REDIRECT_INPUT" value="false" />
    <option name="INPUT_FILE" value="" />
    <method v="2" />
  </configuration>
</component>
.run/main (log).run.xml (new file)
@@ -0,0 +1,24 @@
<component name="ProjectRunConfigurationManager">
  <configuration default="false" name="main (log)" type="PythonConfigurationType" factoryName="Python">
    <module name="de-p1st-monitor" />
    <option name="INTERPRETER_OPTIONS" value="" />
    <option name="PARENT_ENVS" value="true" />
    <envs>
      <env name="PYTHONUNBUFFERED" value="1" />
    </envs>
    <option name="SDK_HOME" value="" />
    <option name="WORKING_DIRECTORY" value="$PROJECT_DIR$/src/de/p1st/monitor" />
    <option name="IS_MODULE_SDK" value="true" />
    <option name="ADD_CONTENT_ROOTS" value="true" />
    <option name="ADD_SOURCE_ROOTS" value="true" />
    <EXTENSION ID="PythonCoverageRunConfigurationExtension" runner="coverage.py" />
    <option name="SCRIPT_NAME" value="$PROJECT_DIR$/src/de/p1st/monitor/main.py" />
    <option name="PARAMETERS" value="-c $PROJECT_DIR$/cfg/yodaTux.ini" />
    <option name="SHOW_COMMAND_LINE" value="false" />
    <option name="EMULATE_TERMINAL" value="false" />
    <option name="MODULE_MODE" value="false" />
    <option name="REDIRECT_INPUT" value="false" />
    <option name="INPUT_FILE" value="" />
    <method v="2" />
  </configuration>
</component>
LICENSE (new file)
@@ -0,0 +1,32 @@
The Clear BSD License

Copyright (c) 2023 Daniel Langbein
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted (subject to the limitations in the disclaimer
below) provided that the following conditions are met:

* Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.

* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.

* Neither the name of the copyright holder nor the names of its
contributors may be used to endorse or promote products derived from this
software without specific prior written permission.

NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE GRANTED BY
THIS LICENSE. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
Makefile (new file)
@@ -0,0 +1,47 @@
PKGNAME := de-p1st-monitor

.PHONY: all
all: install-pkgbuild

.PHONY: install-pkgbuild
install-pkgbuild: cron ## Install with pacman (on Arch Linux)
	sudo pacman -S --needed base-devel
	cd packaging && makepkg -fCcsri && rm -rf $(PKGNAME)
	${MAKE} install-files

.PHONY: install-pip
install-pip: notify cron ## Install with pip
	sudo python3 -m pip install --upgrade --force-reinstall .
	${MAKE} install-files

.PHONY: install-files
install-files:
	sudo install -m0644 cron.d/$(PKGNAME) /etc/cron.d/$(PKGNAME)

	sudo install --directory -m755 /etc/$(PKGNAME)/
	sudo install -m0644 cfg/* /etc/$(PKGNAME)/

.PHONY: notify ## Check if exec-notify is installed.
notify:
	# `type` does not work e.g. on Ubuntu 18.04
	which exec-notify

.PHONY: cron ## Check if cron (e.g. cronie) is running.
cron:
	# Check if cron.d exists
	stat /etc/cron.d/
	# Check if cron is running
	pgrep cron


.PHONY: clean-pkgbuild
clean-pkgbuild: clean-files
	sudo pacman -Rns python-$(PKGNAME)-git

.PHONY: clean-pip
clean-pip: clean-files
	sudo python3 -m pip uninstall -y $(PKGNAME)

.PHONY: clean-files
clean-files:
	sudo rm -rf /etc/cron.d/$(PKGNAME) /etc/$(PKGNAME) /var/log/$(PKGNAME).cron
README.md (new file)
@@ -0,0 +1,248 @@
# de-p1st-monitor

## Research

See [./research](./research).

- HDD temp:
  - Modern hard drives will throttle their read and write speeds
    when the drive reaches a critical pre-set temperature
    (usually around 60°C)
  - 20-50°C (short-term)
  - 20-40°C (long-term usage)
- SSD temp:
  - Most SSDs implement thermal throttling as a safety feature
    if a drive gets too hot. The closer the drive gets to the 70°C limit
    that most manufacturers set, the more likely it is that it
    will start to slow itself down to prevent failure.
  - 30-50°C

## Keep it simple!

Lines of code including docstrings and comments:

```shell
find ./src -name '*.py' | xargs wc -l
#=> 1394 total
```

## Configuration

See [cfg/yodaTux.ini](cfg/yodaTux.ini) for a configuration file covering all config options.
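
For instance, the following excerpt of that file configures the log directory and one temperature sensor:

```ini
[logging]
; The CSV logfiles are saved in this directory.
dir = /var/log/de-p1st-monitor/

[temp.1]
; `sensor` and `label` are used to identify one temperature value.
sensor = k10temp
label = Tctl
; Warn if temperature is above this value. Unit: °C
warn_if_above = 80
; Send warning if critical values were reached 2 times during the last 4 logs.
warn_threshold = 2
warn_data_range = 4
```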

## Installation

Install dependencies:

- on Arch Linux

  ```shell
  # TODO
  # Optional: 1-wire temperature sensor.
  sudo pacman -S digitemp  # TODO: configure your sensor
  ```

- on Ubuntu

  ```shell
  sudo apt-get install python3-pip

  # Ubuntu 18.04 and below
  sudo apt-get install python3-setuptools
  sudo apt-get install python3-wheel

  sudo apt-get install python3-psutil

  # Ubuntu 18.04 and below: psutil < 5.6.2
  sudo apt-get install python3-dev
  sudo apt-get install build-essential
  # Ubuntu 20.04 and below: psutil < 5.6.2
  sudo python3 -m pip install psutil --upgrade
  ```

Install:

- on Arch Linux

  ```shell
  make
  ```

- on Ubuntu

  ```shell
  make install-pip
  ```

## Usage

### Command line interface

```
usage: de-p1st-monitor [-h] [--config CONFIG] [--export]

Iterates over all config sections. For each section the current sensor data is
read and logged to a .csv file.

options:
  -h, --help            show this help message and exit
  --config CONFIG, -c CONFIG
                        Path to .ini configuration file.
  --export, -e          If `True`, export .csv files and print their paths to
                        stdout. No sensor data is logged during this.
```
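
If `--config` is omitted, the program falls back to the host-specific file `/etc/de-p1st-monitor/<hostname>.ini` (see `src/de/p1st/monitor/cfg/singleton.py` below). A typical invocation therefore looks like:

```shell
# Use the host-specific config:
de-p1st-monitor
# Or pass a config file explicitly:
de-p1st-monitor -c cfg/yodaTux.ini
```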

### Periodic logging

Add a cron entry executing this e.g. every 3 minutes:

```shell
de-p1st-monitor
```
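
The repository ships such an entry: [cron.d/de-p1st-monitor](cron.d/de-p1st-monitor) (installed by `make install-files`) runs the logger every 3 minutes and redirects its output to a logfile:

```
*/3 * * * * root exec-notify de-p1st-monitor > /var/log/de-p1st-monitor.cron 2>&1
```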

## Example log files

```shell
ssh nas 'tail -n 1 /var/log/de-p1st-monitor/*'
```
```
==> /var/log/de-p1st-monitor/cpu_15min.csv <==
20230315T103001,0.10400390625

==> /var/log/de-p1st-monitor/cpu_1min.csv <==
20230315T103001,0.03076171875

==> /var/log/de-p1st-monitor/cpu_5min.csv <==
20230315T103001,0.0301513671875

==> /var/log/de-p1st-monitor/drive_20d86155-30d4-404c-95e8-c701cfb16ca5.csv <==
20230315T103001,24

==> /var/log/de-p1st-monitor/drive_4651c3f1-e4b8-45aa-a823-df762530a307.csv <==
20230315T103001,21

==> /var/log/de-p1st-monitor/drive_68c349e8-5118-4773-9fd5-5dbad9acee4e.csv <==
20230315T103001,29

==> /var/log/de-p1st-monitor/drive_b8ef1da9-d76d-44b4-86d4-71c82c888b6f.csv <==
20230315T103001,28

==> /var/log/de-p1st-monitor/filesystem_3CBA-B4EA.csv <==
20230315T103001,0.228

==> /var/log/de-p1st-monitor/filesystem_a454430b-dee3-4b6b-8325-f7bdb9435ed1.csv <==
20230314T231501,0.762

==> /var/log/de-p1st-monitor/filesystem_b8ef1da9-d76d-44b4-86d4-71c82c888b6f.csv <==
20230315T103001,0.034

==> /var/log/de-p1st-monitor/filesystem_c385a436-0288-486f-a2b9-c64c2db667e7.csv <==
20230315T103001,0.374

==> /var/log/de-p1st-monitor/memory.csv <==
20230315T103001,4127,15329

==> /var/log/de-p1st-monitor/net_enp0s31f6.csv <==
20230315T103001,69366974632,58725303985,20230304T173014

==> /var/log/de-p1st-monitor/swap.csv <==
20230315T103001,25,4095

==> /var/log/de-p1st-monitor/temp_coretemp_Core 0.csv <==
20230315T103001,26.0

==> /var/log/de-p1st-monitor/temp_coretemp_Core 1.csv <==
20230315T103001,34.0

==> /var/log/de-p1st-monitor/temp_coretemp_Package id 0.csv <==
20230315T103001,35.0
```
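
Note that `tail -n 1` only shows the newest data row. The first line of each file is a schema header such as `datetime#Date,float#LoadAverage1min`, produced by the `data_schema()` method of the corresponding logger (see `src/de/p1st/monitor/logger.py` below); timestamps are UTC in the format `%Y%m%dT%H%M%S`.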

## Plots

### Creating plots with graph-cli

1) Export and fetch data

   ```shell
   ssh_target=rootnas
   dst=~/de-p1st-monitor-"${ssh_target}"
   files="${dst}".files

   # Export .csv files on SSH target and save list of exported files to $files.
   ssh "${ssh_target}" 'de-p1st-monitor --export' > "${files}"

   rm -rf "${dst}"
   mkdir -p "${dst}"

   rsync --checksum --archive --progress --human-readable --delete \
     --files-from="${files}" "${ssh_target}":/ "${dst}"
   mv "${dst}"/var/log/de-p1st-monitor/* "${dst}"
   rm -r "${dst}"/var "${files}"

   cd "${dst}"
   ```

2) Install (python) `graph-cli`

   ```shell
   python -m venv ~/de-p1st-monitor.venv
   source ~/de-p1st-monitor.venv/bin/activate
   pip install graph-cli
   ```

3) Create plots

   Create one plot for each .csv file:

   ```shell
   sample_duration=4H

   for file in *.csv; do
     graph "${file}" -x 1 --resample "${sample_duration}" --figsize 1600x1000 -o "${file}".resample-"${sample_duration}"-mean.png || {
       echo "Error while processing ${file}"
     }
   done

   for file in {swap,memory}.csv {temp_,drive_,net_,cpu_,filesystem_}*.csv; do
     graph "${file}" -x 1 --resample "${sample_duration}" --resample-action max --figsize 1600x1000 -o "${file}".resample-"${sample_duration}"-max.png || {
       echo "Error while processing ${file}"
     }
   done
   ```

4) Optionally, create more plots

   Some self-explanatory examples:

   ```shell
   # x and y axis by column name
   graph cpu_1min.csv -x 'datetime#Date' -y 'float#LoadAverage1min' --resample 1H -o cpu_1min_resample-1H.png
   # x and y axis by column number
   graph cpu_1min.csv -x 1 -y 2 --resample 1H -o cpu_1min_resample-1H.png
   # specify x axis; use all other axes for y
   graph cpu_1min.csv -x 1 --resample 1H -o cpu_1min_resample-1H.png
   # increased plot size
   graph cpu_1min.csv -x 1 --resample 1H --figsize 1600x1000 -o cpu_1min_resample-1H.png
   ```

   ```shell
   # resample using sum
   graph net_enp0s31f6.csv.exported.csv -x 1 --resample 1H --resample-action sum --figsize 1600x1000 -o net_enp0s31f6.csv.exported_resample-1H-sum.png
   ```

   ```shell
   # resample using max
   graph cpu_1min.csv -x 1 --resample 1H --resample-action max --figsize 1600x1000 -o cpu_1min_resample-1H-max.png
   ```

### Example plots

![img](images/cpu_1min.csv.resample-1H.png)
![img](images/drive_68c349e8-5118-4773-9fd5-5dbad9acee4e.csv.resample-1H.png)
![img](images/filesystem_c385a436-0288-486f-a2b9-c64c2db667e7.csv.resample-1H.png)
![img](images/memory.csv.resample-1H.png)
![img](images/net_enp0s31f6.csv.exported.csv.resample-1H.png)
![img](images/swap.csv.resample-1H.png)
![img](images/temp_coretemp_Package%20id%200.csv.resample-1H.png)
TODO.md (new file)
@@ -0,0 +1,264 @@
# TODOs

## Public IP address

Log the public IP address. Reuse `netcup-dns` python functions.

## Rewrite

* ~~easier configuration~~
* ~~easier read/write from/to csv~~
* ~~use classes & objects~~

* ~~create plots?~~

* Don't emit a warning again if, during the previous log, a lower warning was already emitted (see the sketch after this list)
  * Example:
    * log1: 30°C OK
    * log2: 40°C Warning sent
    * log3: 35°C Still above limit, but don't send warning again as value decreased
    * log4: 37°C Send another warning: the value increased since last logging
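
A minimal Python sketch of this rule (the `should_warn()` helper is hypothetical, not yet part of the code base):

```python
def should_warn(values: list[float], limit: float) -> bool:
    """Decide for the newest value in `values` whether to warn.

    Warn if the newest value is above `limit`, but once a warning was
    sent, stay silent until the value increases again.
    """
    if not values or values[-1] <= limit:
        return False  # e.g. log1: 30°C OK
    # Earlier values above the limit have already triggered a warning.
    previous_alerts = [v for v in values[:-1] if v > limit]
    if not previous_alerts:
        return True  # e.g. log2: 40°C, first value above the limit
    # log3: 35°C < 40°C, value decreased -> no new warning;
    # log4: 37°C > 35°C, value increased again -> warn once more.
    return values[-1] > values[-2]
```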

## Use Grafana to visualize metrics

One can use Prometheus + Grafana to collect and visualize server metrics.

> https://geekflare.com/best-open-source-monitoring-software/
> This list won’t be complete without including two fantastic open-source solutions – Prometheus and Grafana. Its DIY solution where you use Prometheus to scrape the metrics from server, OS, applications and use Grafana to visualize them.

As we already collect logs, we should do some research on how to
import this data into Grafana.

### Time series

* https://grafana.com/docs/grafana/latest/fundamentals/timeseries/#introduction-to-time-series

  E.g. CPU and memory usage, sensor data.

* https://grafana.com/docs/grafana/latest/fundamentals/timeseries/#time-series-databases

  A time series database (TSDB) is a database explicitly designed for time series data.

  Some supported TSDBs are:

  * Graphite
  * InfluxDB
  * Prometheus

### Installation

* https://grafana.com/docs/grafana/latest/setup-grafana/installation/docker/#alpine-image-recommended
* https://grafana.com/docs/grafana/latest/setup-grafana/installation/docker/#install-official-and-community-grafana-plugins

* https://grafana.com/grafana/plugins/marcusolsson-csv-datasource/?tab=installation
  * https://grafana.github.io/grafana-csv-datasource/
* https://grafana.com/grafana/plugins/marcusolsson-json-datasource/?tab=installation
  * https://grafana.github.io/grafana-json-datasource/

```shell
sudo docker run --rm \
  -p 3000:3000 \
  --name=grafana \
  -e "GF_INSTALL_PLUGINS=marcusolsson-json-datasource,marcusolsson-csv-datasource" \
  grafana/grafana-oss
```

TODO: test csv or json data import tools

## Netdata - Can be exported to Grafana

* https://github.com/netdata/netdata/blob/master/docs/getting-started/introduction.md

## Monit - An existing monitoring service

### General notes and links

* Monit is a widely used service for system monitoring.
  * OPNsense uses Monit: https://docs.opnsense.org/manual/monit.html

* Short slideshow presentation: https://mmonit.com/monit/#slideshow
* https://wiki.ubuntuusers.de/Monit/

* Excellent configuration and usage summary in the Arch Linux Wiki: https://wiki.archlinux.org/title/Monit

* Examples
  * https://mmonit.com/wiki/Monit/ConfigurationExamples
    * One can use the return code or stdout of an executed shell script
    * https://mmonit.com/wiki/Monit/ConfigurationExamples#HDDHealth

      ```
      check program HDD_Health with path "/usr/local/etc/monit/scripts/sdahealth.sh"
          every 120 cycles
          if content != "PASSED" then alert
          # if status > 0 then alert
          group health
      ```
* Documentation
  * Event queue - Store events (notifications) if mail server is not reachable
    * https://mmonit.com/monit/documentation/monit.html#Event-queue

      ```
      set eventqueue basedir /var/monit
      ```
  * https://mmonit.com/monit/documentation/monit.html#SPACE-USAGE-TEST

    ```
    check filesystem rootfs with path /
        if space usage > 90% then alert
    ```
  * https://mmonit.com/monit/documentation/monit.html#PROGRAM-STATUS-TEST

    ```
    check program myscript with path /usr/local/bin/myscript.sh
        if status != 0 then alert
    ```
  * https://mmonit.com/monit/documentation/monit.html#PROGRAM-OUTPUT-CONTENT-TEST
  * https://mmonit.com/monit/documentation/monit.html#Link-upload-and-download-bytes

    ```
    check network eth0 with interface eth0
        if upload > 500 kB/s then alert
        if total downloaded > 1 GB in last 2 hours then alert
        if total downloaded > 10 GB in last day then alert
    ```

* https://mmonit.com/monit/documentation/monit.html#MANAGE-YOUR-MONIT-INSTANCES

### Monitoring all your monit instances

* Monit itself only monitors the current system
* Multi-server monitoring is a paid extra service called M/Monit :/
* But there are other open source services for this
  * https://github.com/monmon-io/monmon#why-did-you-create-monmon

### Setup

Install and start:

```shell
sudo pacman -S --needed monit lm_sensors smartmontools
sudo systemctl start monit
sudo systemctl status monit | grep 'Active: active (running)'
```

Print default configuration:

```shell
sudo cat /etc/monitrc | grep -v '^#'
#=> set daemon 30
#=>   - A cycle is 30 seconds long.
#=> set log syslog
#=>   - We will overwrite this config value later on.
#=> set httpd port 2812
#=>   - Only listen on localhost with username admin and pwd monit.
```

Include `monit.d`:

```shell
sudo mkdir -p /etc/monit.d/
! sudo cat /etc/monitrc | grep -q '^include' && echo 'include /etc/monit.d/*' | sudo tee -a /etc/monitrc
```

Log to file:

```shell
sudo install -m700 /dev/stdin /etc/monit.d/log <<< 'set log /var/log/monit.log'
sudo systemctl restart monit
# tail -f /var/log/monit.log
```

System:

```shell
sudo install -m700 /dev/stdin /etc/monit.d/system <<< 'check system $HOST
    if filedescriptors >= 80% then alert
    if loadavg (5min) > 2 for 4 cycles then alert
    if memory usage > 75% for 4 cycles then alert
    if swap usage > 50% for 4 cycles then alert'
sudo systemctl restart monit
```

Filesystem:

```shell
sudo install -m700 /dev/stdin /etc/monit.d/fs <<< 'check filesystem rootfs with path /
    if space usage > 80% then alert'
sudo systemctl restart monit
```

SSL options:

* https://mmonit.com/monit/documentation/monit.html#SSL-OPTIONS

```shell
sudo install -m700 /dev/stdin /etc/monit.d/ssl <<< '# Enable certificate verification for all SSL connections
set ssl options {
    verify: enable
}'
sudo systemctl restart monit
```

Mailserver, alerts and eventqueue:

* https://mmonit.com/monit/documentation/monit.html#Setting-a-mail-server-for-alert-delivery
* https://mmonit.com/monit/documentation/monit.html#Setting-an-error-reminder
* https://mmonit.com/monit/documentation/monit.html#Event-queue
  * If no mail server is available, Monit can queue events in the local file-system for retry until the mail server recovers.
  * By default, the queue is disabled and if the alert handler fails, Monit will simply drop the alert message.

```shell
sudo install -m700 /dev/stdin /etc/monit.d/mail <<< 'set mailserver smtp.mail.de
    port 465
    username "langbein@mail.de"
    password "qiXF6cUgfvSVqd0pAoFTqZEHIcUKzc3n"
    using SSL
    with timeout 20 seconds

set mail-format {
    from: langbein@mail.de
    subject: $SERVICE - $EVENT at $DATE
    message: Monit $ACTION $SERVICE at $DATE on $HOST: $DESCRIPTION.
}

set alert daniel@systemli.org with reminder on 10 cycles

set eventqueue basedir /var/monit'
sudo systemctl restart monit
sudo monit -v | grep 'Mail'
```

Test alert:

* https://wiki.ubuntuusers.de/Monit/#E-Mail-Benachrichtigungen-testen
* It is enough to restart monit. It will send an email that its state has changed (stopped/started).
* But if desired, one can also create a test for a non-existing file:

```shell
sudo install -m700 /dev/stdin /etc/monit.d/alerttest <<< 'check file alerttest with path /.nonexistent.file'
sudo systemctl restart monit
```

Example script - run a speedtest:

```shell
sudo pacman -S --needed speedtest-cli
sudo install -m700 /dev/stdin /etc/monit.d/speedtest <<< 'check program speedtest with path /usr/bin/speedtest-cli
    every 120 cycles
    if status != 0 then alert'
sudo systemctl restart monit
```

Check config syntax:

```shell
sudo monit -t
```

### TODOs

* See Firefox bookmark folder 20230219_monit.
* Disk health
* BTRFS balance
* Save disk usage and temperatures to CSV log file
  * e.g. by using `check program check-and-log-temp.sh` monit configuration
  * Or: do checks by monit and every couple of minutes run `check program log-system-info.sh`

### Monit behind Nginx

TODO: Nginx reverse proxy with basic authentication.
cfg/yodaNas.ini (new file)
@@ -0,0 +1,79 @@
[logging]
dir = /var/log/de-p1st-monitor/

[filesystem.1]
; NVME
mountpoint = /
warn_if_above = 0.75
[filesystem.2]
; NVME
mountpoint = /boot
warn_if_above = 0.75
[filesystem.3]
; 12TB1
uuid = c385a436-0288-486f-a2b9-c64c2db667e7
warn_if_above = 0.66
[filesystem.4]
; 3TB1 and 3TB2
uuid = a454430b-dee3-4b6b-8325-f7bdb9435ed1
warn_if_above = 0.85
unmounted_ok = true

[memory]
warn_if_above = 0.85
[swap]
warn_if_above = 0.85

[cpu1]
warn_if_above = 3.0
warn_threshold = 2
warn_data_range = 2
[cpu5]
warn_if_above = 2.0
warn_threshold = 2
warn_data_range = 2
[cpu15]
warn_if_above = 1.0
warn_threshold = 2
warn_data_range = 2

[temp.1]
sensor = coretemp
label = Package id 0
warn_if_above = 60
[temp.2]
sensor = coretemp
label = Core 0
warn_if_above = 60
[temp.3]
sensor = coretemp
label = Core 1
warn_if_above = 60

[network.1]
network_interface = enp0s31f6

[drive.1]
; NVME /dev/nvme0n1p3
; TODO NVME 49 warn, 55 limit
uuid = b8ef1da9-d76d-44b4-86d4-71c82c888b6f
warn_if_above = 50
[drive.2]
; HDD 12TB1
; TODO HDD 39 warn, 45 limit
uuid = 68c349e8-5118-4773-9fd5-5dbad9acee4e
warn_if_above = 40
[drive.3]
; HDD 3TB1
uuid = 20d86155-30d4-404c-95e8-c701cfb16ca5
warn_if_above = 40
[drive.4]
; HDD 3TB2
uuid = 4651c3f1-e4b8-45aa-a823-df762530a307
warn_if_above = 40

; TODO digitemp sensor
;[digitemp_DS9097.1]
;cfg = /root/.digitemprc
;sensor_num = 0
;name = room-temp
cfg/yodaTux.ini (new file)
@@ -0,0 +1,79 @@
[logging]
; The CSV logfiles are saved in this directory.
dir = /var/log/de-p1st-monitor/


[temp.1]
; `sensor` and `label` are used to identify one temperature value.
sensor = k10temp
label = Tctl

; Warn if temperature is above this value.
; Unit: °C
warn_if_above = 80

; Send warning if critical values were reached 2 times during the last 4 logs.
warn_threshold = 2
warn_data_range = 4

[temp.2]
sensor = amdgpu
label = edge
warn_if_above = 50
warn_threshold = 2
warn_data_range = 4


[network.1]
network_interface = wlan0


[memory]
; Warn if memory usage is above this value.
; Range: (0.0, 1.0)
warn_if_above = 0.1

[swap]
; Warn if swap usage is above this value.
; Range: (0.0, 1.0)
warn_if_above = 0.5


[cpu1]
; Warn if CPU load of the last 1 minute is above this value.
; Range: (0.0, infinite)
; `1.0` corresponds to 100% CPU utilisation.
; However, there can be more processes in the queue than can be processed.
; As a result, the value can go above `1.0`.
warn_if_above = 0.95
[cpu5]
; Warn if CPU load of the last 5 minutes is above this value.
warn_if_above = 0.85
[cpu15]
; Warn if CPU load of the last 15 minutes is above this value.
warn_if_above = 0.75


[filesystem.1]
; Either `uuid` or `mountpoint` must be given.
;
; If both are given but the UUID of the disk mounted at `mountpoint` differs from `uuid`, then an exception is raised.
uuid = 7fb12542-bd59-4727-9beb-7cf1f79f8293
mountpoint = /

; If `true` don't log or warn if the filesystem is not found.
unmounted_ok = true

; Warn if disk usage is above this value.
; Range: (0.0, 1.0)
warn_if_above = 0.1


[drive.1]
; Either `uuid` or `device` must be given.
;uuid =
device = /dev/nvme0n1p3

; Warn if temperature is above this value.
; Unit: °C
warn_if_above = 25
cron.d/de-p1st-monitor (new file)
@@ -0,0 +1,11 @@
# Run command every 3min
# - https://crontab.guru/every-3-minutes
# `/etc/cron.d/` requires user field
# - https://unix.stackexchange.com/questions/458713/how-are-files-under-etc-cron-d-used#comment1019389_458715
# Some users report that files in `/etc/cron.d/` containing `-` are not executed
# - https://unix.stackexchange.com/questions/296347/crontab-never-running-while-in-etc-cron-d#comment640748_296351
# PATH is restricted to `/bin:/usr/bin` but `exec-notify` resides in `/usr/local/bin/`
# - https://serverfault.com/a/449652

PATH=/sbin:/bin:/usr/sbin:/usr/bin:/usr/local/sbin:/usr/local/bin/
*/3 * * * * root exec-notify de-p1st-monitor > /var/log/de-p1st-monitor.cron 2>&1
BIN images/cpu_1min.csv.resample-1H.png (new file, 114 KiB)
BIN (new image, 118 KiB)
BIN (new image, 89 KiB)
BIN images/memory.csv.resample-1H.png (new file, 107 KiB)
BIN images/net_enp0s31f6.csv.exported.csv.resample-1H.png (new file, 138 KiB)
BIN images/swap.csv.resample-1H.png (new file, 107 KiB)
BIN images/temp_coretemp_Package id 0.csv.resample-1H.png (new file, 132 KiB)
packaging/PKGBUILD (new file)
@@ -0,0 +1,56 @@
# Maintainer: Daniel Langbein < daniel [ at ] systemli [ dot ] org >

# This PKGBUILD is based on the instructions from the Arch Linux wiki:
# https://wiki.archlinux.org/title/Python_package_guidelines

_name=de-p1st-monitor
pkgname="python-$_name-git"
pkgver=r202.f3f2f46
pkgrel=1
pkgdesc='periodically monitor and warn'
arch=(any)
url="https://git.privacy1st.de/langfingaz/$_name"
license=('custom:BSD-3-Clause-Clear-License')

provides=(de-p1st-monitor)
depends=(python exec-notify)
makedepends=(git python-build python-installer python-wheel)
optdepends=('python-psutil: CPU, memory, network monitoring'
            'digitemp: USB temperature sensor'
            'smartmontools: disk temperature monitoring')

source=("git+https://git.privacy1st.de/langfingaz/$_name.git")
b2sums=(SKIP)

# If there are no tags then use number of revisions since beginning of the history:
# https://wiki.archlinux.org/title/VCS_package_guidelines
pkgver() {
  cd "$_name"
  printf "r%s.%s" "$(git rev-list --count HEAD)" "$(git rev-parse --short=7 HEAD)"
}

prepare() {
  git -C "$srcdir/$_name" clean -dfx
}

build() {
  # cd "$_name-$pkgver"
  cd "$_name"
  python -m build --wheel --no-isolation
}

package() {
  # cd "$_name-$pkgver"
  cd "$_name"
  python -m installer --destdir="$pkgdir" dist/*.whl
}

check() {
  cd "$srcdir/$_name"

  # For nosetests
  # nosetests

  # For pytest
  # pytest
}
pyproject.toml (new file)
@@ -0,0 +1,8 @@
# https://packaging.python.org/tutorials/packaging-projects/#creating-pyproject-toml

[build-system]
requires = [
    "setuptools>=42",
    "wheel"
]
build-backend = "setuptools.build_meta"
requirements.txt (new file)
@@ -0,0 +1 @@
psutil>=5.9
@@ -0,0 +1,9 @@
@inproceedings{32774,
  title = {Failure Trends in a Large Disk Drive Population},
  author = {Eduardo Pinheiro and Wolf-Dietrich Weber and Luiz André Barroso},
  year = {2007},
  booktitle = {5th USENIX Conference on File and Storage Technologies (FAST 2007)},
  pages = {17-29}
}
setup.cfg
Normal file
@ -0,0 +1,35 @@
|
||||
; setup.cfg is the configuration file for setuptools.
|
||||
; https://packaging.python.org/tutorials/packaging-projects/#configuring-metadata
|
||||
|
||||
[metadata]
|
||||
name = de.p1st.monitor
|
||||
version = 0.8.0
|
||||
author = Daniel Langbein
|
||||
author_email = daniel@systemli.org
|
||||
description = periodically monitor and warn
|
||||
long_description = file: README.md
|
||||
long_description_content_type = text/markdown
|
||||
url = https://git.privacy1st.de/langfingaz/de-p1st-monitor
|
||||
project_urls =
|
||||
Bug Tracker = https://git.privacy1st.de/langfingaz/de-p1st-monitor/issues
|
||||
|
||||
; https://pypi.org/classifiers/
|
||||
classifiers =
|
||||
Development Status :: 4 - Beta
|
||||
Programming Language :: Python :: 3
|
||||
; License :: BSD 3-Clause Clear License
|
||||
Operating System :: Unix
|
||||
|
||||
[options]
|
||||
package_dir =
|
||||
= src
|
||||
packages = find:
|
||||
python_requires = >=3.6.9
|
||||
|
||||
[options.packages.find]
|
||||
where = src
|
||||
|
||||
[options.entry_points]
|
||||
; https://setuptools.readthedocs.io/en/latest/userguide/entry_point.html
|
||||
console_scripts=
|
||||
de-p1st-monitor = de.p1st.monitor.main:main
|
setup.py (new file)
@@ -0,0 +1,4 @@
# This file is required for `pip install` on Ubuntu 18.04.
# It loads `setup.cfg`.
from setuptools import setup
setup()
src/de/__init__.py (new empty file)
src/de/p1st/__init__.py (new empty file)
src/de/p1st/monitor/__init__.py (new empty file)
src/de/p1st/monitor/cfg/__init__.py (new empty file)
src/de/p1st/monitor/cfg/loggers.py (new file)
@@ -0,0 +1,106 @@
import configparser
from pathlib import Path

from de.p1st.monitor.cfg.singleton import get_cfg
from de.p1st.monitor.logger_ex import LoggerArgEx
from de.p1st.monitor.loggers.cpu import CPULogger1, CPULogger5, CPULogger15
from de.p1st.monitor.loggers.drive import DriveLogger
from de.p1st.monitor.loggers.filesystem import FilesystemLogger
from de.p1st.monitor.loggers.memory import MemoryLogger
from de.p1st.monitor.loggers.network import NetworkLogger
from de.p1st.monitor.loggers.swap import SwapLogger
from de.p1st.monitor.loggers.temp import TempLogger
from de.p1st.monitor.logger import Logger


def get_or_raise(cfg: configparser.SectionProxy, key: str) -> str:
    if key in cfg:
        return cfg[key]
    else:
        raise LoggerArgEx(f'Missing key {key} in section {cfg.name}')


def get_loggers() -> tuple[list[Logger], list[LoggerArgEx]]:
    def temp(cfg: configparser.SectionProxy) -> Logger:
        sensor = get_or_raise(cfg, 'sensor')
        label = get_or_raise(cfg, 'label')
        warn_if_above = float(cfg['warn_if_above']) if 'warn_if_above' in cfg else None
        warn_threshold = int(cfg.get('warn_threshold', '1'))
        warn_data_range = int(cfg.get('warn_data_range', '1'))
        return TempLogger(sensor, label, warn_if_above, warn_threshold, warn_data_range)

    def cpu1(cfg: configparser.SectionProxy) -> Logger:
        warn_if_above = float(cfg['warn_if_above']) if 'warn_if_above' in cfg else None
        warn_threshold = int(cfg.get('warn_threshold', '1'))
        warn_data_range = int(cfg.get('warn_data_range', '1'))
        return CPULogger1(warn_if_above, warn_threshold, warn_data_range)

    def cpu5(cfg: configparser.SectionProxy) -> Logger:
        warn_if_above = float(cfg['warn_if_above']) if 'warn_if_above' in cfg else None
        warn_threshold = int(cfg.get('warn_threshold', '1'))
        warn_data_range = int(cfg.get('warn_data_range', '1'))
        return CPULogger5(warn_if_above, warn_threshold, warn_data_range)

    def cpu15(cfg: configparser.SectionProxy) -> Logger:
        warn_if_above = float(cfg['warn_if_above']) if 'warn_if_above' in cfg else None
        warn_threshold = int(cfg.get('warn_threshold', '1'))
        warn_data_range = int(cfg.get('warn_data_range', '1'))
        return CPULogger15(warn_if_above, warn_threshold, warn_data_range)

    def net(cfg: configparser.SectionProxy) -> Logger:
        network_interface = get_or_raise(cfg, 'network_interface')
        return NetworkLogger(network_interface)

    def filesystem(cfg: configparser.SectionProxy) -> Logger:
        uuid = cfg.get('uuid', None)
        mountpoint = Path(cfg.get('mountpoint')) if 'mountpoint' in cfg else None
        # `bool(cfg.get('unmounted_ok', 'false'))` would be True for any non-empty
        # string, including 'false'; getboolean() parses the string correctly.
        unmounted_ok = cfg.getboolean('unmounted_ok', fallback=False)
        warn_if_above = float(cfg.get('warn_if_above', '1.0'))
        warn_threshold = int(cfg.get('warn_threshold', '1'))
        warn_data_range = int(cfg.get('warn_data_range', '1'))
        return FilesystemLogger(uuid, mountpoint, unmounted_ok, warn_if_above, warn_threshold, warn_data_range)

    def drive(cfg: configparser.SectionProxy) -> Logger:
        uuid = cfg.get('uuid', None)
        device = Path(cfg.get('device')) if 'device' in cfg else None
        warn_if_above = int(cfg['warn_if_above']) if 'warn_if_above' in cfg else None
        warn_threshold = int(cfg.get('warn_threshold', '1'))
        warn_data_range = int(cfg.get('warn_data_range', '1'))
        return DriveLogger(uuid, device, warn_if_above, warn_threshold, warn_data_range)

    def memory(cfg: configparser.SectionProxy) -> Logger:
        warn_if_above = float(cfg.get('warn_if_above', '1.0'))
        warn_threshold = int(cfg.get('warn_threshold', '1'))
        warn_data_range = int(cfg.get('warn_data_range', '1'))
        return MemoryLogger(warn_if_above, warn_threshold, warn_data_range)

    def swap(cfg: configparser.SectionProxy) -> Logger:
        warn_if_above = float(cfg.get('warn_if_above', '1.0'))
        warn_threshold = int(cfg.get('warn_threshold', '1'))
        warn_data_range = int(cfg.get('warn_data_range', '1'))
        return SwapLogger(warn_if_above, warn_threshold, warn_data_range)

    mapping = {
        'temp': temp,
        'cpu1': cpu1,
        'cpu5': cpu5,
        'cpu15': cpu15,
        'network': net,
        'filesystem': filesystem,
        'drive': drive,
        'memory': memory,
        'swap': swap,
    }

    loggers = []
    exceptions = []
    cfg: configparser.ConfigParser = get_cfg()
    for section_name in cfg.sections():
        if section_name == 'logging':
            continue
        prefix = section_name.split('.', maxsplit=1)[0]
        try:
            loggers.append(
                mapping[prefix](cfg[section_name])
            )
        except LoggerArgEx as e:
            exceptions.append(e)

    return loggers, exceptions
src/de/p1st/monitor/cfg/logging_dir.py (new file)
@@ -0,0 +1,11 @@
from pathlib import Path

from de.p1st.monitor.cfg.singleton import get_cfg


def logging_dir() -> Path:
    cfg = get_cfg()
    default = '/var/log/de-p1st-monitor'
    if 'logging' not in cfg:
        return Path(default)
    return Path(cfg['logging'].get('dir', default))
src/de/p1st/monitor/cfg/singleton.py (new file)
@@ -0,0 +1,30 @@
import configparser
from pathlib import Path

_cfg: configparser.ConfigParser | None = None


def init_cfg(config_file: Path = None):
    global _cfg

    if _cfg is not None:
        raise ValueError('already initialized')

    if config_file is None:
        import socket
        hostname = socket.gethostname()
        config_file = Path(f'/etc/de-p1st-monitor/{hostname}.ini')

    if not config_file.exists():
        raise Exception(f'Configuration file does not exist! {config_file}')

    _cfg = configparser.ConfigParser()
    _cfg.read(config_file)


def get_cfg() -> configparser.ConfigParser:
    global _cfg

    if _cfg is None:
        raise ValueError('uninitialized')
    return _cfg
src/de/p1st/monitor/csv_util.py (new file)
@@ -0,0 +1,90 @@
#!/usr/bin/env python3
import csv
from collections import deque
from pathlib import Path


def read(file: Path) -> list[list[str]]:
    """
    Returns all rows from the CSV file `file`.
    """
    with open(file, newline='') as csvfile:
        reader = csv.reader(csvfile, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
        return [row for row in reader]


def read_last(file: Path, num_rows: int, skip: int = 0) -> list[list[str]]:
    """
    Returns the last `num_rows` from the CSV file `file`.

    :param file:
    :param num_rows:
    :param skip: If given, the first `skip` rows are skipped.
    """
    with open(file, newline='') as csvfile:
        reader = csv.reader(csvfile, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)

        # Skip the first `skip` rows.
        for i in range(skip):
            try:
                next(reader)
            except StopIteration:
                break  # EOF

        # Read all other rows but only keep the last `num_rows` rows.
        q = deque(reader, num_rows)
        # Return the last `num_rows` as list.
        return [row for row in q]


def write(file: Path,
          rows: list[list[str]],
          header: list[str] = None,
          create_parent_dirs: bool = True,
          recreate_file: bool = False) -> None:
    """
    Create new .csv file if missing or append to existing .csv file.

    :param file:
    :param rows: The rows to write as csv table to file.
    :param header: If given will be inserted as first row into the csv table.
    :param create_parent_dirs: If `file.parent` does not exist, create it.
    :param recreate_file: Never append, always recreate the .csv file.
    """
    if create_parent_dirs and not file.parent.exists():
        file.parent.mkdir(parents=True, exist_ok=False)
    if recreate_file and file.exists():
        file.unlink(missing_ok=False)
    if file.exists():
        append(file, rows)
    else:
        if header is not None:
            rows = [header] + rows
        create(file, rows)

        text = file.read_text()
        if text.count('\n') != len(rows) or not text.endswith('\n'):
            raise Exception(f'Created a new csv file with {len(rows)} rows but it does not have {len(rows)} lines. '
                            f'Make sure that there are no concurrent writes to this file!')


def create(file: Path, rows: list[list[str]]) -> None:
    with open(file, 'x', newline='') as csvfile:
        writer = csv.writer(csvfile, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
        writer.writerows(rows)


def append(file: Path, rows: list[list[str]]) -> None:
    with open(file, 'a', newline='') as csvfile:
        writer = csv.writer(csvfile, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
        writer.writerows(rows)


def test():
    file = Path('/var/log/de-p1st-monitor/cpu_avg.csv')
    data = read_last(file, 4, 10)
    print(data)


if __name__ == '__main__':
    test()
src/de/p1st/monitor/datetime_util.py (new executable file)
@@ -0,0 +1,49 @@
#!/usr/bin/env python3
from datetime import datetime, timezone


def test():
    dt = datetime.now()

    print('non UTC:')
    print(dt)

    print('\nUTC:')
    print(now())
    print(to_str(now()))
    print(now_str())
    print(from_str(to_str(now())))

    print('\nlocalized:')
    print(dt.tzinfo)
    dt = dt.replace(tzinfo=timezone.utc)
    print(dt)


def now() -> datetime:
    return datetime.now(timezone.utc)


def now_str() -> str:
    return to_str(now())


def to_str(dt: datetime) -> str:
    return dt.strftime(fmt())


def from_str(dt_str: str) -> datetime:
    dt = datetime.strptime(dt_str, fmt())
    return dt.replace(tzinfo=timezone.utc)


def fmt() -> str:
    return '%Y%m%dT%H%M%S'


def fmt_len() -> int:
    # Length of a formatted timestamp, e.g. len('20230315T103001') == 15.
    return 15


if __name__ == '__main__':
    test()
src/de/p1st/monitor/exec_capture.py (new file)
@@ -0,0 +1,12 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import subprocess


def execute_capture(command: list[str]) -> tuple[int, str, str]:
    completed: subprocess.CompletedProcess = subprocess.run(
        command,
        capture_output=True,
        text=True,
    )
    return completed.returncode, completed.stdout, completed.stderr
src/de/p1st/monitor/logger.py (new file)
@@ -0,0 +1,230 @@
from pathlib import Path
from abc import ABC, abstractmethod

from de.p1st.monitor import csv_util
from de.p1st.monitor.cfg.logging_dir import logging_dir
from de.p1st.monitor.string_conversion import to_string, from_string
from de.p1st.monitor.warn import WarnMessage, WarnLevel


# https://www.geeksforgeeks.org/abstract-classes-in-python/
class Logger(ABC):
    def __init__(self,
                 warn_threshold: int = 1,
                 warn_data_range: int = 1,
                 warn_if_above: int | float = None,
                 critical_if_above: int | float = None,
                 ):
        self.data: list[any] | None = None
        # True if the data held by this object is already appended to the logfile.
        self.logged = False

        self.warn_threshold = warn_threshold
        self.warn_data_range = warn_data_range

        # Either both variables are given, or both are None.
        if warn_if_above is not None and critical_if_above is not None:
            assert critical_if_above > warn_if_above
        else:
            assert warn_if_above is None and critical_if_above is None
        self.warn_if_above = warn_if_above
        self.critical_if_above = critical_if_above

    def export_data(self) -> Path:
        """
        This method is intended to be overridden in a subclass!

        With most loggers the log file returned by `get_log_file()` is ready to use.
        In this case this method simply returns `get_log_file()`.

        But some loggers require postprocessing of that data before it can be used.
        In this case this method creates a new .csv file and returns it.

        @return: Path to .csv file with ready-to-use data.
        """
        return self.get_log_file()

    def check(self) -> WarnMessage:
        """
        Checks the latest `self.warn_data_range` datasets for problems using `self.check_data()`.

        If at least `self.warn_threshold` problems are found,
        then a WarnMessage with the highest reported WarnLevel is returned.

        If at least one WarnLevel is above NORMAL,
        then a WarnMessage is returned independent of the number of problems.
        """
        datasets = self.get_datasets(self.warn_data_range)
        warnings = [self.check_data(data) for data in datasets]
        warnings = [warning for warning in warnings
                    if not warning.level == WarnLevel.NONE]

        if len(warnings) == 0:
            return WarnMessage(WarnLevel.NONE)
        # max() must not be called with an empty list.
        highest_warn_level = max([warning.level for warning in warnings])

        messages: list[str] = [warning.message for warning in warnings]
        message = f'{len(warnings)} of the last {self.warn_data_range} datasets are above limits:\n\t' \
                  + '\n\t'.join(messages)

        if highest_warn_level > WarnLevel.NORMAL:
            return WarnMessage(highest_warn_level, message)
        if len(warnings) >= self.warn_threshold:
            return WarnMessage(highest_warn_level, message)
        return WarnMessage(WarnLevel.NONE)

    @abstractmethod
    def check_data(self, data: list[any]) -> WarnMessage:
        """
        Check the given data for problems.
        Return a WarnMessage indicating how serious the problems are.

        If there are no problems, return a message with `WarnLevel.NONE`.
        """
        raise ValueError('Subclasses must implement this')

    def get_all_datasets(self) -> list[list[any]]:
        # See also: self.get_datasets()

        if self.get_log_file().exists():
            # We skip the first row as it is the data schema.
            raw = csv_util.read(self.get_log_file())[1:]
            data = [self.get_data_from_row(row) for row in raw]
        else:
            data = []

        if not self.logged and self.has_data():
            data.append(self.get_data())

        return data

    def get_datasets(self, num: int) -> list[list[any]]:
        """
        Returns the last `num` datasets (including the current dataset).
        """
        if not self.logged and self.has_data():
            # We will append the current data manually.
            # Thus, we need to read one less line from the CSV file.
            read_last = num - 1
        else:
            read_last = num

        if self.get_log_file().exists():
            # Read rows from CSV file.
            # We skip the first row as it is the data schema.
            # We keep only the last `read_last` rows.
            raw = csv_util.read_last(self.get_log_file(), read_last, 1)
            # Convert from string to data types defined in the data schema.
            data = [self.get_data_from_row(row) for row in raw]
        else:
            data = []

        if not self.logged and self.has_data():
            # We append the current data.
            # It has not yet been logged and is therefore not included in the CSV file we just read.
            data.append(self.get_data())

        return data

    def log(self, skip_if_no_data: bool = False) -> None:
        """
        Appends the current data (e.g. temperature of a sensor)
        to a logfile.

        :param skip_if_no_data: Can be used to do nothing if no data is available.
            If one is sure to have called update() previously, this can be set to True.
        :raise Exception: If method is called but no data is available.
            Please do call update() first to avoid this!
        """
        if self.logged:
            return
        if skip_if_no_data and not self.has_data():
            return

        csv_util.write(file=self.get_log_file(), rows=[self.get_data_as_row()], header=self.data_schema())
        self.logged = True

    def update(self):
        self.set_data(self.read_data())
        self.logged = False

    @abstractmethod
    def read_data(self) -> list[any] | None:
        """
        Collects current data (e.g. temperature of a sensor).

        Might return None if sensor is detached / not available.

        :raise LoggerReadEx:
        """
        raise ValueError('Subclasses must implement this')

    @abstractmethod
    def data_schema(self) -> list[str]:
        """
        Describes the type and meaning of the elements in self.values().

        Returns a list with elements f'{data-type}#{column-description}'.

        Example:
            ['datetime#Date', 'float#Disk usage']
        """
        raise ValueError('Subclasses must implement this')

    def get_data_from_row(self, data: list[str]) -> list[any]:
        return [
            from_string(v, type_str)
            for v, type_str
            in zip(data, self.data_type_strs())
        ]

    def get_data_as_row(self) -> list[str]:
        """
        Returns `self.get_data()` as string list that can easily be added as row to a CSV file.
        """
        return self.as_row(self.get_data())

    def as_row(self, data: list, data_schema: list[str] = None) -> list[str]:
        """
        Returns the given `data` as string list that can easily be added as row to a CSV file.
        """
        if data_schema is None:
            data_schema = self.data_schema()
        return [
            to_string(v, type_str)
            for v, type_str
            in zip(data, self.data_type_strs(data_schema))
        ]

    def has_data(self) -> bool:
        return self.data is not None

    def get_data(self) -> list[any]:
        """
        Returns the last data collected by `self.update()`.
        """
        if self.has_data():
            return self.data
        else:
            raise ValueError(f'Data has not yet been read. {self.__str__()}')

    def set_data(self, data: list[any] | None):
        self.data = data

    def data_type_strs(self, data_schema: list[str] = None) -> list[str]:
        if data_schema is None:
            data_schema = self.data_schema()
        return [x.split('#', maxsplit=1)[0] for x in data_schema]

    @abstractmethod
    def get_log_file(self) -> Path:
        raise ValueError('Subclasses must implement this')

    @classmethod
    def get_log_dir(cls) -> Path:
        return logging_dir()

    def __str__(self) -> str:
        key_value_strings = [f'classname: {type(self).__name__}']
        for key, value in vars(self).items():
            key_value_strings.append(f'{key}: {value}')
        return ', '.join(key_value_strings)
src/de/p1st/monitor/logger_ex.py (new file)
@@ -0,0 +1,14 @@
class LoggerReadEx(Exception):
    """
    Used by Logger subclasses if
    - sensor data could not be read
    """
    pass


class LoggerArgEx(Exception):
    """
    Used by Logger subclasses if
    - Logger object created with illegal arguments
    """
    pass
src/de/p1st/monitor/loggers/__init__.py (new empty file)
90
src/de/p1st/monitor/loggers/cpu.py
Normal file
@ -0,0 +1,90 @@
from pathlib import Path
from abc import abstractmethod
from typing import Literal

import psutil

from de.p1st.monitor import datetime_util
from de.p1st.monitor.logger import Logger
from de.p1st.monitor.warn import WarnMessage, WarnLevel


class CPULogger(Logger):
    def __init__(self,
                 warn_if_above: float = None,
                 warn_threshold: int = 1,
                 warn_data_range: int = 1,
                 ):
        # Guard against the default `warn_if_above=None`.
        critical_if_above = warn_if_above * 1.5 if warn_if_above is not None else None
        super().__init__(warn_threshold,
                         warn_data_range,
                         warn_if_above,
                         critical_if_above)
        self.warn_if_above = warn_if_above

    def check_data(self, data: list[any]) -> WarnMessage:
        load_avg = data[1]
        message = f'CPU load avg of last {self.get_load_timespan()} minutes is at {load_avg}'

        if load_avg > self.critical_if_above:
            return WarnMessage(WarnLevel.HIGH, message)
        if load_avg > self.warn_if_above:
            return WarnMessage(WarnLevel.NORMAL, message)
        return WarnMessage(WarnLevel.NONE)

    def read_data(self) -> list[any] | None:
        return [
            datetime_util.now(),
            self.get_load(self.get_load_timespan())
        ]

    def data_schema(self) -> list[str]:
        return [
            'datetime#Date',
            f'float#LoadAverage{self.get_load_timespan()}min'
        ]

    def get_log_file(self) -> Path:
        return self.get_log_dir() / f'cpu_{self.get_load_timespan()}min.csv'

    @abstractmethod
    def get_load_timespan(self) -> Literal[1, 5, 15]:
        raise NotImplementedError('Subclasses must implement this')

    #
    # HELPERS
    #

    @staticmethod
    def get_load(minutes: Literal[1, 5, 15]) -> float:
        """
        :param minutes: avg of last 1/5/15 minutes
        :return: Average CPU load of the last 1/5/15 minutes
        """
        idx_dict = {
            1: 0,
            5: 1,
            15: 2
        }
        idx = idx_dict[minutes]

        # Number of processes in the system run queue averaged over
        # the last 1, 5, and 15 minutes:
        # one, five, fifteen = psutil.getloadavg()

        # Load percentage during the last 1/5/15 minutes.
        # This value has been verified to be correct on my AMD Ryzen 4800H CPU.
        return psutil.getloadavg()[idx] / psutil.cpu_count()


class CPULogger1(CPULogger):
    def get_load_timespan(self) -> Literal[1, 5, 15]:
        return 1


class CPULogger5(CPULogger):
    def get_load_timespan(self) -> Literal[1, 5, 15]:
        return 5


class CPULogger15(CPULogger):
    def get_load_timespan(self) -> Literal[1, 5, 15]:
        return 15
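A standalone sketch of the normalization `get_load` performs; the printed value depends on the machine it runs on:

# The 1-minute load average divided by the logical CPU count gives a
# utilization-like ratio (around 1.0 means fully loaded).
import psutil

normalized = psutil.getloadavg()[0] / psutil.cpu_count()
print(f'normalized 1-min load: {normalized:.2f}')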
108
src/de/p1st/monitor/loggers/drive.py
Normal file
@ -0,0 +1,108 @@
import json
from pathlib import Path

from de.p1st.monitor import datetime_util

from de.p1st.monitor.exec_capture import execute_capture
from de.p1st.monitor.logger import Logger
from de.p1st.monitor.logger_ex import LoggerArgEx, LoggerReadEx
from de.p1st.monitor.warn import WarnLevel, WarnMessage


class BlkidException(Exception):
    pass


class DriveLogger(Logger):
    def __init__(self, uuid: str = None,
                 device: Path = None,
                 warn_if_above: int = None,
                 warn_threshold: int = 1,
                 warn_data_range: int = 1,
                 ):
        # Guard against the default `warn_if_above=None`.
        critical_if_above = warn_if_above + 10 if warn_if_above is not None else None
        super().__init__(warn_threshold,
                         warn_data_range,
                         warn_if_above,
                         critical_if_above
                         )

        if uuid is None and device is None:
            raise LoggerArgEx('uuid or device required')

        if uuid is None:
            try:
                self.uuid = self.get_partition_uuid(device)
            except BlkidException as e:
                raise LoggerArgEx(getattr(e, 'message', e))
        else:
            self.uuid = uuid

        if device is None:
            self.device = self.get_partition_path(uuid)
        else:
            self.device = device

        self.warn_if_above = warn_if_above

    def check_data(self, data: list[any]) -> WarnMessage:
        temp = data[1]
        message = f'Temperature of {self.uuid} is at {temp}'

        if temp > self.critical_if_above:
            return WarnMessage(WarnLevel.HIGH, message)
        if temp > self.warn_if_above:
            return WarnMessage(WarnLevel.NORMAL, message)
        return WarnMessage(WarnLevel.NONE)

    def read_data(self) -> list[any]:
        return [
            datetime_util.now(),
            self.get_temp_from_device(self.device),
        ]

    def data_schema(self) -> list[str]:
        return ['datetime#Date', 'int#Temperature']

    def get_log_file(self) -> Path:
        # self.device might change over time.
        # Thus, we use self.uuid to identify a partition.
        return self.get_log_dir() / f'drive_{self.uuid}.csv'

    #
    # HELPERS
    #

    @classmethod
    def get_partition_path(cls, uuid: str) -> Path:
        """
        :return: Partition path, e.g. /dev/sda1
        """
        return Path(f'/dev/disk/by-uuid/{uuid}').resolve()

    @classmethod
    def get_partition_uuid(cls, device: Path) -> str:
        """
        :param device: E.g. /dev/sda1
        :return: UUID of e.g. partition /dev/sda1
        :raise BlkidException: If the UUID could not be determined.
        """
        returncode, stdout, stderr = execute_capture(['blkid', '-s', 'UUID', '-o', 'value', f'{device}'])

        if returncode != 0:
            raise BlkidException(f'blkid failed with returncode {returncode}\nstdout: {stdout}\nstderr: {stderr}')

        return stdout.strip()

    @classmethod
    def get_temp_from_device(cls, device: Path) -> int:
        """
        :param device: For example `/dev/sda` or `/dev/disk/by-uuid/<uuid>`
        :return: Temperature in degrees Celsius
        """
        returncode, stdout, stderr = execute_capture(['smartctl', '-j', '-a', f'{device}'])

        if returncode != 0:
            raise LoggerReadEx(f'smartctl failed with returncode {returncode}\nstdout: {stdout}\nstderr: {stderr}')
        j = json.loads(stdout)

        return j['temperature']['current']
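For orientation, a sketch of the part of the `smartctl -j` JSON that `get_temp_from_device` consumes; the payload is abridged, real output contains many more fields:

# Abridged example payload of `smartctl -j -a <device>`; only the field
# accessed above is shown.
import json

stdout = '{"temperature": {"current": 38}}'
print(json.loads(stdout)['temperature']['current'])  # -> 38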
164
src/de/p1st/monitor/loggers/filesystem.py
Normal file
@ -0,0 +1,164 @@
from pathlib import Path

import psutil
from de.p1st.monitor import datetime_util
from de.p1st.monitor.exec_capture import execute_capture

from de.p1st.monitor.logger import Logger
from de.p1st.monitor.logger_ex import LoggerArgEx, LoggerReadEx
from de.p1st.monitor.warn import WarnLevel, WarnMessage


class NotMounted(Exception):
    pass


class FilesystemLogger(Logger):
    def __init__(self, uuid: str = None,
                 mountpoint: Path = None,
                 unmounted_ok: bool = False,
                 warn_if_above: float = 1.0,
                 warn_threshold: int = 1,
                 warn_data_range: int = 1,
                 ):
        # The headroom between disk usage at `warn_if_above` and a full disk at `1.0`.
        buffer = 1 - warn_if_above
        critical_if_above = warn_if_above + 0.5 * buffer
        super().__init__(warn_threshold,
                         warn_data_range,
                         warn_if_above,
                         critical_if_above,
                         )

        if uuid is None and mountpoint is None:
            raise LoggerArgEx('uuid or mountpoint required')

        self.uuid = uuid
        self.mountpoint = mountpoint
        self.unmounted_ok = unmounted_ok
        self.warn_if_above = warn_if_above

        self.mounted = True

        #
        #
        #

        # If uuid and mountpoint are both specified,
        # raise a warning if an unexpected uuid is mounted at mountpoint.
        if self.mountpoint is not None and self.uuid is not None:
            try:
                actual_uuid = self.get_uuid(self.mountpoint)
                self.mounted = True
                if self.uuid != actual_uuid:
                    raise LoggerReadEx(f'Expected {self.uuid} at {self.mountpoint} but got {actual_uuid}')
            except NotMounted as e:
                if self.unmounted_ok:
                    self.mounted = False
                else:
                    raise LoggerArgEx(getattr(e, 'message', e))

        # Try to get the UUID (if only the mountpoint is given).
        if self.uuid is None:
            try:
                self.uuid = self.get_uuid(self.mountpoint)
                self.mounted = True
            except NotMounted as e:
                if self.unmounted_ok:
                    self.mounted = False
                else:
                    raise LoggerArgEx(getattr(e, 'message', e))

        # Try to get the mountpoint (if only the uuid is given).
        if self.mountpoint is None:
            try:
                self.mountpoint = self.get_mountpoint(self.uuid)
                self.mounted = True
            except NotMounted as e:
                if self.unmounted_ok:
                    self.mounted = False
                else:
                    raise LoggerArgEx(getattr(e, 'message', e))

    def check_data(self, data: list[any]) -> WarnMessage:
        if not self.mounted:
            return WarnMessage(WarnLevel.NONE)

        disk_usage = data[1]
        message = f'Disk usage of {self.uuid} is at {disk_usage}'

        if disk_usage > self.critical_if_above:
            return WarnMessage(WarnLevel.HIGH, message)
        if disk_usage > self.warn_if_above:
            return WarnMessage(WarnLevel.NORMAL, message)
        return WarnMessage(WarnLevel.NONE)

    def read_data(self) -> list[any] | None:
        if not self.mounted:
            return None

        disk_usage: float = self.get_disk_usage(self.mountpoint)
        return [
            datetime_util.now(),
            disk_usage,
        ]

    def data_schema(self) -> list[str]:
        return ['datetime#Date', 'float#Disk usage']

    def get_log_file(self) -> Path:
        # The mountpoint of a filesystem might change over time.
        # Thus, we use self.uuid to identify a filesystem.
        return self.get_log_dir() / f'filesystem_{self.uuid}.csv'

    #
    # HELPERS
    #

    @classmethod
    def get_disk_usage(cls, mountpoint: Path) -> float:
        """
        :returns: used space / total space
        """
        return psutil.disk_usage(str(mountpoint)).percent / 100.0

    @classmethod
    def get_mountpoint(cls, uuid: str) -> Path:
        """
        Raises NotMounted if the corresponding partition is not mounted.
        """
        partition_list: list[psutil._common.sdiskpart] = psutil.disk_partitions(all=False)
        partitions: dict[Path, psutil._common.sdiskpart] = {
            Path(partition.device).resolve(): partition for partition in partition_list
        }

        partition_path = cls.get_partition_path(uuid)
        if partition_path not in partitions:
            raise NotMounted(
                f'Partition {partition_path} is probably not mounted '
                f'as it is not in psutil partition list: {partitions}')

        partition = partitions[partition_path]
        return Path(partition.mountpoint)

    @classmethod
    def get_uuid(cls, mountpoint: Path) -> str:
        """
        Returns the UUID of the device mounted at `mountpoint`,
        equivalent to e.g.: findmnt / -o UUID -n

        Raises NotMounted if nothing is mounted at `mountpoint`.
        """
        returncode, stdout, stderr = execute_capture(['findmnt', str(mountpoint), '-o', 'UUID', '-n'])
        if returncode != 0:
            raise NotMounted(
                f'No partition mounted at {mountpoint}. Stderr of findmnt: {stderr}')

        return stdout.strip()

    @classmethod
    def get_partition_path(cls, uuid: str) -> Path:
        """
        :return: Partition path, e.g. /dev/sda1
        """
        return Path(f'/dev/disk/by-uuid/{uuid}').resolve()
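A standalone sketch of the ratio computed by `get_disk_usage`: psutil reports a percentage in [0, 100], while the logger stores a fraction in [0, 1]:

import psutil

# psutil.disk_usage() takes a mountpoint string; '/' is used as an example.
usage = psutil.disk_usage('/').percent / 100.0
print(f'root filesystem is {usage:.1%} full')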
74
src/de/p1st/monitor/loggers/memory.py
Normal file
@ -0,0 +1,74 @@
from pathlib import Path

import psutil
from de.p1st.monitor import datetime_util

from de.p1st.monitor.logger import Logger
from de.p1st.monitor.warn import WarnMessage, WarnLevel


class MemoryLogger(Logger):
    def __init__(self,
                 warn_if_above: float = 1.0,
                 warn_threshold: int = 1,
                 warn_data_range: int = 1,
                 ):
        # The headroom between memory usage at `warn_if_above` and full memory at `1.0`.
        buffer = 1 - warn_if_above
        critical_if_above = warn_if_above + 0.5 * buffer
        super().__init__(warn_threshold,
                         warn_data_range,
                         warn_if_above,
                         critical_if_above)
        self.warn_if_above = warn_if_above

    def check_data(self, data: list[any]) -> WarnMessage:
        used_mb = data[1]
        total_available_mb = data[3]
        message = f'Memory usage is at {used_mb} MB of {total_available_mb} MB'

        used = used_mb / total_available_mb

        if used > self.critical_if_above:
            return WarnMessage(WarnLevel.HIGH, message)
        if used > self.warn_if_above:
            return WarnMessage(WarnLevel.NORMAL, message)
        return WarnMessage(WarnLevel.NONE)

    def read_data(self) -> list[any]:
        used_mb, free_mb, available_mb, total_mb = self.get_memory()
        used_and_cached_mb = total_mb - free_mb
        total_available_mb = used_mb + available_mb
        return [
            datetime_util.now(),
            used_mb,
            used_and_cached_mb,
            total_available_mb,
        ]

    def data_schema(self) -> list[str]:
        return ['datetime#Date', 'int#Used memory in MB', 'int#Used and cached in MB',
                'int#Total available memory in MB']

    def get_log_file(self) -> Path:
        return self.get_log_dir() / 'memory.csv'

    #
    # HELPERS
    #

    @classmethod
    def get_memory(cls) -> tuple[int, int, int, int]:
        """
        :return: Tuple[used memory in MB, free memory in MB, available memory in MB, total memory in MB].
            This does not include swap.
        """
        mb = 1024 * 1024
        mem = psutil.virtual_memory()

        # mem.available:
        #   The memory that can be given instantly to processes,
        #   excluding swap.
        # mem.total:
        #   Total physical memory (excluding swap).
        # mem.used + mem.available != mem.total
        return int(mem.used / mb), int(mem.free / mb), int(mem.available / mb), int(mem.total / mb)
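A standalone sketch of the three derived values that `read_data` logs, computed directly from `psutil.virtual_memory()`:

import psutil

mb = 1024 * 1024
mem = psutil.virtual_memory()
used_mb = int(mem.used / mb)                               # int#Used memory in MB
used_and_cached_mb = int((mem.total - mem.free) / mb)      # also counts caches/buffers
total_available_mb = int((mem.used + mem.available) / mb)  # int#Total available memory in MB
print(used_mb, used_and_cached_mb, total_available_mb)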
113
src/de/p1st/monitor/loggers/network.py
Normal file
@ -0,0 +1,113 @@
import sys
from datetime import datetime, timezone, timedelta
from pathlib import Path

import psutil

from de.p1st.monitor import datetime_util, csv_util
from de.p1st.monitor.logger import Logger
from de.p1st.monitor.logger_ex import LoggerReadEx
from de.p1st.monitor.warn import WarnLevel, WarnMessage


class NetworkLogger(Logger):
    def __init__(self, network_interface: str):
        super().__init__()
        self.network_interface = network_interface

    def export_data(self) -> Path:
        data = self.get_all_datasets()

        export_schema = [
            'datetime#Date',
            'float#Bytes sent per second',
            'float#Bytes received per second',
        ]
        export_data = []
        # Derive one rate sample from each pair of consecutive rows.
        for prev_row, curr_row in zip(data[:-1], data[1:]):
            # If the boot time differs, there was a reboot between the two
            # data points, so the sent/received deltas are invalid.
            if prev_row[3] != curr_row[3]:
                continue

            elapsed_time: timedelta = curr_row[0] - prev_row[0]
            delta_sent = curr_row[1] - prev_row[1]
            delta_received = curr_row[2] - prev_row[2]

            if delta_sent < 0 or delta_received < 0:
                print(f'bytes received/sent counter overflowed after {prev_row[0]}',
                      file=sys.stderr)
                continue

            elapsed_seconds = elapsed_time.total_seconds()
            export_data.append([
                # datetime#Date
                prev_row[0] + 0.5 * elapsed_time,
                # float#Bytes sent per second
                delta_sent / elapsed_seconds,
                # float#Bytes received per second
                delta_received / elapsed_seconds,
            ])

        export_file = self.get_log_file().parent.joinpath(self.get_log_file().name + '.exported.csv')
        rows = [self.as_row(export_row, export_schema) for export_row in export_data]
        csv_util.write(file=export_file, rows=rows, header=export_schema, recreate_file=True)
        return export_file

    def check_data(self, data: list[any]) -> WarnMessage:
        return WarnMessage(WarnLevel.NONE)

    def data_schema(self) -> list[str]:
        return [
            'datetime#Date',
            'int#Bytes sent since boot',
            'int#Bytes received since boot',
            'datetime#Boot date',
        ]

    def read_data(self) -> list[any]:
        sent, received = self.get_net_usage()
        return [
            datetime_util.now(),
            sent,
            received,
            self.get_boot_time(),
        ]

    def get_log_file(self) -> Path:
        return self.get_log_dir() / f'net_{self.network_interface}.csv'

    #
    # HELPERS
    #

    def get_net_usage(self) -> tuple[int, int]:
        """
        Warning: The returned values may overflow if the system is running for a long time.

        :return: bytes sent, bytes received
        """
        # noinspection PyTypeChecker
        nics_data: dict[str, psutil._common.snetio] = psutil.net_io_counters(pernic=True, nowrap=True)

        if self.network_interface not in nics_data:
            raise LoggerReadEx(f'Network interface {self.network_interface} not found')

        nic_data = nics_data[self.network_interface]
        return nic_data.bytes_sent, nic_data.bytes_recv

    @classmethod
    def get_boot_time(cls) -> datetime:
        epoch_seconds = psutil.boot_time()
        return datetime.fromtimestamp(epoch_seconds, tz=timezone.utc)


def test():
    from de.p1st.monitor.cfg.singleton import init_cfg
    init_cfg()

    logger = NetworkLogger('wlp1s0')
    logger.update()
    logger.log()


if __name__ == '__main__':
    test()
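A self-contained sketch of the rate derivation in `export_data`, using two made-up counter samples taken 60 s apart:

from datetime import datetime, timedelta

# Two fabricated (timestamp, bytes_sent, bytes_received) samples.
t0, sent0, recv0 = datetime(2023, 1, 1, 12, 0, 0), 1_000_000, 5_000_000
t1, sent1, recv1 = t0 + timedelta(seconds=60), 1_600_000, 5_300_000

elapsed = (t1 - t0).total_seconds()
print((sent1 - sent0) / elapsed)  # 10000.0 bytes sent per second
print((recv1 - recv0) / elapsed)  # 5000.0 bytes received per second
# The exported sample is timestamped at the midpoint: t0 + 0.5 * (t1 - t0).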
69
src/de/p1st/monitor/loggers/swap.py
Normal file
@ -0,0 +1,69 @@
from pathlib import Path

import psutil
from de.p1st.monitor import datetime_util

from de.p1st.monitor.logger import Logger
from de.p1st.monitor.warn import WarnMessage, WarnLevel


class SwapLogger(Logger):
    def __init__(self,
                 warn_if_above: float = 1.0,
                 warn_threshold: int = 1,
                 warn_data_range: int = 1,
                 ):
        # The headroom between swap usage at `warn_if_above` and full swap at `1.0`.
        buffer = 1 - warn_if_above
        critical_if_above = warn_if_above + 0.5 * buffer
        super().__init__(warn_threshold,
                         warn_data_range,
                         warn_if_above,
                         critical_if_above)
        self.warn_if_above = warn_if_above

    def check_data(self, data: list[any]) -> WarnMessage:
        used_mb = data[1]
        total_mb = data[2]
        message = f'Swap usage is at {used_mb} MB of {total_mb} MB'

        if used_mb == 0 and total_mb == 0:
            # Swap not enabled.
            return WarnMessage(WarnLevel.NONE)

        usage = used_mb / total_mb

        if usage > self.critical_if_above:
            return WarnMessage(WarnLevel.HIGH, message)
        if usage > self.warn_if_above:
            return WarnMessage(WarnLevel.NORMAL, message)
        return WarnMessage(WarnLevel.NONE)

    def read_data(self) -> list[any]:
        used_mb, total_mb = self.get_swap()
        return [
            datetime_util.now(),
            used_mb,
            total_mb,
        ]

    def data_schema(self) -> list[str]:
        return ['datetime#Date', 'int#Used swap in MB', 'int#Total swap in MB']

    def get_log_file(self) -> Path:
        return self.get_log_dir() / 'swap.csv'

    #
    # HELPERS
    #

    @classmethod
    def get_swap(cls) -> tuple[int, int]:
        """
        :return: Tuple[used swap in MB, total swap in MB].
        """
        mb = 1024 * 1024
        mem = psutil.swap_memory()

        return int(mem.used / mb), int(mem.total / mb)
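A standalone sketch of the swap-disabled guard in `check_data`: with swap off, psutil reports a total of 0 and the usage ratio must not be computed:

import psutil

mb = 1024 * 1024
swap = psutil.swap_memory()
used_mb, total_mb = int(swap.used / mb), int(swap.total / mb)
usage = None if total_mb == 0 else used_mb / total_mb
print(used_mb, total_mb, usage)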
82
src/de/p1st/monitor/loggers/temp.py
Normal file
@ -0,0 +1,82 @@
from pathlib import Path

import psutil

from de.p1st.monitor import datetime_util
from de.p1st.monitor.logger import Logger
from de.p1st.monitor.logger_ex import LoggerReadEx
from de.p1st.monitor.warn import WarnMessage, WarnLevel


class TempLogger(Logger):
    def __init__(self, sensor_name: str,
                 sensor_label: str,
                 warn_if_above: float = None,
                 warn_threshold: int = 1,
                 warn_data_range: int = 1,
                 ):
        # Guard against the default `warn_if_above=None`.
        critical_if_above = warn_if_above + 10 if warn_if_above is not None else None
        super().__init__(warn_threshold,
                         warn_data_range,
                         warn_if_above,
                         critical_if_above)
        self.name = sensor_name
        self.label = sensor_label

        self.warn_if_above = warn_if_above

    def check_data(self, data: list[any]) -> WarnMessage:
        temp = data[1]
        message = f'Temperature of {self.name} {self.label} is at {temp}'

        if temp > self.critical_if_above:
            return WarnMessage(WarnLevel.HIGH, message)
        if temp > self.warn_if_above:
            return WarnMessage(WarnLevel.NORMAL, message)
        return WarnMessage(WarnLevel.NONE)

    def read_data(self) -> list[any]:
        return [
            datetime_util.now(),
            self.get_temp()
        ]

    def data_schema(self) -> list[str]:
        return [
            'datetime#Date',
            'float#Temperature'
        ]

    def get_log_file(self) -> Path:
        return self.get_log_dir() / f'temp_{self.name}_{self.label}.csv'

    #
    # HELPERS
    #

    def get_temp(self) -> float:
        """
        :return: Temperature in degrees Celsius
        """
        data = psutil.sensors_temperatures(fahrenheit=False)
        if self.name not in data:
            raise LoggerReadEx(f'Sensor {self.name} not found')
        for i in data[self.name]:
            if i.label == self.label:
                return i.current
        raise LoggerReadEx(f'Label {self.label} of sensor {self.name} not found')


def test():
    from de.p1st.monitor.cfg import singleton
    singleton.init_cfg()

    logger = TempLogger('amdgpu', 'edge', 47, 2, 4)
    logger.update()
    logger.log()
    logger.check().print()


if __name__ == '__main__':
    test()
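To find valid `sensor_name`/`sensor_label` pairs for `TempLogger`, the available sensors can be enumerated; the output is machine-dependent:

import psutil

# List every (sensor name, label, current temperature) triple psutil sees.
for name, entries in psutil.sensors_temperatures(fahrenheit=False).items():
    for entry in entries:
        print(name, repr(entry.label), entry.current)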
83
src/de/p1st/monitor/main.py
Executable file
@ -0,0 +1,83 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import argparse
import sys
from pathlib import Path

from de.p1st.monitor.cfg.singleton import init_cfg
from de.p1st.monitor.cfg.loggers import get_loggers
from de.p1st.monitor.logger_ex import LoggerReadEx


def main():
    parser = argparse.ArgumentParser(prog='de-p1st-monitor',
                                     description='Iterates over all config sections. '
                                                 'For each section the current sensor data is read '
                                                 'and logged to a .csv file.')
    parser.add_argument('--config', '-c', default=None, type=Path,
                        help='Path to .ini configuration file.')
    parser.add_argument('--export', '-e', default=False, action='store_true',
                        help='If given, export .csv files and print their paths to stdout. '
                             'No sensor data is logged during this.')
    args = parser.parse_args()
    init_cfg(args.config)

    if args.export:
        export()
    else:
        log()


def export():
    loggers, logger_arg_exs = get_loggers()
    if len(logger_arg_exs) > 0:
        print('\nCONFIGURATION ERROR: Could not instantiate some of the loggers!', file=sys.stderr)
        print_exs(logger_arg_exs, [f'{n}.' for n in range(1, 1 + len(logger_arg_exs))])
        sys.exit(1)

    for logger in loggers:
        export_path: Path = logger.export_data()
        print(export_path)


def log():
    loggers, logger_arg_exs = get_loggers()
    logger_read_exs = []
    logger_warnings = 0
    for logger_ct, logger in enumerate(loggers, start=1):
        print(f'Running logger {logger_ct}/{len(loggers)} ...')
        try:
            logger.update()
        except LoggerReadEx as e:
            logger_read_exs.append(e)
            continue
        # After logger.update() there might still be no data.
        # Example: FilesystemLogger if the partition is not mounted (and unmounted_ok is True).
        logger.log(skip_if_no_data=True)
        if logger.check().print().is_warning():
            logger_warnings += 1

    if len(logger_arg_exs) > 0:
        print('\nCONFIGURATION ERROR: Could not instantiate some of the loggers!', file=sys.stderr)
        print_exs(logger_arg_exs, [f'{n}.' for n in range(1, 1 + len(logger_arg_exs))])
    if len(logger_read_exs) > 0:
        print('\nRUNTIME ERROR: Some loggers could not fetch sensor data!', file=sys.stderr)
        print_exs(logger_read_exs, [f'{n}.' for n in range(1, 1 + len(logger_read_exs))])

    if len(logger_arg_exs) + len(logger_read_exs) > 0 or logger_warnings > 0:
        sys.exit(1)


def print_exs(exs: list[Exception], headers: list[str]):
    for e, header in zip(exs, headers):
        # Indent str(e) with \t
        body = '\t' + '\n\t'.join(str(e).splitlines())

        print(f'{header}\n{body}', file=sys.stderr)


if __name__ == '__main__':
    main()
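A hedged smoke-test sketch: `main()` can be driven through its argparse interface by substituting `sys.argv`; the config path below is only an example, not a file this diff ships:

import sys
from de.p1st.monitor.main import main

# Hypothetical config path; replace with a real .ini file.
sys.argv = ['de-p1st-monitor', '--config', '/etc/de-p1st-monitor/example.ini']
main()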
24
src/de/p1st/monitor/string_conversion.py
Normal file
@ -0,0 +1,24 @@
from typing import Callable

from de.p1st.monitor import datetime_util


def data_types() -> dict[str, dict[str, Callable[[any], any]]]:
    """
    Returns a dictionary with one entry per supported type:

    Key: Name of the type.
    Value: Dict with the to_string and from_string conversion methods, keyed 'to' and 'from'.
    """
    return {
        'str': {'to': lambda x: x, 'from': lambda x: x},
        'int': {'to': str, 'from': int},
        'float': {'to': str, 'from': float},
        'datetime': {'to': datetime_util.to_str, 'from': datetime_util.from_str},
    }


def to_string(v: any, type_str: str) -> str:
    return data_types()[type_str]['to'](v)


def from_string(v: str, type_str: str) -> any:
    return data_types()[type_str]['from'](v)
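A round-trip sketch for the conversion table: any value written to a CSV cell via `to_string` can be recovered with `from_string`:

from de.p1st.monitor.string_conversion import to_string, from_string

cell = to_string(3.14, 'float')
assert cell == '3.14'
assert from_string(cell, 'float') == 3.14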
49
src/de/p1st/monitor/warn.py
Normal file
@ -0,0 +1,49 @@
from __future__ import annotations
import sys
from enum import Enum
from functools import total_ordering


# https://docs.python.org/3/library/functools.html#functools.total_ordering
@total_ordering
class WarnLevel(Enum):
    NONE = 0  # Not a warning. Everything is ok.
    LOW = 1
    NORMAL = 2
    HIGH = 3

    def __eq__(self, other):
        if isinstance(other, WarnLevel):
            return self.value == other.value
        return NotImplemented

    def __lt__(self, other):
        if isinstance(other, WarnLevel):
            return self.value < other.value
        return NotImplemented

    # Defining __eq__ sets __hash__ to None; restore it so that
    # WarnLevel members stay usable as dict keys and in sets.
    __hash__ = Enum.__hash__


class WarnMessage:
    def __init__(self, level: WarnLevel, message: str = None):
        self.level = level
        self.message = message

    def is_warning(self) -> bool:
        return self.level > WarnLevel.NONE

    def print(self, default_message: str = 'Warning!') -> WarnMessage:
        """
        :return: self
        """
        message = default_message if self.message is None else self.message

        if self.level == WarnLevel.NONE:
            pass
        elif self.level == WarnLevel.LOW:
            print(message)
        elif self.level == WarnLevel.NORMAL:
            print(message, file=sys.stderr)
        elif self.level == WarnLevel.HIGH:
            print(f'[CRITICAL] {message}', file=sys.stderr)

        return self
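A usage sketch: `WarnLevel` members are totally ordered, and `WarnMessage.print()` returns `self` so checks can be chained, as in main.py's `logger.check().print().is_warning()`:

from de.p1st.monitor.warn import WarnLevel, WarnMessage

assert WarnLevel.HIGH > WarnLevel.NONE

# Prints to stderr (NORMAL level) and reports that it is a warning.
if WarnMessage(WarnLevel.NORMAL, 'disk is filling up').print().is_warning():
    print('at least one warning was raised')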