mirror of
https://codeberg.org/privacy1st/de-p1st-monitor
synced 2024-11-21 19:33:18 +01:00
feat: log drive temperature with psutil
This commit is contained in:
parent
be10017165
commit
23aa370cbf
@ -8,6 +8,7 @@ from de.p1st.monitor.cfg.singleton import get_cfg
|
|||||||
from de.p1st.monitor.logger_ex import LoggerArgEx
|
from de.p1st.monitor.logger_ex import LoggerArgEx
|
||||||
from de.p1st.monitor.loggers.cpu import CPULogger1, CPULogger5, CPULogger15
|
from de.p1st.monitor.loggers.cpu import CPULogger1, CPULogger5, CPULogger15
|
||||||
from de.p1st.monitor.loggers.drive import DriveLogger
|
from de.p1st.monitor.loggers.drive import DriveLogger
|
||||||
|
from de.p1st.monitor.loggers.drive_temp import DriveTempLogger
|
||||||
from de.p1st.monitor.loggers.filesystem import FilesystemLogger
|
from de.p1st.monitor.loggers.filesystem import FilesystemLogger
|
||||||
from de.p1st.monitor.loggers.memory import MemoryLogger
|
from de.p1st.monitor.loggers.memory import MemoryLogger
|
||||||
from de.p1st.monitor.loggers.network import NetworkLogger
|
from de.p1st.monitor.loggers.network import NetworkLogger
|
||||||
@ -86,6 +87,13 @@ def get_loggers() -> tuple[list[Logger], list[LoggerArgEx]]:
|
|||||||
warn_data_range = int(cfg_.get('warn_data_range', '1'))
|
warn_data_range = int(cfg_.get('warn_data_range', '1'))
|
||||||
return DriveLogger(uuid, id_, device, warn_if_above, warn_threshold, warn_data_range)
|
return DriveLogger(uuid, id_, device, warn_if_above, warn_threshold, warn_data_range)
|
||||||
|
|
||||||
|
def drive_temp(cfg_: configparser.SectionProxy) -> Logger:
    """
    Create a `DriveTempLogger` from one config section.

    Keys read from the section:

    - `type`: passed on unchanged; `None` if the key is absent.
    - `warn_if_above`: optional integer; `None` if the key is absent.
    - `warn_threshold`: integer, defaults to 1.
    - `warn_data_range`: integer, defaults to 1.

    :param cfg_: Config section describing the logger.
    :return: The configured logger.
    :raises ValueError: If one of the integer options is not a valid integer.
    """
    sensor_type = cfg_.get('type', None)

    # The warning limit is optional, so only convert it when it is configured.
    if 'warn_if_above' in cfg_:
        limit = int(cfg_['warn_if_above'])
    else:
        limit = None

    threshold = int(cfg_.get('warn_threshold', '1'))
    data_range = int(cfg_.get('warn_data_range', '1'))

    return DriveTempLogger(sensor_type, limit, threshold, data_range)
|
||||||
|
|
||||||
def memory(cfg_: configparser.SectionProxy) -> Logger:
|
def memory(cfg_: configparser.SectionProxy) -> Logger:
|
||||||
warn_if_above = float(cfg_.get('warn_if_above', '1.0'))
|
warn_if_above = float(cfg_.get('warn_if_above', '1.0'))
|
||||||
warn_threshold = int(cfg_.get('warn_threshold', '1'))
|
warn_threshold = int(cfg_.get('warn_threshold', '1'))
|
||||||
@ -107,6 +115,7 @@ def get_loggers() -> tuple[list[Logger], list[LoggerArgEx]]:
|
|||||||
'network': net,
|
'network': net,
|
||||||
'filesystem': filesystem,
|
'filesystem': filesystem,
|
||||||
'drive': drive,
|
'drive': drive,
|
||||||
|
'drive_temp': drive_temp,
|
||||||
'memory': memory,
|
'memory': memory,
|
||||||
'swap': swap,
|
'swap': swap,
|
||||||
}
|
}
|
||||||
|
@ -53,27 +53,15 @@ warn_if_above = 60
|
|||||||
[network.1]
|
[network.1]
|
||||||
network_interface = enp0s31f6
|
network_interface = enp0s31f6
|
||||||
|
|
||||||
[drive.1]
|
[drive_temp.1]
|
||||||
; NVMe 256GB2
|
; NVMe drives
|
||||||
; TODO NVMe 49 warn, 55 limit
|
; TODO: 49 warn, 55 limit
|
||||||
id = nvme-WDC_PC_SN520_SDAPNUW-256G-1002_183873801941
|
type = nvme
|
||||||
warn_if_above = 50
|
warn_if_above = 50
|
||||||
[drive.2]
|
[drive_temp.2]
|
||||||
; HDD 12TB1
|
; HDD drives
|
||||||
; TODO HDD 39 warn, 45 limit
|
; TODO: 39 warn, 45 limit
|
||||||
id = ata-TOSHIBA_MG07ACA12TE_X1E0A0WKF95G
|
type = drivetemp
|
||||||
warn_if_above = 40
|
|
||||||
[drive.3]
|
|
||||||
; HDD 3TB1
|
|
||||||
id = ata-WDC_WD30EFRX-68EUZN0_WD-WCC4N1173157
|
|
||||||
warn_if_above = 40
|
|
||||||
[drive.4]
|
|
||||||
; HDD 3TB2
|
|
||||||
id = ata-WDC_WD30EFRX-68EUZN0_WD-WMC4N0564095
|
|
||||||
warn_if_above = 40
|
|
||||||
[drive.5]
|
|
||||||
; HDD 4TB1
|
|
||||||
id = ata-WDC_WD40EFRX-68N32N0_WD-WCC7K0CPF0N1
|
|
||||||
warn_if_above = 40
|
warn_if_above = 40
|
||||||
|
|
||||||
; [sensor_script.1]
|
; [sensor_script.1]
|
||||||
|
@ -69,20 +69,14 @@ unmounted_ok = true
|
|||||||
warn_if_above = 0.1
|
warn_if_above = 0.1
|
||||||
|
|
||||||
|
|
||||||
[drive.1]
|
[drive_temp.1]
|
||||||
; Either `uuid`, `id` or `device` must be given.
|
; Type of temperature sensor to read: either `nvme` (for NVMe drives) or `drivetemp` (for HDDs).
|
||||||
;
|
type = nvme
|
||||||
; `uuid` as in /dev/disk/by-uuid/*
|
|
||||||
;uuid = ea7099e3-320d-4eb3-a4c3-9910a9af817b
|
|
||||||
; `id` as in /dev/disk/by-id/*
|
|
||||||
; id = nvme-XPG_GAMMIX_S50_Lite_2K462L2JN9KG
|
|
||||||
; device as in /dev/*
|
|
||||||
device = /dev/nvme0n1
|
|
||||||
|
|
||||||
; Warn if temperature is above this value.
|
; Warn if temperature is above this value.
|
||||||
; Unit: °C
|
; Unit: °C
|
||||||
warn_if_above = 25
|
warn_if_above = 25
|
||||||
|
|
||||||
|
|
||||||
[sensor_script.1]
|
[sensor_script.1]
|
||||||
; The command will be executed.
|
; The command will be executed.
|
||||||
; It has to return a float (or int) and exit code 0 on success.
|
; It has to return a float (or int) and exit code 0 on success.
|
||||||
|
@ -135,17 +135,24 @@ class DriveLogger(Logger):
|
|||||||
@classmethod
|
@classmethod
|
||||||
def get_temp_from_device(cls, device: Path) -> int:
|
def get_temp_from_device(cls, device: Path) -> int:
|
||||||
"""
|
"""
|
||||||
|
Use `smartctl` to get HDD/SSD temperature.
|
||||||
|
|
||||||
|
As reading SMART data wakes up standby HDD drives, we skip them.
|
||||||
|
|
||||||
:param device: Partition path, e.g. `/dev/sda`
|
:param device: Partition path, e.g. `/dev/sda`
|
||||||
:return: Temperature in celsius
|
:return: Temperature in Celsius
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
# -n standby: Don't spin-up an HDD if it is in standby mode.
|
||||||
# -j: JSON output.
|
# -j: JSON output.
|
||||||
# -a: Print all SMART device information.
|
# -a: Print all SMART device information.
|
||||||
# For NVMe, this is equivalent to: '-H -i -c -A -l error -l selftest'.
|
# For NVMe, this is equivalent to: '-H -i -c -A -l error -l selftest'.
|
||||||
# -H: Print health status.
|
# -H: Print health status.
|
||||||
# -A: Prints only the vendor specific SMART Attributes.
|
# -A: Prints only the vendor specific SMART Attributes.
|
||||||
returncode, stdout, stderr = execute_capture(['smartctl', '-j', '-A', f'{device}'])
|
returncode, stdout, stderr = execute_capture(['smartctl', '-n', 'standby', '-j', '-A', f'{device}'])
|
||||||
|
|
||||||
|
if returncode == 2 and 'Device is in STANDBY mode' in stdout:
|
||||||
|
raise LoggerReadEx(f'Could not read drive temperature as it is in standby mode: {device}')
|
||||||
if returncode != 0:
|
if returncode != 0:
|
||||||
raise LoggerReadEx(f'smartctl failed with returncode {returncode}\nstdout: {stdout}\nstderr: {stderr}')
|
raise LoggerReadEx(f'smartctl failed with returncode {returncode}\nstdout: {stdout}\nstderr: {stderr}')
|
||||||
j = json.loads(stdout)
|
j = json.loads(stdout)
|
||||||
|
98
src/de/p1st/monitor/loggers/drive_temp.py
Normal file
98
src/de/p1st/monitor/loggers/drive_temp.py
Normal file
@ -0,0 +1,98 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
import math
|
||||||
|
from typing import Literal
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
import psutil
|
||||||
|
|
||||||
|
from de.p1st.monitor import datetime_util
|
||||||
|
from de.p1st.monitor.logger import Logger
|
||||||
|
from de.p1st.monitor.logger_ex import LoggerReadEx
|
||||||
|
from de.p1st.monitor.warn_data import WarnData
|
||||||
|
|
||||||
|
|
||||||
|
class DriveTempLogger(Logger):
    """
    Logs the minimum and maximum temperature of all drives of one sensor type.

    Temperatures are read with `psutil.sensors_temperatures()`. As psutil can't
    distinguish multiple drives of the same type, only the coolest and the
    hottest reading per type are recorded.
    """

    def __init__(self,
                 type_: Literal['drivetemp', 'nvme'],
                 warn_if_above: int = None,
                 warn_threshold: int = 1,
                 warn_data_range: int = 1,
                 ):
        """
        :param type_: HDD -> drivetemp, NVMe -> nvme
        :param warn_if_above: Warning limit in °C, or None if not configured.
            The critical limit is always 10 above the warning limit.
        :param warn_threshold: Passed on to `Logger.__init__` unchanged.
        :param warn_data_range: Passed on to `Logger.__init__` unchanged.
        """

        # `warn_if_above` is optional (default None); adding 10 to None would
        # raise a TypeError, so the critical limit is only derived when a
        # warning limit was given.
        # NOTE(review): assumes `Logger.__init__` accepts None for both limits
        # (meaning "no limit") - confirm against the base class.
        critical_if_above = None if warn_if_above is None else warn_if_above + 10
        super().__init__(warn_threshold,
                         warn_data_range,
                         warn_if_above,
                         critical_if_above
                         )

        self.type = type_

    def get_warn_data(self, data: list[any]) -> WarnData:
        """
        :param data: One row as returned by `read_data()`: [date, min temperature, max temperature]
        :return: Warn data built from the row's date and its maximum (hottest) temperature
        """
        min_temp = data[1]
        max_temp = data[2]
        message = f'Temperature of drive type {self.type} is in range {min_temp}:{max_temp}'
        return WarnData(data[0], max_temp, message)

    def read_data(self) -> list[any]:
        """
        :return: [current date, min temperature, max temperature], matching `data_schema()`
        :raises LoggerReadEx: If the temperature sensor can't be read
        """
        min_temp, max_temp = self.get_drive_temp()
        return [
            datetime_util.now(),
            min_temp,
            max_temp
        ]

    def data_schema(self) -> list[str]:
        """
        :return: Type and name of each column of a row returned by `read_data()`
        """
        return ['datetime#Date', 'float#MinTemperature', 'float#MaxTemperature']

    def get_log_file(self) -> Path:
        """
        :return: Path of the CSV log file; there is one file per sensor type
        """
        return self.get_log_dir() / f'drive-temp_{self.type}.csv'

    #
    # HELPERS
    #

    def get_drive_temp(self) -> tuple[float, float]:
        """
        Use `psutil` Python library to get HDD/SSD temperature.
        https://psutil.readthedocs.io/en/latest/index.html#psutil.sensors_temperatures

        The `drivetemp` sensor might only show up after loading the kernel
        module of the same name (unconfirmed):
        sudo modprobe drivetemp

        Example output of `psutil.sensors_temperatures()`:
        {
            'nvme': [
                shwtemp(label='Composite', current=37.85, high=81.85, critical=85.85)
            ],
            'pch_skylake': [...],
            'coretemp': [...],
            'drivetemp': [
                shwtemp(label='', current=23.0, high=65.0, critical=85.0),
                shwtemp(label='', current=25.0, high=55.0, critical=70.0),
                shwtemp(label='', current=24.0, high=60.0, critical=85.0),
                shwtemp(label='', current=22.0, high=60.0, critical=85.0)]
        }

        Problem: If one has multiple drives attached, they can't be distinguished.
        https://github.com/giampaolo/psutil/issues/1902

        Therefore, we currently accumulate the maximum and minimum values of all drives of the same type.

        :return: (min temperature, max temperature) in Celsius over all sensors of type `self.type`
        :raises LoggerReadEx: If there is no sensor of type `self.type` or it has no entries
        """
        sensors = psutil.sensors_temperatures(fahrenheit=False)
        if self.type not in sensors:
            raise LoggerReadEx(f'Sensor {self.type} not found')

        readings = [entry.current for entry in sensors[self.type]]
        if not readings:
            raise LoggerReadEx(f'Sensor {self.type} has no entries')

        # The coolest drive is the minimum and the hottest drive the maximum.
        # The warning check in `get_warn_data()` uses the maximum, so these
        # two values must not be swapped.
        return min(readings), max(readings)
|
@ -54,7 +54,7 @@ class TempLogger(Logger):
|
|||||||
|
|
||||||
def get_temp(self) -> float:
|
def get_temp(self) -> float:
|
||||||
"""
|
"""
|
||||||
:return: Temperature in celsius
|
:return: Temperature in Celsius
|
||||||
"""
|
"""
|
||||||
data = psutil.sensors_temperatures(fahrenheit=False)
|
data = psutil.sensors_temperatures(fahrenheit=False)
|
||||||
if self.name not in data:
|
if self.name not in data:
|
||||||
|
Loading…
Reference in New Issue
Block a user